### Running a Logistic Regression Model on Multiple Features
----

In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  5 11:00:56 2018

@author: RaviPandey
"""


# =============================================================================
# Importing Libraries
# =============================================================================

import numpy as np
import pandas as pd


# =============================================================================
# Loading and Exploring data
# =============================================================================

# Columns kept for modelling: nine numeric features, six features treated as
# categorical, and the target ('loan_status').
selected_columns = [
    'loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths',
    'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti',
    'term', 'grade', 'home_ownership', 'verification_status',
    'purpose', 'delinq_2yrs', 'loan_status',
]

# Parse only the columns that are used instead of the whole file: less memory
# and parsing time, and columns that are never used are not type-inferred.
# NOTE(review): the traceback fragment left in this cell's output looks like
# pandas' mixed-type DtypeWarning coming from one of the unused columns - confirm.
df = pd.read_csv('../data/loan.csv', usecols=selected_columns)

# .loc slices by label and includes both ends, so 0:10000 keeps 10001 rows.
# Indexing with the list also puts the columns in the order given above,
# because read_csv returns them in file order regardless of usecols order.
df = df.loc[0:10000, selected_columns]



  interactivity=interactivity, compiler=compiler, result=result)


In [2]:
# dimension of the dataframe
df.shape


(10001, 16)

In [3]:

# Exploring column names
df.columns



Index(['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 'open_acc',
       'revol_bal', 'revol_util', 'total_acc', 'dti', 'term', 'grade',
       'home_ownership', 'verification_status', 'purpose', 'delinq_2yrs',
       'loan_status'],
      dtype='object')

In [4]:
# Exploring first few records
df.head()



Unnamed: 0,loan_amnt,int_rate,annual_inc,inq_last_6mths,open_acc,revol_bal,revol_util,total_acc,dti,term,grade,home_ownership,verification_status,purpose,delinq_2yrs,loan_status
0,5000.0,10.65,24000.0,1.0,3.0,13648.0,83.7,9.0,27.65,36 months,B,RENT,Verified,credit_card,0.0,Fully Paid
1,2500.0,15.27,30000.0,5.0,3.0,1687.0,9.4,4.0,1.0,60 months,C,RENT,Source Verified,car,0.0,Charged Off
2,2400.0,15.96,12252.0,2.0,2.0,2956.0,98.5,10.0,8.72,36 months,C,RENT,Not Verified,small_business,0.0,Fully Paid
3,10000.0,13.49,49200.0,1.0,10.0,5598.0,21.0,37.0,20.0,36 months,C,RENT,Source Verified,other,0.0,Fully Paid
4,3000.0,12.69,80000.0,0.0,15.0,27783.0,53.9,38.0,17.94,60 months,B,RENT,Source Verified,other,0.0,Current


In [5]:
# Getting summarized information
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10001 entries, 0 to 10000
Data columns (total 16 columns):
loan_amnt              10001 non-null float64
int_rate               10001 non-null float64
annual_inc             10001 non-null float64
inq_last_6mths         10001 non-null float64
open_acc               10001 non-null float64
revol_bal              10001 non-null float64
revol_util             9998 non-null float64
total_acc              10001 non-null float64
dti                    10001 non-null float64
term                   10001 non-null object
grade                  10001 non-null object
home_ownership         10001 non-null object
verification_status    10001 non-null object
purpose                10001 non-null object
delinq_2yrs            10001 non-null float64
loan_status            10001 non-null object
dtypes: float64(10), object(6)
memory usage: 1.2+ MB


In [6]:

# Columns sent through the numeric branch of the preprocessor.
numeric_features = ['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 
                   'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti']

# Columns sent through the categorical (one-hot) branch.
# 'delinq_2yrs' is float64 in the raw frame but is listed here because
# file_modify() collapses it to a 0/1 flag before the model is fitted.
categorical_features = ['term', 'grade', 'home_ownership', 'verification_status',
                        'purpose', 'delinq_2yrs']



In [7]:
# =============================================================================
# Applying Transformation to numeric variable
# =============================================================================


# Imputer - missing Value
# StandardScaler
# MaxAbsScaler
# MinMaxScaler
# QuantileTransformer - Uniform Distribution
# PowerTransformer - Gaussian Distribution
# Normalizer
# FunctionTransformer - Custom Transformer

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer


In [8]:

# Numeric branch: replace NaN with the column median, then apply np.sqrt
# element-wise.
# NOTE(review): the second step is called 'log_transformer' but it wraps
# np.sqrt, not a logarithm. The name is left as-is because step names are part
# of the pipeline's parameter paths.
numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
        ('log_transformer', FunctionTransformer(np.sqrt))])
  


In [9]:

# =============================================================================
# Applying transformation to Categorical Variable
# =============================================================================


from sklearn.preprocessing import OneHotEncoder
   
# scikit-learn 1.2 renamed OneHotEncoder's ``sparse`` flag to ``sparse_output``
# and 1.4 removed the old name. Try the new keyword first and fall back to the
# old one so the encoder returns a dense array on both old and new releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:  # older scikit-learn: only ``sparse`` exists
    onehot_encoder = OneHotEncoder(sparse=False)

# Categorical branch: fill NaN with the most frequent value of the column,
# then one-hot encode into dense indicator columns.
categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
        ('onehot', onehot_encoder)])


In [10]:
# =============================================================================
# Creating Preprocessor
# =============================================================================

from sklearn.compose import ColumnTransformer
    
# Each entry is (branch name, transformer, columns handed to that transformer).
column_branches = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_branches)


In [11]:
# =============================================================================
# Model
# =============================================================================

from sklearn.linear_model import LogisticRegression
    
# End-to-end estimator: column preprocessing followed by a LogisticRegression
# created with its default settings.
pipeline_steps = [
    ('preprocess', preprocessor),
    ('lr_model', LogisticRegression()),
]
model = Pipeline(steps=pipeline_steps)


In [12]:

# =============================================================================
# Applying Model
# =============================================================================
# Feature matrix: every column except the target, positional rows 1..999.
# NOTE(review): the slice starts at 1, so row 0 is never used - confirm intended.
X = df.drop('loan_status', axis=1).iloc[1:1000, :]

# Binary target: 1 when the loan is 'Charged Off' or 'Default', otherwise 0.
# It is sliced with the same bounds so it stays aligned with X.
is_bad_loan = df['loan_status'].isin(['Charged Off', 'Default'])
y = np.where(is_bad_loan, 1, 0)[1:1000]

def file_modify(dataset):
    """Return a cleaned copy of ``dataset`` with three columns recoded.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns 'home_ownership', 'purpose' and
        'delinq_2yrs'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``dataset`` in which
        * 'home_ownership' values 'NONE' and 'ANY' are replaced by 'MORTGAGE';
        * 'purpose' keeps 'debt_consolidation', 'credit_card' and
          'home_improvement' and maps every other value to 'other';
        * 'delinq_2yrs' becomes 1 where the value is > 0 and 0 otherwise.

    Prints "Start" when called.
    """
    print("Start")
    # Work on a copy: callers pass an ``iloc`` slice of the raw frame, and
    # assigning columns on that slice mutates the caller's object and triggers
    # pandas' SettingWithCopyWarning.
    cleaned = dataset.copy()

    cleaned['home_ownership'] = np.where(
            cleaned['home_ownership'].isin(['NONE', 'ANY']), 'MORTGAGE',
            cleaned['home_ownership'])
    cleaned['purpose'] = np.where(
            cleaned['purpose'].isin(['debt_consolidation', 'credit_card', 'home_improvement']),
            cleaned['purpose'],
            'other')
    cleaned['delinq_2yrs'] = np.where(
            cleaned['delinq_2yrs'] > 0, 1, 0)
    return cleaned


In [13]:

X = file_modify(X)

Start


In [14]:
model.fit(X, y)




Pipeline(memory=None,
     steps=[('preprocess', ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
         transformer_weights=None,
         transformers=[('num', Pipeline(memory=None,
     steps=[('imputer', SimpleImputer(copy=True, fill_value=None, missing_values=nan,
       strategy='median', verbose...penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])

In [15]:
# NOTE(review): X is the same frame the model was fitted on, so `pred` holds
# in-sample predictions; it is not read again in the visible cells.
pred = model.predict(X)




In [16]:
# =============================================================================
# Dumping Model
# =============================================================================
try:
    # joblib is a standalone package. scikit-learn deprecated its vendored
    # copy (sklearn.externals.joblib) in 0.21 and removed it in 0.23.
    import joblib
except ImportError:  # old environments that only ship the vendored copy
    from sklearn.externals import joblib

# Persist the fitted pipeline (preprocessing + classifier) to disk.
joblib.dump(model, 'first_go.joblib')

['first_go.joblib']

## Feature Selection
--- 
Select the K-Best Features

In [17]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  5 13:40:11 2018

@author: RaviPandey
"""

# =============================================================================
# Importing Libraries
# =============================================================================

import numpy as np
import pandas as pd

# =============================================================================
# Loading and Exploring data
# =============================================================================

# Columns kept for modelling: nine numeric features, six features treated as
# categorical, and the target ('loan_status').
selected_columns = [
    'loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths',
    'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti',
    'term', 'grade', 'home_ownership', 'verification_status',
    'purpose', 'delinq_2yrs', 'loan_status',
]

# Parse only the columns that are used instead of the whole file: less memory
# and parsing time, and columns that are never used are not type-inferred.
# NOTE(review): the traceback fragment left in this cell's output looks like
# pandas' mixed-type DtypeWarning coming from one of the unused columns - confirm.
df = pd.read_csv('../data/loan.csv', usecols=selected_columns)

# .loc slices by label and includes both ends, so 0:9999 keeps 10000 rows.
# Indexing with the list also puts the columns in the order given above,
# because read_csv returns them in file order regardless of usecols order.
df = df.loc[0:9999, selected_columns]

  interactivity=interactivity, compiler=compiler, result=result)


In [18]:
# dimension of the dataframe
df.shape

(10000, 16)

In [19]:
# Exploring column names
df.columns

Index(['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 'open_acc',
       'revol_bal', 'revol_util', 'total_acc', 'dti', 'term', 'grade',
       'home_ownership', 'verification_status', 'purpose', 'delinq_2yrs',
       'loan_status'],
      dtype='object')

In [20]:
# Exploring first few records
df.head()


Unnamed: 0,loan_amnt,int_rate,annual_inc,inq_last_6mths,open_acc,revol_bal,revol_util,total_acc,dti,term,grade,home_ownership,verification_status,purpose,delinq_2yrs,loan_status
0,5000.0,10.65,24000.0,1.0,3.0,13648.0,83.7,9.0,27.65,36 months,B,RENT,Verified,credit_card,0.0,Fully Paid
1,2500.0,15.27,30000.0,5.0,3.0,1687.0,9.4,4.0,1.0,60 months,C,RENT,Source Verified,car,0.0,Charged Off
2,2400.0,15.96,12252.0,2.0,2.0,2956.0,98.5,10.0,8.72,36 months,C,RENT,Not Verified,small_business,0.0,Fully Paid
3,10000.0,13.49,49200.0,1.0,10.0,5598.0,21.0,37.0,20.0,36 months,C,RENT,Source Verified,other,0.0,Fully Paid
4,3000.0,12.69,80000.0,0.0,15.0,27783.0,53.9,38.0,17.94,60 months,B,RENT,Source Verified,other,0.0,Current


In [21]:
# Getting summarized information
df.info()

# Columns sent through the numeric branch of the preprocessor.
numeric_features = ['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 
                   'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti']

# Columns sent through the categorical (one-hot) branch.
# 'delinq_2yrs' is float64 in the raw frame but is listed here because
# file_modify() collapses it to a 0/1 flag before the model is fitted.
categorical_features = ['term', 'grade', 'home_ownership', 'verification_status',
                        'purpose', 'delinq_2yrs']


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
loan_amnt              10000 non-null float64
int_rate               10000 non-null float64
annual_inc             10000 non-null float64
inq_last_6mths         10000 non-null float64
open_acc               10000 non-null float64
revol_bal              10000 non-null float64
revol_util             9997 non-null float64
total_acc              10000 non-null float64
dti                    10000 non-null float64
term                   10000 non-null object
grade                  10000 non-null object
home_ownership         10000 non-null object
verification_status    10000 non-null object
purpose                10000 non-null object
delinq_2yrs            10000 non-null float64
loan_status            10000 non-null object
dtypes: float64(10), object(6)
memory usage: 1.2+ MB


In [22]:
# =============================================================================
# Applying Transformation to numeric variable
# =============================================================================


# Imputer - missing Value
# StandardScaler
# MaxAbsScaler
# MinMaxScaler
# QuantileTransformer - Uniform Distribution
# PowerTransformer - Gaussian Distribution
# Normalizer
# FunctionTransformer - Custom Transformer

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer


# Numeric branch: replace NaN with the column median, then apply np.sqrt
# element-wise.
# NOTE(review): the second step is called 'log_transformer' but it wraps
# np.sqrt, not a logarithm. The name is left as-is because step names are part
# of the pipeline's parameter paths.
numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
        ('log_transformer', FunctionTransformer(np.sqrt))])


In [23]:
# =============================================================================
# Applying transformation to Categorical Variable
# =============================================================================


from sklearn.preprocessing import OneHotEncoder
   
# scikit-learn 1.2 renamed OneHotEncoder's ``sparse`` flag to ``sparse_output``
# and 1.4 removed the old name. Try the new keyword first and fall back to the
# old one so the encoder returns a dense array on both old and new releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:  # older scikit-learn: only ``sparse`` exists
    onehot_encoder = OneHotEncoder(sparse=False)

# Categorical branch: fill NaN with the most frequent value of the column,
# then one-hot encode into dense indicator columns.
categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
        ('onehot', onehot_encoder)])


In [24]:
# =============================================================================
# Creating Preprocessor
# =============================================================================

from sklearn.compose import ColumnTransformer
    
# Each entry is (branch name, transformer, columns handed to that transformer).
column_branches = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_branches)


In [25]:
# =============================================================================
# Feature Selection - Select K best Features
# =============================================================================

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# f_regression, mutual_info_regression
# For classification: chi2, f_classif, mutual_info_classif


In [26]:
# =============================================================================
# Model with feature selection
# =============================================================================

from sklearn.linear_model import LogisticRegression
    
# Selector placed between preprocessing and the classifier: SelectKBest with
# the chi2 score function and k=5.
kbest_selector = SelectKBest(score_func=chi2, k=5)

pipeline_steps = [
    ('preprocess', preprocessor),
    ('feature_selection', kbest_selector),
    ('lr_model', LogisticRegression()),
]
model = Pipeline(steps=pipeline_steps)


In [27]:
# =============================================================================
# Applying Model
# =============================================================================
# Feature matrix: every column except the target, positional rows 1..999.
# NOTE(review): the slice starts at 1, so row 0 is never used - confirm intended.
X = df.drop('loan_status', axis=1).iloc[1:1000, :]

# Binary target: 1 when the loan is 'Charged Off' or 'Default', otherwise 0.
# It is sliced with the same bounds so it stays aligned with X.
is_bad_loan = df['loan_status'].isin(['Charged Off', 'Default'])
y = np.where(is_bad_loan, 1, 0)[1:1000]


In [28]:
def file_modify(dataset):
    """Return a cleaned copy of ``dataset`` with three columns recoded.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns 'home_ownership', 'purpose' and
        'delinq_2yrs'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``dataset`` in which
        * 'home_ownership' values 'NONE' and 'ANY' are replaced by 'MORTGAGE';
        * 'purpose' keeps 'debt_consolidation', 'credit_card' and
          'home_improvement' and maps every other value to 'other';
        * 'delinq_2yrs' becomes 1 where the value is > 0 and 0 otherwise.

    Prints "Start" when called.
    """
    print("Start")
    # Work on a copy: callers pass an ``iloc`` slice of the raw frame, and
    # assigning columns on that slice mutates the caller's object and triggers
    # pandas' SettingWithCopyWarning.
    cleaned = dataset.copy()

    cleaned['home_ownership'] = np.where(
            cleaned['home_ownership'].isin(['NONE', 'ANY']), 'MORTGAGE',
            cleaned['home_ownership'])
    cleaned['purpose'] = np.where(
            cleaned['purpose'].isin(['debt_consolidation', 'credit_card', 'home_improvement']),
            cleaned['purpose'],
            'other')
    cleaned['delinq_2yrs'] = np.where(
            cleaned['delinq_2yrs'] > 0, 1, 0)
    return cleaned


In [29]:
# Recode the feature frame, fit the pipeline, then predict.
# NOTE(review): prediction uses the same X the model was fitted on, so `pred`
# holds in-sample predictions; it is not read again in the visible cells.
X = file_modify(X)

model.fit(X, y)

pred = model.predict(X)


Start




In [30]:
# =============================================================================
# Dumping Model
# =============================================================================

try:
    # joblib is a standalone package. scikit-learn deprecated its vendored
    # copy (sklearn.externals.joblib) in 0.21 and removed it in 0.23.
    import joblib
except ImportError:  # old environments that only ship the vendored copy
    from sklearn.externals import joblib

# Persist the fitted pipeline (preprocessing + selector + classifier) to disk.
joblib.dump(model, 'model_with_feature_selection_1.joblib')

['model_with_feature_selection_1.joblib']

### Feature Selection: Select From Model
----

In [31]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  5 13:40:11 2018

@author: RaviPandey
"""

# =============================================================================
# Importing Libraries
# =============================================================================

import numpy as np
import pandas as pd


# =============================================================================
# Loading and Exploring data
# =============================================================================

# Columns kept for modelling: nine numeric features, six features treated as
# categorical, and the target ('loan_status').
selected_columns = [
    'loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths',
    'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti',
    'term', 'grade', 'home_ownership', 'verification_status',
    'purpose', 'delinq_2yrs', 'loan_status',
]

# Parse only the columns that are used instead of the whole file: less memory
# and parsing time, and columns that are never used are not type-inferred.
# NOTE(review): the traceback fragment left in this cell's output looks like
# pandas' mixed-type DtypeWarning coming from one of the unused columns - confirm.
df = pd.read_csv('../data/loan.csv', usecols=selected_columns)

# .loc slices by label and includes both ends, so 0:9999 keeps 10000 rows.
# Indexing with the list also puts the columns in the order given above,
# because read_csv returns them in file order regardless of usecols order.
df = df.loc[0:9999, selected_columns]


  interactivity=interactivity, compiler=compiler, result=result)


In [32]:
# NOTE(review): only the df.info() printout appears in this cell's output.
# The shape / columns / head() expressions are evaluated but their values are
# not displayed because they are not the last expression of the cell.

# dimension of the dataframe
df.shape

# Exploring column names
df.columns

# Exploring first few records
df.head()

# Getting summarized information
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
loan_amnt              10000 non-null float64
int_rate               10000 non-null float64
annual_inc             10000 non-null float64
inq_last_6mths         10000 non-null float64
open_acc               10000 non-null float64
revol_bal              10000 non-null float64
revol_util             9997 non-null float64
total_acc              10000 non-null float64
dti                    10000 non-null float64
term                   10000 non-null object
grade                  10000 non-null object
home_ownership         10000 non-null object
verification_status    10000 non-null object
purpose                10000 non-null object
delinq_2yrs            10000 non-null float64
loan_status            10000 non-null object
dtypes: float64(10), object(6)
memory usage: 1.2+ MB


In [33]:


# Columns sent through the numeric branch of the preprocessor.
numeric_features = ['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 
                   'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti']

# Columns sent through the categorical (one-hot) branch.
# 'delinq_2yrs' is float64 in the raw frame but is listed here because
# file_modify() collapses it to a 0/1 flag before the model is fitted.
categorical_features = ['term', 'grade', 'home_ownership', 'verification_status',
                        'purpose', 'delinq_2yrs']








In [34]:
# =============================================================================
# Applying Transformation to numeric variable
# =============================================================================


# Imputer - missing Value
# StandardScaler
# MaxAbsScaler
# MinMaxScaler
# QuantileTransformer - Uniform Distribution
# PowerTransformer - Gaussian Distribution
# Normalizer
# FunctionTransformer - Custom Transformer

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer


# Numeric branch: replace NaN with the column median, then apply np.sqrt
# element-wise.
# NOTE(review): the second step is called 'log_transformer' but it wraps
# np.sqrt, not a logarithm. The name is left as-is because step names are part
# of the pipeline's parameter paths.
numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='median')),
        ('log_transformer', FunctionTransformer(np.sqrt))])


In [35]:
# =============================================================================
# Applying transformation to Categorical Variable
# =============================================================================


from sklearn.preprocessing import OneHotEncoder
   
# scikit-learn 1.2 renamed OneHotEncoder's ``sparse`` flag to ``sparse_output``
# and 1.4 removed the old name. Try the new keyword first and fall back to the
# old one so the encoder returns a dense array on both old and new releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:  # older scikit-learn: only ``sparse`` exists
    onehot_encoder = OneHotEncoder(sparse=False)

# Categorical branch: fill NaN with the most frequent value of the column,
# then one-hot encode into dense indicator columns.
categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
        ('onehot', onehot_encoder)])


In [36]:
# =============================================================================
# Creating Preprocessor
# =============================================================================

from sklearn.compose import ColumnTransformer
    
# Each entry is (branch name, transformer, columns handed to that transformer).
column_branches = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_branches)


In [37]:
# =============================================================================
# Feature Selection - Select from models
# =============================================================================

from sklearn import linear_model
# Lasso regression (alpha=0.1) that is handed to SelectFromModel in the model
# pipeline; it is used for choosing columns, not as the final classifier.
# NOTE(review): presumably columns whose Lasso coefficient is (near) zero are
# dropped - confirm against SelectFromModel's threshold rules.
reg = linear_model.Lasso(alpha = 0.1)

from sklearn.feature_selection import SelectFromModel


In [38]:
# =============================================================================
# Model with feature selection
# =============================================================================

from sklearn.linear_model import LogisticRegression
    
# Selector placed between preprocessing and the classifier: SelectFromModel
# wrapped around the Lasso estimator `reg`.
lasso_selector = SelectFromModel(reg)

pipeline_steps = [
    ('preprocess', preprocessor),
    ('feature_selection', lasso_selector),
    ('lr_model', LogisticRegression()),
]
model = Pipeline(steps=pipeline_steps)


In [39]:
# =============================================================================
# Applying Model
# =============================================================================



# Feature matrix: every column except the target, positional rows 1..999.
# NOTE(review): the slice starts at 1, so row 0 is never used - confirm intended.
X = df.drop('loan_status', axis=1).iloc[1:1000, :]

# Binary target: 1 when the loan is 'Charged Off' or 'Default', otherwise 0.
# It is sliced with the same bounds so it stays aligned with X.
is_bad_loan = df['loan_status'].isin(['Charged Off', 'Default'])
y = np.where(is_bad_loan, 1, 0)[1:1000]


In [40]:
def file_modify(dataset):
    """Return a cleaned copy of ``dataset`` with three columns recoded.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns 'home_ownership', 'purpose' and
        'delinq_2yrs'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``dataset`` in which
        * 'home_ownership' values 'NONE' and 'ANY' are replaced by 'MORTGAGE';
        * 'purpose' keeps 'debt_consolidation', 'credit_card' and
          'home_improvement' and maps every other value to 'other';
        * 'delinq_2yrs' becomes 1 where the value is > 0 and 0 otherwise.

    Prints "Start" when called.
    """
    print("Start")
    # Work on a copy: callers pass an ``iloc`` slice of the raw frame, and
    # assigning columns on that slice mutates the caller's object and triggers
    # pandas' SettingWithCopyWarning.
    cleaned = dataset.copy()

    cleaned['home_ownership'] = np.where(
            cleaned['home_ownership'].isin(['NONE', 'ANY']), 'MORTGAGE',
            cleaned['home_ownership'])
    cleaned['purpose'] = np.where(
            cleaned['purpose'].isin(['debt_consolidation', 'credit_card', 'home_improvement']),
            cleaned['purpose'],
            'other')
    cleaned['delinq_2yrs'] = np.where(
            cleaned['delinq_2yrs'] > 0, 1, 0)
    return cleaned


In [41]:
# Recode the feature frame, fit the pipeline, then predict.
# NOTE(review): prediction uses the same X the model was fitted on, so `pred`
# holds in-sample predictions; it is not read again in the visible cells.
X = file_modify(X)

model.fit(X, y)

pred = model.predict(X)


Start




In [42]:
# =============================================================================
# Dumping Model
# =============================================================================


try:
    # joblib is a standalone package. scikit-learn deprecated its vendored
    # copy (sklearn.externals.joblib) in 0.21 and removed it in 0.23.
    import joblib
except ImportError:  # old environments that only ship the vendored copy
    from sklearn.externals import joblib

# Persist the fitted pipeline (preprocessing + selector + classifier) to disk.
joblib.dump(model, 'model_with_feature_selection_2.joblib')


['model_with_feature_selection_2.joblib']

### Adding a Feature Extraction Step
----
* PCA - Dimensionality Reduction

In [43]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Oct  5 13:40:11 2018

@author: RaviPandey
"""

# =============================================================================
# Importing Libraries
# =============================================================================

import numpy as np
import pandas as pd

In [44]:
# =============================================================================
# Loading and Exploring data
# =============================================================================

# Columns kept for modelling: nine numeric features, six features treated as
# categorical, and the target ('loan_status').
selected_columns = [
    'loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths',
    'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti',
    'term', 'grade', 'home_ownership', 'verification_status',
    'purpose', 'delinq_2yrs', 'loan_status',
]

# Parse only the columns that are used instead of the whole file: less memory
# and parsing time, and columns that are never used are not type-inferred.
# NOTE(review): the traceback fragment left in this cell's output looks like
# pandas' mixed-type DtypeWarning coming from one of the unused columns - confirm.
df = pd.read_csv('../data/loan.csv', usecols=selected_columns)

# .loc slices by label and includes both ends, so 0:9999 keeps 10000 rows.
# Indexing with the list also puts the columns in the order given above,
# because read_csv returns them in file order regardless of usecols order.
df = df.loc[0:9999, selected_columns]


  interactivity=interactivity, compiler=compiler, result=result)


In [45]:
# dimension of the dataframe
df.shape

(10000, 16)

In [46]:
# Exploring column names
df.columns


Index(['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 'open_acc',
       'revol_bal', 'revol_util', 'total_acc', 'dti', 'term', 'grade',
       'home_ownership', 'verification_status', 'purpose', 'delinq_2yrs',
       'loan_status'],
      dtype='object')

In [47]:
# Exploring first few records
df.head()


Unnamed: 0,loan_amnt,int_rate,annual_inc,inq_last_6mths,open_acc,revol_bal,revol_util,total_acc,dti,term,grade,home_ownership,verification_status,purpose,delinq_2yrs,loan_status
0,5000.0,10.65,24000.0,1.0,3.0,13648.0,83.7,9.0,27.65,36 months,B,RENT,Verified,credit_card,0.0,Fully Paid
1,2500.0,15.27,30000.0,5.0,3.0,1687.0,9.4,4.0,1.0,60 months,C,RENT,Source Verified,car,0.0,Charged Off
2,2400.0,15.96,12252.0,2.0,2.0,2956.0,98.5,10.0,8.72,36 months,C,RENT,Not Verified,small_business,0.0,Fully Paid
3,10000.0,13.49,49200.0,1.0,10.0,5598.0,21.0,37.0,20.0,36 months,C,RENT,Source Verified,other,0.0,Fully Paid
4,3000.0,12.69,80000.0,0.0,15.0,27783.0,53.9,38.0,17.94,60 months,B,RENT,Source Verified,other,0.0,Current


In [48]:
# Getting summarized information
df.info()

# Columns sent through the numeric branch of the preprocessor.
numeric_features = ['loan_amnt', 'int_rate', 'annual_inc', 'inq_last_6mths', 
                   'open_acc', 'revol_bal', 'revol_util', 'total_acc', 'dti']

# Columns sent through the categorical (one-hot) branch.
# 'delinq_2yrs' is float64 in the raw frame but is listed here because
# file_modify() collapses it to a 0/1 flag before the model is fitted.
categorical_features = ['term', 'grade', 'home_ownership', 'verification_status',
                        'purpose', 'delinq_2yrs']


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
loan_amnt              10000 non-null float64
int_rate               10000 non-null float64
annual_inc             10000 non-null float64
inq_last_6mths         10000 non-null float64
open_acc               10000 non-null float64
revol_bal              10000 non-null float64
revol_util             9997 non-null float64
total_acc              10000 non-null float64
dti                    10000 non-null float64
term                   10000 non-null object
grade                  10000 non-null object
home_ownership         10000 non-null object
verification_status    10000 non-null object
purpose                10000 non-null object
delinq_2yrs            10000 non-null float64
loan_status            10000 non-null object
dtypes: float64(10), object(6)
memory usage: 1.2+ MB


In [49]:
# =============================================================================
# Applying Transformation to numeric variable
# =============================================================================


# Imputer - missing Value
# StandardScaler
# MaxAbsScaler
# MinMaxScaler
# QuantileTransformer - Uniform Distribution
# PowerTransformer - Gaussian Distribution
# Normalizer
# FunctionTransformer - Custom Transformer

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler


# Numeric branch: only median imputation of NaN values. Scaling is not done
# here; the model pipeline applies a StandardScaler step after the whole
# preprocessor.
numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='median'))])


In [50]:
# =============================================================================
# Applying transformation to Categorical Variable
# =============================================================================


from sklearn.preprocessing import OneHotEncoder
   
# scikit-learn 1.2 renamed OneHotEncoder's ``sparse`` flag to ``sparse_output``
# and 1.4 removed the old name. Try the new keyword first and fall back to the
# old one so the encoder returns a dense array on both old and new releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:  # older scikit-learn: only ``sparse`` exists
    onehot_encoder = OneHotEncoder(sparse=False)

# Categorical branch: fill NaN with the most frequent value of the column,
# then one-hot encode into dense indicator columns.
categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(missing_values=np.nan, strategy='most_frequent')),
        ('onehot', onehot_encoder)])


In [51]:
# =============================================================================
# Creating Preprocessor
# =============================================================================

from sklearn.compose import ColumnTransformer
    
# Each entry is (branch name, transformer, columns handed to that transformer).
column_branches = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_branches)



In [52]:
# =============================================================================
# Dimensionality Reduction - PCA
# =============================================================================

from sklearn.decomposition import PCA

# PCA keeping 3 components; plugged into the model pipeline after the
# StandardScaler step, under the step name 'feature_selection'.
pca_model = PCA(n_components=3)


In [53]:
# =============================================================================
# Model with dimensionality reduction (PCA)
# =============================================================================

from sklearn.linear_model import LogisticRegression
    
# Preprocess -> standardise -> PCA -> classifier. The PCA step keeps the
# step name 'feature_selection' used by the other pipelines in this notebook.
pipeline_steps = [
    ('preprocess', preprocessor),
    ('scaling', StandardScaler()),
    ('feature_selection', pca_model),
    ('lr_model', LogisticRegression()),
]
model = Pipeline(steps=pipeline_steps)


In [54]:
# =============================================================================
# Applying Model
# =============================================================================
# Feature matrix: every column except the target, positional rows 1..999.
# NOTE(review): the slice starts at 1, so row 0 is never used - confirm intended.
X = df.drop('loan_status', axis=1).iloc[1:1000, :]

# Binary target: 1 when the loan is 'Charged Off' or 'Default', otherwise 0.
# It is sliced with the same bounds so it stays aligned with X.
is_bad_loan = df['loan_status'].isin(['Charged Off', 'Default'])
y = np.where(is_bad_loan, 1, 0)[1:1000]

def file_modify(dataset):
    """Return a cleaned copy of ``dataset`` with three columns recoded.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns 'home_ownership', 'purpose' and
        'delinq_2yrs'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``dataset`` in which
        * 'home_ownership' values 'NONE' and 'ANY' are replaced by 'MORTGAGE';
        * 'purpose' keeps 'debt_consolidation', 'credit_card' and
          'home_improvement' and maps every other value to 'other';
        * 'delinq_2yrs' becomes 1 where the value is > 0 and 0 otherwise.

    Prints "Start" when called.
    """
    print("Start")
    # Work on a copy: callers pass an ``iloc`` slice of the raw frame, and
    # assigning columns on that slice mutates the caller's object and triggers
    # pandas' SettingWithCopyWarning.
    cleaned = dataset.copy()

    cleaned['home_ownership'] = np.where(
            cleaned['home_ownership'].isin(['NONE', 'ANY']), 'MORTGAGE',
            cleaned['home_ownership'])
    cleaned['purpose'] = np.where(
            cleaned['purpose'].isin(['debt_consolidation', 'credit_card', 'home_improvement']),
            cleaned['purpose'],
            'other')
    cleaned['delinq_2yrs'] = np.where(
            cleaned['delinq_2yrs'] > 0, 1, 0)
    return cleaned


In [55]:
X = file_modify(X)


Start


In [56]:
model.fit(X, y)




Pipeline(memory=None,
     steps=[('preprocess', ColumnTransformer(n_jobs=None, remainder='drop', sparse_threshold=0.3,
         transformer_weights=None,
         transformers=[('num', Pipeline(memory=None,
     steps=[('imputer', SimpleImputer(copy=True, fill_value=None, missing_values=nan,
       strategy='median', verbose...penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False))])

In [57]:
# NOTE(review): X is the same frame the model was fitted on, so `pred` holds
# in-sample predictions; it is not read again in the visible cells.
pred = model.predict(X)


In [58]:
# =============================================================================
# Dumping Model
# =============================================================================


try:
    # joblib is a standalone package. scikit-learn deprecated its vendored
    # copy (sklearn.externals.joblib) in 0.21 and removed it in 0.23.
    import joblib
except ImportError:  # old environments that only ship the vendored copy
    from sklearn.externals import joblib

# Persist the fitted pipeline (preprocessing + scaling + PCA + classifier).
joblib.dump(model, 'model_with_feature_selection_3.joblib')


['model_with_feature_selection_3.joblib']