In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
import pickle

In [3]:
df=pd.read_csv("../resources/medium_loans_final_state.csv")

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,loan_type,loan_amount_000s,action_taken,state_code,applicant_ethnicity,co_applicant_ethnicity,applicant_race_1,co_applicant_race_1,applicant_sex,co_applicant_sex,applicant_income_000s
0,31132844,2,82,1,39,2,5,5,8,2,5,36
1,32078094,1,81,1,48,2,5,5,8,2,5,31
2,31119033,2,63,1,39,2,2,5,5,2,1,82
3,24811761,2,135,1,72,1,5,3,8,1,5,28
4,463658,1,932,1,6,2,2,2,2,1,2,194


In [5]:
retypes={'state_code':'str'
,'loan_type':'str'
,'loan_amount_000s':'int64'
,'action_taken':'int8'
,'applicant_ethnicity':'str'
,'co_applicant_ethnicity':'str'
,'applicant_race_1':'str'
,'co_applicant_race_1':'str'
,'applicant_sex':'str'
,'co_applicant_sex':'str'
,'applicant_income_000s':'int64'}

In [6]:
# categorical fields to strings for one-hot encoding
df = df.astype(retypes)


In [7]:
X = df.drop(columns=['action_taken','Unnamed: 0'])


In [8]:
X_dummies = pd.get_dummies(X)
print(X_dummies.columns)
X_dummies

Index(['loan_amount_000s', 'applicant_income_000s', 'loan_type_1',
       'loan_type_2', 'loan_type_3', 'loan_type_4', 'state_code_1',
       'state_code_10', 'state_code_11', 'state_code_12', 'state_code_13',
       'state_code_15', 'state_code_16', 'state_code_17', 'state_code_18',
       'state_code_19', 'state_code_2', 'state_code_20', 'state_code_21',
       'state_code_22', 'state_code_23', 'state_code_24', 'state_code_25',
       'state_code_26', 'state_code_27', 'state_code_28', 'state_code_29',
       'state_code_30', 'state_code_31', 'state_code_32', 'state_code_33',
       'state_code_34', 'state_code_35', 'state_code_36', 'state_code_37',
       'state_code_38', 'state_code_39', 'state_code_4', 'state_code_40',
       'state_code_41', 'state_code_42', 'state_code_44', 'state_code_45',
       'state_code_46', 'state_code_47', 'state_code_48', 'state_code_49',
       'state_code_5', 'state_code_50', 'state_code_51', 'state_code_53',
       'state_code_54', 'state_code_55', 's

Unnamed: 0,loan_amount_000s,applicant_income_000s,loan_type_1,loan_type_2,loan_type_3,loan_type_4,state_code_1,state_code_10,state_code_11,state_code_12,...,co_applicant_race_1_2,co_applicant_race_1_3,co_applicant_race_1_4,co_applicant_race_1_5,co_applicant_race_1_8,applicant_sex_1,applicant_sex_2,co_applicant_sex_1,co_applicant_sex_2,co_applicant_sex_5
0,82,36,0,1,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,1
1,81,31,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,1,0,0,1
2,63,82,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,1,1,0,0
3,135,28,0,1,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,1
4,932,194,1,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,236,126,0,1,0,0,0,0,0,0,...,0,0,0,1,0,1,0,0,1,0
499996,147,54,1,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,1
499997,50,19,1,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,1
499998,40,19,1,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,1


In [9]:
# Split our preprocessed data into our features and target arrays

y = df['action_taken']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X_dummies, y, random_state=42)

In [10]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Baseline Testing

In [12]:
data = [X_train_scaled, X_test_scaled, y_train, y_test]

In [13]:
def test_model(model, data):
    X_train_scaled, X_test_scaled, y_train, y_test = data
    reg = model.fit(X_train_scaled, y_train)
    print(f'Model: {type(reg).__name__}')
    print(f'Train score: {reg.score(X_train_scaled, y_train)}')
    print(f'Test Score: {reg.score(X_test_scaled, y_test)}\n')
    plt.show()   

In [14]:
#test defaults 
test_model(LogisticRegression(), data)
test_model(AdaBoostClassifier(), data)
test_model(RandomForestClassifier(), data)
test_model(LinearSVC(), data)
test_model(DecisionTreeClassifier(), data)
test_model(ExtraTreesClassifier(), data)
test_model(KNeighborsClassifier(), data)

Model: LogisticRegression
Train score: 0.8911946666666667
Test Score: 0.889776

Model: AdaBoostClassifier
Train score: 0.8915306666666667
Test Score: 0.890344

Model: RandomForestClassifier
Train score: 0.9966106666666666
Test Score: 0.87348





Model: LinearSVC
Train score: 0.8909653333333334
Test Score: 0.889504

Model: DecisionTreeClassifier
Train score: 0.9966693333333333
Test Score: 0.810392

Model: ExtraTreesClassifier
Train score: 0.9966693333333333
Test Score: 0.857896

Model: KNeighborsClassifier
Train score: 0.897024
Test Score: 0.881592



### Grid CV of Best Performing Classifiers

#### Logistic Regression

In [18]:
model= LogisticRegression(random_state=1)
param_grid = {
   'penalty':['l1', 'l2', 'elasticnet'],
    'C': [1000, 100 ,10, 1, 0.1, 0.01],
    'solver': ['newton-cg', 'sag', 'saga', 'liblinear', 'lgbfs'],
              'max_iter': [100, 500, 1000, 2000]}
lr_grid_clf = GridSearchCV(model, param_grid, verbose=3)
lr_grid_clf.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 360 candidates, totalling 1800 fits
[CV 1/5] END C=1000, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.2s
[CV 2/5] END C=1000, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 3/5] END C=1000, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 4/5] END C=1000, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 5/5] END C=1000, max_iter=100, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 1/5] END C=1000, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 2/5] END C=1000, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 3/5] END C=1000, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 4/5] END C=1000, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 5/5] END C=1000, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 1/5] END C=1

[CV 2/5] END C=1000, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time=  16.3s
[CV 3/5] END C=1000, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time=  15.9s
[CV 4/5] END C=1000, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time=  15.6s
[CV 5/5] END C=1000, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time=  16.3s
[CV 1/5] END C=1000, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=   2.4s
[CV 2/5] END C=1000, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=   2.0s
[CV 3/5] END C=1000, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=   1.6s
[CV 4/5] END C=1000, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=   1.9s
[CV 5/5] END C=1000, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=   1.9s
[CV 1/5] END C=1000, max_iter=500, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 2/5] END C=1000, max_iter=500, penalty=l1, solver=lg

[CV 3/5] END C=1000, max_iter=1000, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=1000, max_iter=1000, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 5/5] END C=1000, max_iter=1000, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 1/5] END C=1000, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.8s
[CV 2/5] END C=1000, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.6s
[CV 3/5] END C=1000, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.6s
[CV 4/5] END C=1000, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.4s
[CV 5/5] END C=1000, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.7s
[CV 1/5] END C=1000, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   8.8s
[CV 2/5] END C=1000, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 3/5] END C=1000, max_iter=1000, penalty=l2, so

[CV 4/5] END C=1000, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 5/5] END C=1000, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   6.7s
[CV 1/5] END C=1000, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  20.6s
[CV 2/5] END C=1000, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.6s
[CV 3/5] END C=1000, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.5s
[CV 4/5] END C=1000, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.8s
[CV 5/5] END C=1000, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  13.4s
[CV 1/5] END C=1000, max_iter=2000, penalty=l2, solver=liblinear;, score=0.891 total time=   2.4s
[CV 2/5] END C=1000, max_iter=2000, penalty=l2, solver=liblinear;, score=0.891 total time=   2.5s
[CV 3/5] END C=1000, max_iter=2000, penalty=l2, solver=liblinear;, score=0.891 total time=   2.4s
[CV 4/5] END C=1000, max_iter=2000, penalty=l2, solver=li

[CV 3/5] END C=100, max_iter=100, penalty=l2, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=100, max_iter=100, penalty=l2, solver=lgbfs;, score=nan total time=   0.1s
[CV 5/5] END C=100, max_iter=100, penalty=l2, solver=lgbfs;, score=nan total time=   0.1s
[CV 1/5] END C=100, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.1s
[CV 2/5] END C=100, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.1s
[CV 3/5] END C=100, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.1s
[CV 4/5] END C=100, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.1s
[CV 5/5] END C=100, max_iter=100, penalty=elasticnet, solver=newton-cg;, score=nan total time=   0.1s
[CV 1/5] END C=100, max_iter=100, penalty=elasticnet, solver=sag;, score=nan total time=   0.1s
[CV 2/5] END C=100, max_iter=100, penalty=elasticnet, solver=sag;, score=nan total time=   0.1s
[CV 3/5] END C=100, max_iter

[CV 5/5] END C=100, max_iter=500, penalty=elasticnet, solver=sag;, score=nan total time=   0.1s
[CV 1/5] END C=100, max_iter=500, penalty=elasticnet, solver=saga;, score=nan total time=   0.1s
[CV 2/5] END C=100, max_iter=500, penalty=elasticnet, solver=saga;, score=nan total time=   0.1s
[CV 3/5] END C=100, max_iter=500, penalty=elasticnet, solver=saga;, score=nan total time=   0.1s
[CV 4/5] END C=100, max_iter=500, penalty=elasticnet, solver=saga;, score=nan total time=   0.1s
[CV 5/5] END C=100, max_iter=500, penalty=elasticnet, solver=saga;, score=nan total time=   0.1s
[CV 1/5] END C=100, max_iter=500, penalty=elasticnet, solver=liblinear;, score=nan total time=   0.1s
[CV 2/5] END C=100, max_iter=500, penalty=elasticnet, solver=liblinear;, score=nan total time=   0.1s
[CV 3/5] END C=100, max_iter=500, penalty=elasticnet, solver=liblinear;, score=nan total time=   0.1s
[CV 4/5] END C=100, max_iter=500, penalty=elasticnet, solver=liblinear;, score=nan total time=   0.1s
[CV 5/5] EN

[CV 2/5] END C=100, max_iter=1000, penalty=elasticnet, solver=lgbfs;, score=nan total time=   0.1s
[CV 3/5] END C=100, max_iter=1000, penalty=elasticnet, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=100, max_iter=1000, penalty=elasticnet, solver=lgbfs;, score=nan total time=   0.1s
[CV 5/5] END C=100, max_iter=1000, penalty=elasticnet, solver=lgbfs;, score=nan total time=   0.1s
[CV 1/5] END C=100, max_iter=2000, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 2/5] END C=100, max_iter=2000, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 3/5] END C=100, max_iter=2000, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 4/5] END C=100, max_iter=2000, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 5/5] END C=100, max_iter=2000, penalty=l1, solver=newton-cg;, score=nan total time=   0.1s
[CV 1/5] END C=100, max_iter=2000, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 2/5] END C=100, max_iter=2000, penal

[CV 4/5] END C=10, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 5/5] END C=10, max_iter=100, penalty=l1, solver=sag;, score=nan total time=   0.1s
[CV 1/5] END C=10, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  24.2s
[CV 2/5] END C=10, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  15.0s
[CV 3/5] END C=10, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  15.0s
[CV 4/5] END C=10, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  15.2s
[CV 5/5] END C=10, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  16.3s
[CV 1/5] END C=10, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   2.3s
[CV 2/5] END C=10, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   1.8s
[CV 3/5] END C=10, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   1.6s
[CV 4/5] END C=10, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   2



[CV 5/5] END C=10, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 1/5] END C=10, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   9.1s
[CV 2/5] END C=10, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 3/5] END C=10, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 4/5] END C=10, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 5/5] END C=10, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   6.9s
[CV 1/5] END C=10, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  22.0s
[CV 2/5] END C=10, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  13.0s
[CV 3/5] END C=10, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  13.3s
[CV 4/5] END C=10, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  13.5s
[CV 5/5] END C=10, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  13.7s
[CV 1/5] E



[CV 5/5] END C=10, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.8s
[CV 1/5] END C=10, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   9.0s
[CV 2/5] END C=10, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 3/5] END C=10, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 4/5] END C=10, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 5/5] END C=10, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   6.4s
[CV 1/5] END C=10, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  20.5s
[CV 2/5] END C=10, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  12.6s
[CV 3/5] END C=10, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  12.7s
[CV 4/5] END C=10, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  13.0s
[CV 5/5] END C=10, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  13.5s
[CV 1/5] E



[CV 5/5] END C=10, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.8s
[CV 1/5] END C=10, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   9.0s
[CV 2/5] END C=10, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 3/5] END C=10, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 4/5] END C=10, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 5/5] END C=10, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   6.5s
[CV 1/5] END C=10, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  20.7s
[CV 2/5] END C=10, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  12.9s
[CV 3/5] END C=10, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  13.1s
[CV 4/5] END C=10, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  13.0s
[CV 5/5] END C=10, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  13.6s



[CV 5/5] END C=10, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.8s
[CV 1/5] END C=10, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   9.0s
[CV 2/5] END C=10, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 3/5] END C=10, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 4/5] END C=10, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 5/5] END C=10, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   6.5s
[CV 1/5] END C=10, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  20.6s
[CV 2/5] END C=10, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.7s
[CV 3/5] END C=10, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.7s
[CV 4/5] END C=10, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.9s
[CV 5/5] END C=10, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  13.5s



[CV 3/5] END C=1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.4s




[CV 4/5] END C=1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   4.6s
[CV 5/5] END C=1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.2s
[CV 1/5] END C=1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   9.6s
[CV 2/5] END C=1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   5.0s
[CV 3/5] END C=1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.9s
[CV 4/5] END C=1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 5/5] END C=1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   6.6s
[CV 1/5] END C=1, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  21.0s
[CV 2/5] END C=1, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  13.1s
[CV 3/5] END C=1, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  12.7s
[CV 4/5] END C=1, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  13.1s
[CV 5/5] END C=1



[CV 3/5] END C=1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.3s




[CV 4/5] END C=1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   4.6s
[CV 5/5] END C=1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.4s
[CV 1/5] END C=1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=  10.8s
[CV 2/5] END C=1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   5.3s
[CV 3/5] END C=1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   5.3s
[CV 4/5] END C=1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   5.4s
[CV 5/5] END C=1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   7.5s
[CV 1/5] END C=1, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  22.9s
[CV 2/5] END C=1, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  14.3s
[CV 3/5] END C=1, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  14.1s
[CV 4/5] END C=1, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  14.1s
[CV 5/5] END C=1



[CV 3/5] END C=1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.1s




[CV 4/5] END C=1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   4.1s
[CV 5/5] END C=1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 1/5] END C=1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   9.4s
[CV 2/5] END C=1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 3/5] END C=1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 4/5] END C=1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 5/5] END C=1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   6.4s
[CV 1/5] END C=1, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  20.7s
[CV 2/5] END C=1, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  12.9s
[CV 3/5] END C=1, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  12.9s
[CV 4/5] END C=1, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  13.3s
[CV 5



[CV 3/5] END C=1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s




[CV 4/5] END C=1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   4.2s
[CV 5/5] END C=1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 1/5] END C=1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   9.3s
[CV 2/5] END C=1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 3/5] END C=1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.9s
[CV 4/5] END C=1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 5/5] END C=1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   6.6s
[CV 1/5] END C=1, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  22.0s
[CV 2/5] END C=1, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  13.4s
[CV 3/5] END C=1, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  13.0s
[CV 4/5] END C=1, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  13.6s
[CV 5



[CV 1/5] END C=0.1, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  25.0s




[CV 2/5] END C=0.1, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  24.8s




[CV 3/5] END C=0.1, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  25.0s




[CV 4/5] END C=0.1, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  25.1s




[CV 5/5] END C=0.1, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  25.1s
[CV 1/5] END C=0.1, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  46.4s
[CV 2/5] END C=0.1, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  46.6s
[CV 3/5] END C=0.1, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  45.8s
[CV 4/5] END C=0.1, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  43.0s
[CV 5/5] END C=0.1, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  45.1s
[CV 1/5] END C=0.1, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.2s
[CV 2/5] END C=0.1, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 3/5] END C=0.1, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=0.1, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 5/5] END C=0.1, max_iter=100, penalty=l1, solver=lgbfs;, score=na



[CV 2/5] END C=0.1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.1s
[CV 3/5] END C=0.1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 4/5] END C=0.1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.7s
[CV 5/5] END C=0.1, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 1/5] END C=0.1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   9.1s
[CV 2/5] END C=0.1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 3/5] END C=0.1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 4/5] END C=0.1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 5/5] END C=0.1, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   6.4s
[CV 1/5] END C=0.1, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  20.3s
[CV 2/5] END C=0.1, max_iter=100, penalty=l2, solver=saga;, score=0.891 tot



[CV 1/5] END C=0.1, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time= 2.1min




[CV 2/5] END C=0.1, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time= 2.1min




[CV 3/5] END C=0.1, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time= 2.2min




[CV 4/5] END C=0.1, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time= 2.1min




[CV 5/5] END C=0.1, max_iter=500, penalty=l1, solver=saga;, score=0.891 total time= 2.1min
[CV 1/5] END C=0.1, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=  49.0s
[CV 2/5] END C=0.1, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=  47.7s
[CV 3/5] END C=0.1, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=  47.2s
[CV 4/5] END C=0.1, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=  43.5s
[CV 5/5] END C=0.1, max_iter=500, penalty=l1, solver=liblinear;, score=0.891 total time=  46.0s
[CV 1/5] END C=0.1, max_iter=500, penalty=l1, solver=lgbfs;, score=nan total time=   0.2s
[CV 2/5] END C=0.1, max_iter=500, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 3/5] END C=0.1, max_iter=500, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=0.1, max_iter=500, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 5/5] END C=0.1, max_iter=500, penalty=l1, solver=lgbfs;, score=na



[CV 2/5] END C=0.1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.2s
[CV 3/5] END C=0.1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 4/5] END C=0.1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.7s
[CV 5/5] END C=0.1, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 1/5] END C=0.1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   9.0s
[CV 2/5] END C=0.1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 3/5] END C=0.1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 4/5] END C=0.1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.7s
[CV 5/5] END C=0.1, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   6.5s
[CV 1/5] END C=0.1, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  20.8s
[CV 2/5] END C=0.1, max_iter=500, penalty=l2, solver=saga;, score=0.891 tot



[CV 1/5] END C=0.1, max_iter=1000, penalty=l1, solver=saga;, score=0.891 total time= 4.1min




[CV 2/5] END C=0.1, max_iter=1000, penalty=l1, solver=saga;, score=0.891 total time= 4.1min




[CV 3/5] END C=0.1, max_iter=1000, penalty=l1, solver=saga;, score=0.891 total time=17.5min




[CV 4/5] END C=0.1, max_iter=1000, penalty=l1, solver=saga;, score=0.891 total time= 4.1min
[CV 5/5] END C=0.1, max_iter=1000, penalty=l1, solver=saga;, score=0.891 total time= 4.2min
[CV 1/5] END C=0.1, max_iter=1000, penalty=l1, solver=liblinear;, score=0.891 total time=  48.1s
[CV 2/5] END C=0.1, max_iter=1000, penalty=l1, solver=liblinear;, score=0.891 total time=  46.5s
[CV 3/5] END C=0.1, max_iter=1000, penalty=l1, solver=liblinear;, score=0.891 total time=  45.4s
[CV 4/5] END C=0.1, max_iter=1000, penalty=l1, solver=liblinear;, score=0.891 total time=  44.4s
[CV 5/5] END C=0.1, max_iter=1000, penalty=l1, solver=liblinear;, score=0.891 total time=  47.6s
[CV 1/5] END C=0.1, max_iter=1000, penalty=l1, solver=lgbfs;, score=nan total time=   0.2s
[CV 2/5] END C=0.1, max_iter=1000, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 3/5] END C=0.1, max_iter=1000, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=0.1, max_iter=1000, penalty=l1, solver=lgbf



[CV 2/5] END C=0.1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.7s
[CV 3/5] END C=0.1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.6s
[CV 4/5] END C=0.1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.1s
[CV 5/5] END C=0.1, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.2s
[CV 1/5] END C=0.1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   9.9s
[CV 2/5] END C=0.1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 3/5] END C=0.1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 4/5] END C=0.1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.9s
[CV 5/5] END C=0.1, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   6.7s
[CV 1/5] END C=0.1, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  21.0s
[CV 2/5] END C=0.1, max_iter=1000, penalty=l2, solver=saga;, scor



[CV 2/5] END C=0.1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 3/5] END C=0.1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.7s
[CV 4/5] END C=0.1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 5/5] END C=0.1, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.6s
[CV 1/5] END C=0.1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   9.0s
[CV 2/5] END C=0.1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 3/5] END C=0.1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 4/5] END C=0.1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 5/5] END C=0.1, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   6.4s
[CV 1/5] END C=0.1, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  20.3s
[CV 2/5] END C=0.1, max_iter=2000, penalty=l2, solver=saga;, scor



[CV 1/5] END C=0.01, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  24.9s




[CV 2/5] END C=0.01, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  23.8s




[CV 3/5] END C=0.01, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  23.8s




[CV 4/5] END C=0.01, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  23.9s




[CV 5/5] END C=0.01, max_iter=100, penalty=l1, solver=saga;, score=0.891 total time=  24.1s
[CV 1/5] END C=0.01, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   5.1s
[CV 2/5] END C=0.01, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  11.5s
[CV 3/5] END C=0.01, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=  15.2s
[CV 4/5] END C=0.01, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   8.2s
[CV 5/5] END C=0.01, max_iter=100, penalty=l1, solver=liblinear;, score=0.891 total time=   7.4s
[CV 1/5] END C=0.01, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 2/5] END C=0.01, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 3/5] END C=0.01, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 4/5] END C=0.01, max_iter=100, penalty=l1, solver=lgbfs;, score=nan total time=   0.1s
[CV 5/5] END C=0.01, max_iter=100, penalty=l1, solver=lgbfs



[CV 3/5] END C=0.01, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 4/5] END C=0.01, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 5/5] END C=0.01, max_iter=100, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.7s
[CV 1/5] END C=0.01, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=12.7min
[CV 2/5] END C=0.01, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   5.4s
[CV 3/5] END C=0.01, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 4/5] END C=0.01, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   4.3s
[CV 5/5] END C=0.01, max_iter=100, penalty=l2, solver=sag;, score=0.891 total time=   6.3s
[CV 1/5] END C=0.01, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  18.9s
[CV 2/5] END C=0.01, max_iter=100, penalty=l2, solver=saga;, score=0.891 total time=  11.9s
[CV 3/5] END C=0.01, max_iter=100, penalty=l2, solver=saga;, score=0.8



[CV 3/5] END C=0.01, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 4/5] END C=0.01, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 5/5] END C=0.01, max_iter=500, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 1/5] END C=0.01, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   9.5s
[CV 2/5] END C=0.01, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.9s
[CV 3/5] END C=0.01, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 4/5] END C=0.01, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   4.8s
[CV 5/5] END C=0.01, max_iter=500, penalty=l2, solver=sag;, score=0.891 total time=   6.7s
[CV 1/5] END C=0.01, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  20.3s
[CV 2/5] END C=0.01, max_iter=500, penalty=l2, solver=saga;, score=0.891 total time=  14.2s
[CV 3/5] END C=0.01, max_iter=500, penalty=l2, solver=saga;, score=0.8



[CV 3/5] END C=0.01, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.8s
[CV 4/5] END C=0.01, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.0s
[CV 5/5] END C=0.01, max_iter=1000, penalty=l2, solver=newton-cg;, score=0.891 total time=   3.2s
[CV 1/5] END C=0.01, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   8.4s
[CV 2/5] END C=0.01, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.5s
[CV 3/5] END C=0.01, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.6s
[CV 4/5] END C=0.01, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   4.3s
[CV 5/5] END C=0.01, max_iter=1000, penalty=l2, solver=sag;, score=0.891 total time=   6.2s
[CV 1/5] END C=0.01, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  19.0s
[CV 2/5] END C=0.01, max_iter=1000, penalty=l2, solver=saga;, score=0.891 total time=  12.0s
[CV 3/5] END C=0.01, max_iter=1000, penalty=l2, solver=saga;



[CV 3/5] END C=0.01, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 4/5] END C=0.01, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.9s
[CV 5/5] END C=0.01, max_iter=2000, penalty=l2, solver=newton-cg;, score=0.891 total time=   2.8s
[CV 1/5] END C=0.01, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   8.3s
[CV 2/5] END C=0.01, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.5s
[CV 3/5] END C=0.01, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.5s
[CV 4/5] END C=0.01, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   4.3s
[CV 5/5] END C=0.01, max_iter=2000, penalty=l2, solver=sag;, score=0.891 total time=   6.2s
[CV 1/5] END C=0.01, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  19.0s
[CV 2/5] END C=0.01, max_iter=2000, penalty=l2, solver=saga;, score=0.891 total time=  12.0s
[CV 3/5] END C=0.01, max_iter=2000, penalty=l2, solver=saga;

1080 fits failed out of a total of 1800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/michaelraines/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/michaelraines/opt/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/michaelraines/opt/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver newton-cg supports only 'l

GridSearchCV(estimator=LogisticRegression(random_state=1),
             param_grid={'C': [1000, 100, 10, 1, 0.1, 0.01],
                         'max_iter': [100, 500, 1000, 2000],
                         'penalty': ['l1', 'l2', 'elasticnet'],
                         'solver': ['newton-cg', 'sag', 'saga', 'liblinear',
                                    'lgbfs']},
             verbose=3)

In [19]:
print(lr_grid_clf.best_params_)
print(lr_grid_clf.best_score_)

{'C': 1000, 'max_iter': 100, 'penalty': 'l1', 'solver': 'saga'}
0.8911946666666666


In [20]:
lr_best_params=lr_grid_clf.best_params_
lr_classifier= LogisticRegression(**lr_best_params, random_state=1).fit(X_train_scaled, y_train)
print(f"Training Data Score: {lr_classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {lr_classifier.score(X_test_scaled, y_test)}")


Training Data Score: 0.8911946666666667
Testing Data Score: 0.889776


In [21]:
# export best to compare vs entire dataset
pickle.dump(lr_classifier, open('../resources/lr_classifier.pkl','wb'))

#### Random Forest

In [22]:
model= RandomForestClassifier(random_state=1)

param_grid = {
    'n_estimators': [100,150,200],
    'bootstrap' : [True, False],
    'max_features': ['sqrt', 'log2']
}
rf_grid_clf = GridSearchCV(model, param_grid, verbose=3)
rf_grid_clf.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END bootstrap=True, max_features=sqrt, n_estimators=100;, score=0.877 total time=  39.2s
[CV 2/5] END bootstrap=True, max_features=sqrt, n_estimators=100;, score=0.875 total time=  39.0s
[CV 3/5] END bootstrap=True, max_features=sqrt, n_estimators=100;, score=0.874 total time=  38.7s
[CV 4/5] END bootstrap=True, max_features=sqrt, n_estimators=100;, score=0.875 total time=  38.5s
[CV 5/5] END bootstrap=True, max_features=sqrt, n_estimators=100;, score=0.876 total time=  38.1s
[CV 1/5] END bootstrap=True, max_features=sqrt, n_estimators=150;, score=0.877 total time=  57.3s
[CV 2/5] END bootstrap=True, max_features=sqrt, n_estimators=150;, score=0.876 total time=  57.3s
[CV 3/5] END bootstrap=True, max_features=sqrt, n_estimators=150;, score=0.875 total time=  57.5s
[CV 4/5] END bootstrap=True, max_features=sqrt, n_estimators=150;, score=0.876 total time=  57.4s
[CV 5/5] END bootstrap=True, max_features=sqrt, n_estimat

GridSearchCV(estimator=RandomForestClassifier(random_state=1),
             param_grid={'bootstrap': [True, False],
                         'max_features': ['sqrt', 'log2'],
                         'n_estimators': [100, 150, 200]},
             verbose=3)

In [23]:
print(rf_grid_clf.best_params_)
print(rf_grid_clf.best_score_)

{'bootstrap': True, 'max_features': 'sqrt', 'n_estimators': 200}
0.8763573333333333


In [24]:
rf_best_params=rf_grid_clf.best_params_
rf_classifier= RandomForestClassifier(**rf_best_params, random_state=1).fit(X_train_scaled, y_train)
print(f"Training Data Score: {rf_classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {rf_classifier.score(X_test_scaled, y_test)}")

Training Data Score: 0.9966693333333333
Testing Data Score: 0.874696


In [25]:
pickle.dump(rf_classifier, open('../resources/rf_classifier.pkl','wb'))

#### Ada Boost 

In [26]:
model= AdaBoostClassifier(random_state=1)

param_grid = {
    'learning_rate': [0.001, 0.01, 0.1, 0.5, 0.7],
    'algorithm':['SAMME', 'SAMME.R'],
    'n_estimators': [500, 1000, 1500]
}
ab_grid_clf = GridSearchCV(model, param_grid, verbose=3)
ab_grid_clf.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 1/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=500;, score=0.891 total time= 2.1min
[CV 2/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=500;, score=0.891 total time= 2.1min
[CV 3/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=500;, score=0.891 total time= 2.1min
[CV 4/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=500;, score=0.891 total time= 2.1min
[CV 5/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=500;, score=0.891 total time= 2.1min
[CV 1/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.2min
[CV 2/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.2min
[CV 3/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.4min
[CV 4/5] END algorithm=SAMME, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.6min
[CV 5/5] END algorithm=SA

[CV 2/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.5min
[CV 3/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.5min
[CV 4/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.5min
[CV 5/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1000;, score=0.891 total time= 4.5min
[CV 1/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1500;, score=0.891 total time= 6.8min
[CV 2/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1500;, score=0.891 total time= 6.8min
[CV 3/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1500;, score=0.891 total time= 6.8min
[CV 4/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1500;, score=0.891 total time= 6.8min
[CV 5/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=1500;, score=0.891 total time= 7.1min
[CV 1/5] END algorithm=SAMME.R, learning_rate=0.01, n_estimators

GridSearchCV(estimator=AdaBoostClassifier(random_state=1),
             param_grid={'algorithm': ['SAMME', 'SAMME.R'],
                         'learning_rate': [0.001, 0.01, 0.1, 0.5, 0.7],
                         'n_estimators': [500, 1000, 1500]},
             verbose=3)

In [27]:
print(ab_grid_clf.best_params_)
print(ab_grid_clf.best_score_)

{'algorithm': 'SAMME', 'learning_rate': 0.7, 'n_estimators': 1000}
0.8917733333333333


In [28]:
ab_best_params=ab_grid_clf.best_params_
ab_classifier= AdaBoostClassifier(**ab_best_params, random_state=1).fit(X_train_scaled, y_train)
print(f"Training Data Score: {ab_classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {ab_classifier.score(X_test_scaled, y_test)}")

Training Data Score: 0.8917786666666667
Testing Data Score: 0.89048


In [29]:
pickle.dump(ab_classifier, open('../resources/ab_classifier.pkl','wb'))

#### SVC

In [31]:
model= LinearSVC()
param_grid = {'C': [100, 10, 1.0, 0.1],
              'penalty': ['l1', 'l2'], 
            'max_iter':[1000, 1500]}
svc_grid_clf = GridSearchCV(model, param_grid, verbose=3)
svc_grid_clf.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ....C=100, max_iter=1000, penalty=l1;, score=nan total time=   4.3s
[CV 2/5] END ....C=100, max_iter=1000, penalty=l1;, score=nan total time=   0.7s
[CV 3/5] END ....C=100, max_iter=1000, penalty=l1;, score=nan total time=   0.4s
[CV 4/5] END ....C=100, max_iter=1000, penalty=l1;, score=nan total time=   0.2s
[CV 5/5] END ....C=100, max_iter=1000, penalty=l1;, score=nan total time=   1.0s




[CV 1/5] END ..C=100, max_iter=1000, penalty=l2;, score=0.820 total time= 3.6min




[CV 2/5] END ..C=100, max_iter=1000, penalty=l2;, score=0.837 total time= 3.4min




[CV 3/5] END ..C=100, max_iter=1000, penalty=l2;, score=0.827 total time= 2.9min




[CV 4/5] END ..C=100, max_iter=1000, penalty=l2;, score=0.800 total time= 3.4min




[CV 5/5] END ..C=100, max_iter=1000, penalty=l2;, score=0.829 total time= 4.5min
[CV 1/5] END ....C=100, max_iter=1500, penalty=l1;, score=nan total time=   6.7s
[CV 2/5] END ....C=100, max_iter=1500, penalty=l1;, score=nan total time=   2.4s
[CV 3/5] END ....C=100, max_iter=1500, penalty=l1;, score=nan total time=   1.5s
[CV 4/5] END ....C=100, max_iter=1500, penalty=l1;, score=nan total time=   0.8s
[CV 5/5] END ....C=100, max_iter=1500, penalty=l1;, score=nan total time=   1.7s




[CV 1/5] END ..C=100, max_iter=1500, penalty=l2;, score=0.806 total time= 6.5min




[CV 2/5] END ..C=100, max_iter=1500, penalty=l2;, score=0.798 total time= 6.8min




[CV 3/5] END ..C=100, max_iter=1500, penalty=l2;, score=0.822 total time= 7.0min




[CV 4/5] END ..C=100, max_iter=1500, penalty=l2;, score=0.780 total time= 7.0min




[CV 5/5] END ..C=100, max_iter=1500, penalty=l2;, score=0.838 total time= 7.2min
[CV 1/5] END .....C=10, max_iter=1000, penalty=l1;, score=nan total time=  11.6s
[CV 2/5] END .....C=10, max_iter=1000, penalty=l1;, score=nan total time=   1.0s
[CV 3/5] END .....C=10, max_iter=1000, penalty=l1;, score=nan total time=   0.5s
[CV 4/5] END .....C=10, max_iter=1000, penalty=l1;, score=nan total time=   1.9s
[CV 5/5] END .....C=10, max_iter=1000, penalty=l1;, score=nan total time=   1.1s




[CV 1/5] END ...C=10, max_iter=1000, penalty=l2;, score=0.876 total time= 4.8min




[CV 2/5] END ...C=10, max_iter=1000, penalty=l2;, score=0.863 total time= 4.7min




[CV 3/5] END ...C=10, max_iter=1000, penalty=l2;, score=0.874 total time= 4.9min




[CV 4/5] END ...C=10, max_iter=1000, penalty=l2;, score=0.851 total time= 5.2min




[CV 5/5] END ...C=10, max_iter=1000, penalty=l2;, score=0.852 total time= 4.7min
[CV 1/5] END .....C=10, max_iter=1500, penalty=l1;, score=nan total time=   4.6s
[CV 2/5] END .....C=10, max_iter=1500, penalty=l1;, score=nan total time=   1.2s
[CV 3/5] END .....C=10, max_iter=1500, penalty=l1;, score=nan total time=   2.6s
[CV 4/5] END .....C=10, max_iter=1500, penalty=l1;, score=nan total time=   2.4s
[CV 5/5] END .....C=10, max_iter=1500, penalty=l1;, score=nan total time=   0.9s




[CV 1/5] END ...C=10, max_iter=1500, penalty=l2;, score=0.886 total time= 6.7min




[CV 2/5] END ...C=10, max_iter=1500, penalty=l2;, score=0.889 total time= 6.9min




[CV 3/5] END ...C=10, max_iter=1500, penalty=l2;, score=0.891 total time= 7.0min




[CV 4/5] END ...C=10, max_iter=1500, penalty=l2;, score=0.885 total time= 7.8min




[CV 5/5] END ...C=10, max_iter=1500, penalty=l2;, score=0.884 total time= 7.6min
[CV 1/5] END ....C=1.0, max_iter=1000, penalty=l1;, score=nan total time=   7.2s
[CV 2/5] END ....C=1.0, max_iter=1000, penalty=l1;, score=nan total time=   0.4s
[CV 3/5] END ....C=1.0, max_iter=1000, penalty=l1;, score=nan total time=   2.2s
[CV 4/5] END ....C=1.0, max_iter=1000, penalty=l1;, score=nan total time=   2.3s
[CV 5/5] END ....C=1.0, max_iter=1000, penalty=l1;, score=nan total time=   1.2s




[CV 1/5] END ..C=1.0, max_iter=1000, penalty=l2;, score=0.891 total time= 4.1min




[CV 2/5] END ..C=1.0, max_iter=1000, penalty=l2;, score=0.891 total time= 4.3min




[CV 3/5] END ..C=1.0, max_iter=1000, penalty=l2;, score=0.891 total time= 4.5min




[CV 4/5] END ..C=1.0, max_iter=1000, penalty=l2;, score=0.891 total time= 4.5min




[CV 5/5] END ..C=1.0, max_iter=1000, penalty=l2;, score=0.891 total time= 4.4min
[CV 1/5] END ....C=1.0, max_iter=1500, penalty=l1;, score=nan total time=   6.1s
[CV 2/5] END ....C=1.0, max_iter=1500, penalty=l1;, score=nan total time=   1.6s
[CV 3/5] END ....C=1.0, max_iter=1500, penalty=l1;, score=nan total time=   1.0s
[CV 4/5] END ....C=1.0, max_iter=1500, penalty=l1;, score=nan total time=   0.7s
[CV 5/5] END ....C=1.0, max_iter=1500, penalty=l1;, score=nan total time=   0.5s




[CV 1/5] END ..C=1.0, max_iter=1500, penalty=l2;, score=0.891 total time= 6.6min




[CV 2/5] END ..C=1.0, max_iter=1500, penalty=l2;, score=0.891 total time= 6.9min




[CV 3/5] END ..C=1.0, max_iter=1500, penalty=l2;, score=0.891 total time= 7.0min




[CV 4/5] END ..C=1.0, max_iter=1500, penalty=l2;, score=0.891 total time= 6.9min




[CV 5/5] END ..C=1.0, max_iter=1500, penalty=l2;, score=0.891 total time= 6.7min
[CV 1/5] END ....C=0.1, max_iter=1000, penalty=l1;, score=nan total time=   8.3s
[CV 2/5] END ....C=0.1, max_iter=1000, penalty=l1;, score=nan total time=   2.6s
[CV 3/5] END ....C=0.1, max_iter=1000, penalty=l1;, score=nan total time=   1.2s
[CV 4/5] END ....C=0.1, max_iter=1000, penalty=l1;, score=nan total time=   1.5s
[CV 5/5] END ....C=0.1, max_iter=1000, penalty=l1;, score=nan total time=   0.9s




[CV 1/5] END ..C=0.1, max_iter=1000, penalty=l2;, score=0.891 total time= 4.6min




[CV 2/5] END ..C=0.1, max_iter=1000, penalty=l2;, score=0.891 total time= 4.5min




[CV 3/5] END ..C=0.1, max_iter=1000, penalty=l2;, score=0.891 total time= 4.6min




[CV 4/5] END ..C=0.1, max_iter=1000, penalty=l2;, score=0.891 total time= 3.8min




[CV 5/5] END ..C=0.1, max_iter=1000, penalty=l2;, score=0.891 total time= 2.7min
[CV 1/5] END ....C=0.1, max_iter=1500, penalty=l1;, score=nan total time=   0.4s
[CV 2/5] END ....C=0.1, max_iter=1500, penalty=l1;, score=nan total time=   0.1s
[CV 3/5] END ....C=0.1, max_iter=1500, penalty=l1;, score=nan total time=   0.1s
[CV 4/5] END ....C=0.1, max_iter=1500, penalty=l1;, score=nan total time=   0.1s
[CV 5/5] END ....C=0.1, max_iter=1500, penalty=l1;, score=nan total time=   0.1s




[CV 1/5] END ..C=0.1, max_iter=1500, penalty=l2;, score=0.891 total time= 4.0min




[CV 2/5] END ..C=0.1, max_iter=1500, penalty=l2;, score=0.891 total time= 4.0min




[CV 3/5] END ..C=0.1, max_iter=1500, penalty=l2;, score=0.891 total time= 4.0min




[CV 4/5] END ..C=0.1, max_iter=1500, penalty=l2;, score=0.891 total time=12.5min




[CV 5/5] END ..C=0.1, max_iter=1500, penalty=l2;, score=0.891 total time= 6.0min


40 fits failed out of a total of 80.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
40 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/michaelraines/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/michaelraines/opt/anaconda3/lib/python3.8/site-packages/sklearn/svm/_classes.py", line 257, in fit
    self.coef_, self.intercept_, self.n_iter_ = _fit_liblinear(
  File "/Users/michaelraines/opt/anaconda3/lib/python3.8/site-packages/sklearn/svm/_base.py", line 1185, in _fit_liblinear
    solver_type = _get_liblinear_solver_type(multi_class, penalty, loss, dual)
  File "/Users/m

GridSearchCV(estimator=LinearSVC(),
             param_grid={'C': [100, 10, 1.0, 0.1], 'max_iter': [1000, 1500],
                         'penalty': ['l1', 'l2']},
             verbose=3)

In [32]:
print(svc_grid_clf.best_params_)
print(svc_grid_clf.best_score_)

{'C': 1.0, 'max_iter': 1500, 'penalty': 'l2'}
0.8911946666666666


In [33]:
svc_best_params=svc_grid_clf.best_params_
svc_classifier= LinearSVC(**svc_best_params).fit(X_train_scaled, y_train)
print(f"Training Data Score: {svc_classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {svc_classifier.score(X_test_scaled, y_test)}")



Training Data Score: 0.8911946666666667
Testing Data Score: 0.889776


In [34]:
pickle.dump(svc_classifier, open('../resources/svc_classifier.pkl','wb'))

#### KNeighbors Classifier

In [11]:
# Find N value

train_scores = []
test_scores = []
for k in range(1, 20, 2):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    train_score = knn.score(X_train_scaled, y_train)
    test_score = knn.score(X_test_scaled, y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")
    
    
plt.plot(range(1, 20, 2), train_scores, marker='o')
plt.plot(range(1, 20, 2), test_scores, marker="x")
plt.xlabel("k neighbors")
plt.ylabel("Testing accuracy Score")
plt.show()

KeyboardInterrupt: 

In [11]:
model= KNeighborsClassifier()

param_grid = {
    'n_neighbors': [5,9,13,17, 21],
    'weights': ['uniform', 'distance'],
    'leaf_size': [30, 50, 100, 200]
}
knn_grid_clf = GridSearchCV(model, param_grid, verbose=3)
knn_grid_clf.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END leaf_size=30, n_neighbors=5, weights=uniform;, score=0.883 total time= 6.7min
[CV 2/5] END leaf_size=30, n_neighbors=5, weights=uniform;, score=0.883 total time= 6.9min
[CV 3/5] END leaf_size=30, n_neighbors=5, weights=uniform;, score=0.882 total time= 6.9min
[CV 4/5] END leaf_size=30, n_neighbors=5, weights=uniform;, score=0.882 total time=21.4min
[CV 5/5] END leaf_size=30, n_neighbors=5, weights=uniform;, score=0.883 total time= 6.8min
[CV 1/5] END leaf_size=30, n_neighbors=5, weights=distance;, score=0.869 total time= 6.9min
[CV 2/5] END leaf_size=30, n_neighbors=5, weights=distance;, score=0.867 total time= 6.9min
[CV 3/5] END leaf_size=30, n_neighbors=5, weights=distance;, score=0.866 total time= 6.9min
[CV 4/5] END leaf_size=30, n_neighbors=5, weights=distance;, score=0.868 total time= 6.9min
[CV 5/5] END leaf_size=30, n_neighbors=5, weights=distance;, score=0.867 total time= 7.3min
[CV 1/5] END leaf_size=

[CV 5/5] END leaf_size=50, n_neighbors=17, weights=distance;, score=0.885 total time= 7.2min
[CV 1/5] END leaf_size=50, n_neighbors=21, weights=uniform;, score=0.891 total time=11.4min
[CV 2/5] END leaf_size=50, n_neighbors=21, weights=uniform;, score=0.891 total time= 7.2min
[CV 3/5] END leaf_size=50, n_neighbors=21, weights=uniform;, score=0.891 total time= 7.0min
[CV 4/5] END leaf_size=50, n_neighbors=21, weights=uniform;, score=0.891 total time= 8.1min
[CV 5/5] END leaf_size=50, n_neighbors=21, weights=uniform;, score=0.891 total time=15.7min
[CV 1/5] END leaf_size=50, n_neighbors=21, weights=distance;, score=0.886 total time= 7.2min
[CV 2/5] END leaf_size=50, n_neighbors=21, weights=distance;, score=0.886 total time= 7.4min
[CV 3/5] END leaf_size=50, n_neighbors=21, weights=distance;, score=0.886 total time= 7.5min
[CV 4/5] END leaf_size=50, n_neighbors=21, weights=distance;, score=0.886 total time= 7.2min
[CV 5/5] END leaf_size=50, n_neighbors=21, weights=distance;, score=0.886 t

[CV 4/5] END leaf_size=200, n_neighbors=13, weights=distance;, score=0.883 total time= 7.3min
[CV 5/5] END leaf_size=200, n_neighbors=13, weights=distance;, score=0.882 total time= 7.0min
[CV 1/5] END leaf_size=200, n_neighbors=17, weights=uniform;, score=0.891 total time= 7.0min
[CV 2/5] END leaf_size=200, n_neighbors=17, weights=uniform;, score=0.891 total time= 7.3min
[CV 3/5] END leaf_size=200, n_neighbors=17, weights=uniform;, score=0.891 total time= 7.1min
[CV 4/5] END leaf_size=200, n_neighbors=17, weights=uniform;, score=0.891 total time= 7.2min
[CV 5/5] END leaf_size=200, n_neighbors=17, weights=uniform;, score=0.891 total time= 7.1min
[CV 1/5] END leaf_size=200, n_neighbors=17, weights=distance;, score=0.885 total time= 7.1min
[CV 2/5] END leaf_size=200, n_neighbors=17, weights=distance;, score=0.885 total time= 7.3min
[CV 3/5] END leaf_size=200, n_neighbors=17, weights=distance;, score=0.885 total time= 7.5min
[CV 4/5] END leaf_size=200, n_neighbors=17, weights=distance;, sc

GridSearchCV(estimator=KNeighborsClassifier(),
             param_grid={'leaf_size': [30, 50, 100, 200],
                         'n_neighbors': [5, 9, 13, 17, 21],
                         'weights': ['uniform', 'distance']},
             verbose=3)

In [12]:
print(knn_grid_clf.best_params_)
print(knn_grid_clf.best_score_)

{'leaf_size': 30, 'n_neighbors': 21, 'weights': 'uniform'}
0.8911706666666668


In [None]:
knn_best_params=knn_grid_clf.best_params_
kn_classifier= KNeighborsClassifier(**knn_best_params).fit(X_train_scaled, y_train)
print(f"Training Data Score: {kn_classifier.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {kn_classifier.score(X_test_scaled, y_test)}")

Training Data Score: 0.8914613333333333


In [None]:
pickle.dump(kn_classifier, open('../resources/kn_classifier.pkl','wb'))