# Random Forest Classification Model - Medical No Shows

### Import Dependencies

In [1]:
import os
import re
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

### Load Dataset for modelling

In [2]:
data = pd.read_csv('../data/cleanData/appointment_dataset.csv')
print(f"Shape of the data is: {data.shape}")
data.head()

Shape of the data is: (110521, 29)


Unnamed: 0,appointment_id,patient_id,repeat_patient_yn,gender_yn,time_between_sch_appt,same_day_appt_yn,within_week_appt_yn,advanced_appt_yn,monday_yn,tuesday_yn,...,young_adult_yn,adult_yn,senior_yn,welfare_assistance,hypertension,diabetes,alcoholism,handicap_yn,sms_received,no_show_yn
0,5698125,678814354693913,1,0,0,1,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
1,5698246,54593736353128,0,0,0,1,0,0,1,0,...,1,0,0,1,0,0,0,0,0,0
2,5699393,4369164743113,1,0,0,1,0,0,1,0,...,0,1,0,0,1,0,0,0,0,0
3,5694371,54523365344664,0,1,3,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,1
4,5698279,62917816238835,1,0,0,1,0,0,1,0,...,1,0,0,1,0,0,0,0,0,0


In [3]:
#check for nulls and verify that attributes for modelling are the correct datatypes
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 110521 entries, 0 to 110520
Data columns (total 29 columns):
appointment_id                   110521 non-null int64
patient_id                       110521 non-null int64
repeat_patient_yn                110521 non-null int64
gender_yn                        110521 non-null int64
time_between_sch_appt            110521 non-null int64
same_day_appt_yn                 110521 non-null int64
within_week_appt_yn              110521 non-null int64
advanced_appt_yn                 110521 non-null int64
monday_yn                        110521 non-null int64
tuesday_yn                       110521 non-null int64
wednesday_yn                     110521 non-null int64
thursday_yn                      110521 non-null int64
friday_yn                        110521 non-null int64
saturday_yn                      110521 non-null int64
neighborhood_income_lower_yn     110521 non-null int64
neighborhood_income_middle_yn    110521 non-null int64
neigborho

### Split dataset for training vs testing

In [4]:
# store the target variable in y and everything else goes in X
y = data['no_show_yn']

# drop irrelevent columns 
X = data.drop(['appointment_id', 'patient_id', 'time_between_sch_appt'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=3)

X_train.shape

(82890, 26)

### Create the classifier model and the parameter grid for GridSearch

In [5]:
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()

param_grid = {"n_estimators" : [10, 20, 50, 100],
             "max_depth" : [None, 6, 8, 10],
             "max_leaf_nodes": [None, 5, 10, 20], 
             "min_impurity_split": [0.1, 0.2, 0.3, 0.4]}

### Initialize the GridSearch to tune hyperparameters

In [6]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(model, param_grid=param_grid, cv=3, verbose=2)

### Execute model tuning

In [7]:
grid.fit(X_train, y_train)

Fitting 3 folds for each of 256 candidates, totalling 768 fits
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=20 
[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=20, total=   0.2s
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=20 
[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=20, total=   0.2s
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=20 
[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=20, total=   0.2s
[CV] max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=50 
[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=50, total=   0.

[CV]  max_depth=None, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=20 
[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=20, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=20 
[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=20, total=   0.2s
[CV] max_depth=None, max_l

[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=None, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=None, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=20 
[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=20, total=   0.2s
[CV] max_depth=None

[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=None, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=None, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=None, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=None, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=None, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth

[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=None, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=6, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6,

[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50, total=   0.3s
[CV] max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50 
[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50, total=   0.3s
[CV] max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6, max_leaf_nod

[CV]  max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=6, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6, max_leaf_nodes=10, min_impurity_spl

[CV]  max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=50, total=   0.3s
[CV] max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=6, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=6, max_leaf_nodes=20, min_impur

[CV]  max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=50, total=   0.3s
[CV] max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=6, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=8, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nodes=None,

[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50 
[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nod

[CV]  max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nodes=10, min_impurity_spl

[CV]  max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=8, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=8, max_leaf_nodes=20, min_impur

[CV]  max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=8, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.5s
[CV] max_depth=10, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=10, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=10, max_leaf_nodes=

[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50 
[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=None, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=10, max_leaf_nodes=5, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=10, 

[CV]  max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=50 
[CV]  max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=5, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=10, max_leaf_nodes=10, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=10, max_leaf_nodes=10, min_im

[CV]  max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=50 
[CV]  max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=10, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10 
[CV]  max_depth=10, max_leaf_nodes=20, min_impurity_split=0.1, n_estimators=10, total=   0.1s
[CV] max_depth=10, max_leaf_nodes=2

[CV]  max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=50 
[CV]  max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=50, total=   0.2s
[CV] max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.4s
[CV] max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100 
[CV]  max_depth=10, max_leaf_nodes=20, min_impurity_split=0.4, n_estimators=100, total=   0.4s


[Parallel(n_jobs=1)]: Done 768 out of 768 | elapsed:  4.2min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rando

### View model results

In [14]:
print(f"Best parameters: {grid.best_params_}")
print(f"Train set score: {grid.score(X_train, y_train)}")
print(f"Test set score: {grid.score(X_test, y_test)}")

Best parameters: {'max_depth': None, 'max_leaf_nodes': None, 'min_impurity_split': 0.1, 'n_estimators': 10}
Train set score: 0.9992158282060563
Test set score: 0.9990952191379248


## Save the model for hosting

In [15]:
import pickle

with open('RF_model.pkl','wb') as f:
    pickle.dump(grid, f)

### Load saved model

In [16]:
with open('RF_model.pkl', 'rb') as f:
    model = pickle.load(f)

### Test saved model

In [17]:
print(f"Test set score: {model.score(X_test, y_test)}")

Test set score: 0.9990952191379248


In [18]:
model

GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rando

### Check out model coefficients

In [19]:
model.cv_results_

{'mean_fit_time': array([0.1015691 , 0.16423972, 0.43598596, 0.80407548, 0.08823967,
        0.16948628, 0.40618928, 0.89004564, 0.07880298, 0.16102028,
        0.43820794, 0.70035585, 0.0473055 , 0.08208028, 0.18301876,
        0.35851288, 0.06948574, 0.1311458 , 0.3088944 , 0.6529723 ,
        0.06946468, 0.12746874, 0.34423383, 0.75540503, 0.07143958,
        0.139292  , 0.32294043, 0.5686926 , 0.04686666, 0.09134428,
        0.20126669, 0.40950425, 0.09409396, 0.16128755, 0.35153786,
        0.72977527, 0.08253988, 0.16012891, 0.38855831, 0.68923664,
        0.08223208, 0.14472429, 0.32060401, 0.61970774, 0.04839301,
        0.08425085, 0.19561569, 0.38079882, 0.08956559, 0.15584938,
        0.3894736 , 0.73767281, 0.09796127, 0.16419323, 0.37162232,
        0.77333275, 0.07793903, 0.13623269, 0.37030975, 0.66458909,
        0.04719933, 0.08631905, 0.20021836, 0.3784527 , 0.08637842,
        0.15817873, 0.37604618, 0.74131242, 0.08431999, 0.16883604,
        0.37902872, 0.72203048,