# Training and Testing Model Predictions

In [1]:
#importing packages

import sys, os
import pickle
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.model_selection import cross_val_score

sys.path.append(os.path.abspath(os.path.join('..', 'utils')))
from utility import evaluate_cat_models

In [3]:
# initialize models

gnb = GaussianNB()
tree = DecisionTreeClassifier()
forest = RandomForestClassifier()
gradient_boost = GradientBoostingClassifier()
ada_boost = AdaBoostClassifier()


### Oversampling model

In [4]:
#importing datasets
train_set_os = pd.read_csv('../Data/ProcessedData/train_set_os.csv')
test_set_os = pd.read_csv('../Data/ProcessedData/test_set_os.csv')
train_set_os.head()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,0,-1.173108,1,0.907074,1,1,0,0.995575,1.014452,0.320696,0
1,1,-0.685123,1,1.144772,1,1,0,0.425313,0.871351,1.611185,0
2,0,-1.312532,1,0.273213,1,1,0,-0.461971,0.871351,0.894247,0
3,1,0.709118,1,1.857865,0,0,1,0.036527,-1.382476,-1.053436,0
4,0,2.173072,1,0.114748,0,0,1,0.404387,-1.382476,-0.874201,0


In [5]:
X_train = train_set_os.drop('Response', axis=1)
y_train = train_set_os['Response']
X_test = test_set_os.drop('Response', axis=1)
y_test = test_set_os['Response']

In [25]:
model_list = [tree, gnb, forest, gradient_boost, ada_boost]
evaluate_cat_models(model_list, X_train, X_test, y_train, y_test, cv=5)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.712757,0.784828,0.781452,0.796611,0.795917
Cross Validated Accuracy Std,0.003587,0.001484,0.004162,0.002424,0.002428
Accuracy Score,0.713528,0.7829,0.780141,0.795022,0.793182
Precision Score,0.717674,0.704634,0.734444,0.733271,0.726354
Recall Score,0.704004,0.974134,0.877597,0.927381,0.940801
F1 Score,0.710774,0.817752,0.799665,0.818981,0.819785


### Undersampling

In [18]:
tree.__str__()

'DecisionTreeClassifier()'

In [19]:
train_set_us = pd.read_csv('../Data/ProcessedData/train_set_us.csv')
test_set_us = pd.read_csv('../Data/ProcessedData/test_set_us.csv')
train_set_us.head()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,0,-0.12585,1,1.694903,1,0,0,-1.610919,-1.331072,-0.502065,0
1,1,-1.101418,1,-0.678984,1,1,0,-0.029919,0.869464,-0.800362,0
2,0,0.013516,1,0.112311,0,1,0,-1.610919,-1.832008,1.299644,0
3,0,1.755601,1,0.112311,0,0,1,-0.363326,0.368529,0.703052,0
4,1,1.407184,1,0.112311,0,0,1,2.100059,-1.384744,0.416687,0


In [20]:
X_train = train_set_us.drop('Response', axis=1)
y_train = train_set_us['Response']
X_test = test_set_us.drop('Response', axis=1)
y_test = test_set_us['Response']

In [21]:
model_list_1 = [tree, gnb, forest, gradient_boost, ada_boost]
evaluate_cat_models(model_list, X_train, X_test, y_train, y_test, cv=5)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier()
Cross Validated Accuracy Mean,0.713931,0.784828,0.782239
Cross Validated Accuracy Std,0.002884,0.001484,0.003202
Accuracy Score,0.713203,0.7829,0.780465
Precision Score,0.717728,0.704634,0.734376
Recall Score,0.702814,0.974134,0.878788
F1 Score,0.710192,0.817752,0.800118


In [22]:
train_set_sm = pd.read_csv('../Data/ProcessedData/train_set_us.csv')
test_set_sm = pd.read_csv('../Data/ProcessedData/test_set_us.csv')
train_set_sm.head()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,0,-0.12585,1,1.694903,1,0,0,-1.610919,-1.331072,-0.502065,0
1,1,-1.101418,1,-0.678984,1,1,0,-0.029919,0.869464,-0.800362,0
2,0,0.013516,1,0.112311,0,1,0,-1.610919,-1.832008,1.299644,0
3,0,1.755601,1,0.112311,0,0,1,-0.363326,0.368529,0.703052,0
4,1,1.407184,1,0.112311,0,0,1,2.100059,-1.384744,0.416687,0


In [23]:
X_train = train_set_sm.drop('Response', axis=1)
y_train = train_set_sm['Response']
X_test = test_set_sm.drop('Response', axis=1)
y_test = test_set_sm['Response']

In [24]:
model_list_2 = [tree, gnb, forest, gradient_boost, ada_boost]
evaluate_cat_models(model_list, X_train, X_test, y_train, y_test, cv=5)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier()
Cross Validated Accuracy Mean,0.713731,0.784828,0.781865
Cross Validated Accuracy Std,0.002946,0.001484,0.003103
Accuracy Score,0.713149,0.7829,0.781006
Precision Score,0.717793,0.704634,0.734447
Recall Score,0.702489,0.974134,0.880303
F1 Score,0.710059,0.817752,0.800788
