# Training and Testing Model Predictions

In [1]:
#importing packages

import sys, os
import pickle
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.model_selection import cross_val_score

sys.path.append(os.path.abspath(os.path.join('..', 'utils')))
from utility import evaluate_cat_models

In [2]:
# initialize models

# Fixed seed so the stochastic estimators below produce the same scores on
# every Restart & Run All. The outputs currently saved in this notebook were
# produced with unseeded models, so re-running will refresh the numbers and
# the printed estimator labels will include `random_state=42`.
SEED = 42

gnb = GaussianNB()  # takes no random_state parameter
tree = DecisionTreeClassifier(random_state=SEED)
forest = RandomForestClassifier(random_state=SEED)
gradient_boost = GradientBoostingClassifier(random_state=SEED)
ada_boost = AdaBoostClassifier(random_state=SEED)

### Oversampling model

In [3]:
# Read the pre-processed oversampled train/test splits from disk,
# then preview the first five rows of the training frame.
train_set_os = pd.read_csv(filepath_or_buffer='../Data/ProcessedData/train_set_os.csv')
test_set_os = pd.read_csv(filepath_or_buffer='../Data/ProcessedData/test_set_os.csv')
train_set_os.head(5)

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,-0.334628,1,1.539172,0,0,1,0.72077,-1.385233,-0.851489,0
1,0,-1.170958,1,-0.993342,1,1,0,-1.584491,0.870169,0.332837,0
2,1,0.850173,1,0.114633,0,0,1,-0.188064,0.368969,0.117505,0
3,0,-0.752793,1,1.776595,0,1,0,-0.157233,0.870169,0.021802,0
4,1,1.198644,1,1.618313,1,0,0,-0.03877,0.368969,0.320874,0


In [4]:
# Separate the 'Response' target column from the remaining feature columns
# for the oversampled train and test frames.
X_train_os = train_set_os.drop(columns='Response')
X_test_os = test_set_os.drop(columns='Response')

y_train_os = train_set_os['Response']
y_test_os = test_set_os['Response']

In [5]:
# Candidate classifiers, in the order their columns appear in the result table.
model_list = [
    tree,
    gnb,
    forest,
    gradient_boost,
    ada_boost,
]

# Hand every candidate to the project helper together with the oversampled
# split; the helper's printed log shows it fits and cross-validates each model.
evaluate_cat_models(
    model_list,
    X_train_os,
    X_test_os,
    y_train_os,
    y_test_os,
    cv=5,
)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.943692,0.784706,0.949859,0.797983,0.796407
Cross Validated Accuracy Std,0.000877,0.000784,0.000842,0.000794,0.000783
Accuracy Score,0.598272,0.783007,0.59819,0.798972,0.796279
Precision Score,0.75123,0.704155,0.787254,0.735298,0.730101
Recall Score,0.293854,0.976128,0.269102,0.934276,0.94008
F1 Score,0.422458,0.81813,0.401099,0.82293,0.821891


### Undersampling

In [8]:
# Read the pre-processed undersampled train/test splits from disk,
# then preview the first five rows of the training frame.
train_set_us = pd.read_csv(filepath_or_buffer='../Data/ProcessedData/train_set_us.csv')
test_set_us = pd.read_csv(filepath_or_buffer='../Data/ProcessedData/test_set_us.csv')
train_set_us.head(5)

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,-0.539194,1,0.116811,1,1,0,0.015827,0.868155,1.553297,0
1,0,-0.956968,1,0.274653,1,1,0,0.12522,0.868155,-1.41481,0
2,0,-1.235484,1,0.116811,1,1,0,0.086167,1.011236,0.404353,0
3,0,-1.165855,1,0.116811,0,1,0,1.869439,0.868155,0.117117,0
4,0,2.037081,1,1.537387,1,0,0,2.589444,-1.385372,1.349838,0


In [9]:
# Separate the 'Response' target column from the remaining feature columns
# for the undersampled train and test frames.
X_train_us = train_set_us.drop(columns='Response')
X_test_us = test_set_us.drop(columns='Response')

y_train_us = train_set_us['Response']
y_test_us = test_set_us['Response']

In [10]:
# Candidate classifiers, in the order their columns appear in the result table.
model_list = [
    tree,
    gnb,
    forest,
    gradient_boost,
    ada_boost,
]

# Hand every candidate to the project helper together with the undersampled
# split; the helper's printed log shows it fits and cross-validates each model.
evaluate_cat_models(
    model_list,
    X_train_us,
    X_test_us,
    y_train_us,
    y_test_us,
    cv=5,
)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.712738,0.78321,0.78325,0.797659,0.79666
Cross Validated Accuracy Std,0.002548,0.002262,0.002676,0.002745,0.003124
Accuracy Score,0.712523,0.779052,0.779378,0.794706,0.791064
Precision Score,0.716118,0.700422,0.733636,0.732265,0.726389
Recall Score,0.704207,0.975215,0.877269,0.929123,0.933906
F1 Score,0.710112,0.815286,0.799049,0.819031,0.817179


### SMOTE

In [12]:
# Read the pre-processed SMOTE-resampled train/test splits from disk,
# then preview the first five rows of the training frame.
train_set_sm = pd.read_csv(filepath_or_buffer='../Data/ProcessedData/train_set_sm.csv')
test_set_sm = pd.read_csv(filepath_or_buffer='../Data/ProcessedData/test_set_sm.csv')
train_set_sm.head(5)

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,-0.331165,1,1.609263,0,0,1,0.727775,-1.388237,-0.85654,0
1,0,-1.203193,1,-1.04552,1,1,0,-1.598957,0.877675,0.336057,0
2,1,0.904207,1,0.115948,0,0,1,-0.189524,0.374139,0.119221,0
3,0,-0.767179,1,1.858149,0,1,0,-0.158406,0.877675,0.02285,0
4,1,1.267552,1,1.692225,1,0,0,-0.038839,0.374139,0.324011,0


In [13]:
# Separate the 'Response' target column from the remaining feature columns
# for the SMOTE train and test frames.
X_train_sm = train_set_sm.drop(columns='Response')
X_test_sm = test_set_sm.drop(columns='Response')

y_train_sm = train_set_sm['Response']
y_test_sm = test_set_sm['Response']

In [14]:
# Candidate classifiers, in the order their columns appear in the result table.
model_list = [
    tree,
    gnb,
    forest,
    gradient_boost,
    ada_boost,
]

# Hand every candidate to the project helper together with the SMOTE
# split; the helper's printed log shows it fits and cross-validates each model.
evaluate_cat_models(
    model_list,
    X_train_sm,
    X_test_sm,
    y_train_sm,
    y_test_sm,
    cv=5,
)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.871681,0.776983,0.879597,0.837067,0.823698
Cross Validated Accuracy Std,0.073998,0.002756,0.065209,0.037713,0.030366
Accuracy Score,0.630537,0.774749,0.793019,0.777099,0.793369
Precision Score,0.577595,0.700832,0.718405,0.694191,0.714907
Recall Score,0.971681,0.958775,0.963833,0.99057,0.975919
F1 Score,0.724517,0.809758,0.823216,0.816311,0.825267
