# Training and Testing Model Predictions

In [1]:
#importing packages

import sys, os
import pickle
import numpy as np
import pandas as pd

from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.model_selection import cross_val_score

sys.path.append(os.path.abspath(os.path.join('..', 'utils')))
from utility import evaluate_cat_models

In [2]:
# initialize models

# One fixed seed for every stochastic estimator, so that a fresh
# Restart & Run All reproduces the same fitted models and metrics.
SEED = 42

gnb = GaussianNB()  # takes no random_state parameter
tree = DecisionTreeClassifier(random_state=SEED)
forest = RandomForestClassifier(random_state=SEED)
gradient_boost = GradientBoostingClassifier(random_state=SEED)
ada_boost = AdaBoostClassifier(random_state=SEED)


### Oversampling model

In [3]:
# Load the oversampled train/test splits from the ProcessedData directory.
train_set_os, test_set_os = map(
    pd.read_csv,
    (
        '../Data/ProcessedData/train_set_os.csv',
        '../Data/ProcessedData/test_set_os.csv',
    ),
)

# Preview the first rows of the training split as the cell output.
train_set_os.head()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,0,-1.173108,1,0.907074,1,1,0,0.995575,1.014452,0.320696,0
1,1,-0.685123,1,1.144772,1,1,0,0.425313,0.871351,1.611185,0
2,0,-1.312532,1,0.273213,1,1,0,-0.461971,0.871351,0.894247,0
3,1,0.709118,1,1.857865,0,0,1,0.036527,-1.382476,-1.053436,0
4,0,2.173072,1,0.114748,0,0,1,0.404387,-1.382476,-0.874201,0


In [4]:
# Separate the 'Response' target column from the remaining feature columns
# for both oversampled splits.
y_train_os, y_test_os = train_set_os['Response'], test_set_os['Response']
X_train_os = train_set_os.drop(columns='Response')
X_test_os = test_set_os.drop(columns='Response')

In [5]:
# Classifiers to compare on the oversampled data.
model_list = [
    tree,
    gnb,
    forest,
    gradient_boost,
    ada_boost,
]

# Run the shared evaluation helper; its return value is shown as the cell
# output. cv=5 is forwarded to the helper (presumably the number of
# cross-validation folds -- confirm in utils/utility.py).
# NOTE(review): in the recorded run the tree and forest models score ~0.94-0.95
# cross-validated accuracy but only ~0.59 on the held-out split. Possibly
# duplicated oversampled rows are shared between CV folds -- TODO confirm.
evaluate_cat_models(
    model_list,
    X_train_os, X_test_os,
    y_train_os, y_test_os,
    cv=5,
)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.943966,0.784341,0.949923,0.798536,0.796954
Cross Validated Accuracy Std,0.001117,0.001344,0.001077,0.001749,0.001537
Accuracy Score,0.593966,0.783464,0.595317,0.797677,0.795393
Precision Score,0.748814,0.705194,0.783466,0.736805,0.729137
Recall Score,0.282792,0.974187,0.263444,0.926204,0.939969
F1 Score,0.410542,0.818148,0.394302,0.820719,0.821238


### Undersampling

In [6]:
# Load the undersampled train/test splits from the ProcessedData directory.
train_set_us, test_set_us = map(
    pd.read_csv,
    (
        '../Data/ProcessedData/train_set_us.csv',
        '../Data/ProcessedData/test_set_us.csv',
    ),
)

# Preview the first rows of the training split as the cell output.
train_set_us.head()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,0,-0.12585,1,1.694903,1,0,0,-1.610919,-1.331072,-0.502065,0
1,1,-1.101418,1,-0.678984,1,1,0,-0.029919,0.869464,-0.800362,0
2,0,0.013516,1,0.112311,0,1,0,-1.610919,-1.832008,1.299644,0
3,0,1.755601,1,0.112311,0,0,1,-0.363326,0.368529,0.703052,0
4,1,1.407184,1,0.112311,0,0,1,2.100059,-1.384744,0.416687,0


In [7]:
# Separate the 'Response' target column from the remaining feature columns
# for both undersampled splits.
y_train_us, y_test_us = train_set_us['Response'], test_set_us['Response']
X_train_us = train_set_us.drop(columns='Response')
X_test_us = test_set_us.drop(columns='Response')

In [8]:
# Classifiers to compare on the undersampled data.
model_list = [
    tree,
    gnb,
    forest,
    gradient_boost,
    ada_boost,
]

# Run the shared evaluation helper; its return value is shown as the cell
# output. cv=5 is forwarded to the helper (presumably the number of
# cross-validation folds -- confirm in utils/utility.py).
evaluate_cat_models(
    model_list,
    X_train_us, X_test_us,
    y_train_us, y_test_us,
    cv=5,
)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.714812,0.784828,0.782386,0.796611,0.795917
Cross Validated Accuracy Std,0.003733,0.001484,0.003126,0.002424,0.002428
Accuracy Score,0.71461,0.7829,0.778409,0.795022,0.793182
Precision Score,0.718778,0.704634,0.732574,0.733271,0.726354
Recall Score,0.705087,0.974134,0.876948,0.927381,0.940801
F1 Score,0.711866,0.817752,0.798286,0.818981,0.819785


### SMOTE

In [9]:
# Load the SMOTE-resampled train/test splits from the ProcessedData directory.
train_set_sm, test_set_sm = map(
    pd.read_csv,
    (
        '../Data/ProcessedData/train_set_sm.csv',
        '../Data/ProcessedData/test_set_sm.csv',
    ),
)

# Preview the first rows of the training split as the cell output.
train_set_sm.head()

Unnamed: 0,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,0,-1.204014,1,0.944521,1,1,0,1.005682,1.021798,0.324447,0
1,1,-0.694981,1,1.193767,1,1,0,0.430212,0.877823,1.623882,0
2,0,-1.349452,1,0.279865,1,1,0,-0.465176,0.877823,0.901974,0
3,1,0.759399,1,1.941505,0,0,1,0.037875,-1.389772,-1.059211,0
4,0,2.286497,1,0.113701,0,0,1,0.409095,-1.389772,-0.878734,0


In [10]:
# Separate the 'Response' target column from the remaining feature columns
# for both SMOTE splits.
y_train_sm, y_test_sm = train_set_sm['Response'], test_set_sm['Response']
X_train_sm = train_set_sm.drop(columns='Response')
X_test_sm = test_set_sm.drop(columns='Response')

In [11]:
# Classifiers to compare on the SMOTE-resampled data.
model_list = [
    tree,
    gnb,
    forest,
    gradient_boost,
    ada_boost,
]

# Run the shared evaluation helper; its return value is shown as the cell
# output. cv=5 is forwarded to the helper (presumably the number of
# cross-validation folds -- confirm in utils/utility.py).
# NOTE(review): in the recorded run the tree and forest models show a much
# larger cross-validated accuracy std (~0.07) than on the other datasets, and
# a CV mean well above the held-out accuracy -- worth checking how the folds
# interact with the synthetic samples.
evaluate_cat_models(
    model_list,
    X_train_sm, X_test_sm,
    y_train_sm, y_test_sm,
    cv=5,
)

Fitting DecisionTreeClassifier()
Done with fitting....
DecisionTreeClassifier() cross validation
Done with cross validation


Fitting GaussianNB()
Done with fitting....
GaussianNB() cross validation
Done with cross validation


Fitting RandomForestClassifier()
Done with fitting....
RandomForestClassifier() cross validation
Done with cross validation


Fitting GradientBoostingClassifier()
Done with fitting....
GradientBoostingClassifier() cross validation
Done with cross validation


Fitting AdaBoostClassifier()
Done with fitting....
AdaBoostClassifier() cross validation
Done with cross validation




Unnamed: 0,DecisionTreeClassifier(),GaussianNB(),RandomForestClassifier(),GradientBoostingClassifier(),AdaBoostClassifier()
Cross Validated Accuracy Mean,0.872417,0.777103,0.879413,0.836791,0.823659
Cross Validated Accuracy Std,0.073267,0.001904,0.066333,0.037962,0.031696
Accuracy Score,0.773521,0.775977,0.793183,0.77759,0.793534
Precision Score,0.695683,0.702348,0.716956,0.694605,0.715183
Recall Score,0.97241,0.957914,0.968857,0.990803,0.97559
F1 Score,0.811093,0.810461,0.824087,0.816677,0.825333
