# Library

In [9]:
import time 
import pickle 

import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report 

# Load Data

In [11]:
data = pd.read_csv('Prediction Insurance.csv')
data.head(1)

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28,0,> 2 Years,Yes,40454,26,217,1


In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 381109 entries, 0 to 381108
Data columns (total 12 columns):
 #   Column                Non-Null Count   Dtype 
---  ------                --------------   ----- 
 0   id                    381109 non-null  int64 
 1   Gender                381109 non-null  object
 2   Age                   381109 non-null  int64 
 3   Driving_License       381109 non-null  int64 
 4   Region_Code           381109 non-null  int64 
 5   Previously_Insured    381109 non-null  int64 
 6   Vehicle_Age           381109 non-null  object
 7   Vehicle_Damage        381109 non-null  object
 8   Annual_Premium        381109 non-null  int64 
 9   Policy_Sales_Channel  381109 non-null  int64 
 10  Vintage               381109 non-null  int64 
 11  Response              381109 non-null  int64 
dtypes: int64(9), object(3)
memory usage: 34.9+ MB


In [13]:
data.shape

(381109, 12)

# Exploratory Data Analysis

In [14]:
data.groupby(['Previously_Insured','Response']).agg({'id':'count'}).unstack()

Unnamed: 0_level_0,id,id
Response,0,1
Previously_Insured,Unnamed: 1_level_2,Unnamed: 2_level_2
0,159929,46552
1,174470,158


In [15]:
data.groupby(['Gender','Response']).agg({'id':'count'}).unstack()

Unnamed: 0_level_0,id,id
Response,0,1
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2
Female,156835,18185
Male,177564,28525


In [16]:
data.groupby(['Driving_License','Response']).agg({'id':'count'}).unstack()

Unnamed: 0_level_0,id,id
Response,0,1
Driving_License,Unnamed: 1_level_2,Unnamed: 2_level_2
0,771,41
1,333628,46669


# Data Preprocessing

In [18]:
data.select_dtypes('object').head()

Unnamed: 0,Gender,Vehicle_Age,Vehicle_Damage
0,Male,> 2 Years,Yes
1,Male,1-2 Year,No
2,Male,> 2 Years,Yes
3,Male,< 1 Year,No
4,Female,< 1 Year,No


In [19]:
data['Vehicle_Age'].unique()

array(['> 2 Years', '1-2 Year', '< 1 Year'], dtype=object)

In [20]:
data['Vehicle_Damage'].unique()

array(['Yes', 'No'], dtype=object)

In [21]:
data['Gender'] = data['Gender'].map({'Male':0, 'Female':1})
data['Vehicle_Age'] = data['Vehicle_Age'].map({'< 1 Year':0, 
                                               '1-2 Year':1,
                                               '> 2 Years':2})
data['Vehicle_Damage'] = data['Vehicle_Damage'].map({'No':0, 'Yes':1})
data.head(5)

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,0,44,1,28,0,2,1,40454,26,217,1
1,2,0,76,1,3,0,1,0,33536,26,183,0
2,3,0,47,1,28,0,2,1,38294,26,27,1
3,4,0,21,1,11,1,0,0,28619,152,203,0
4,5,1,29,1,41,1,0,0,27496,152,39,0


In [23]:
x = data.drop('Response', axis=1) 
y = data['Response'] 

x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)

# Model Training & Evaluation

In [24]:
start = time.time()
model = DecisionTreeClassifier()

model.fit(x_train, y_train)

stop = time.time()
print(f"Training time: {stop-start} seconds.....")

Training time: 9.250371217727661 seconds.....


In [25]:
y_predict = model.predict(x_test)
print(classification_report(y_predict, y_test))

              precision    recall  f1-score   support

           0       0.89      0.90      0.90     66206
           1       0.31      0.29      0.30     10016

    accuracy                           0.82     76222
   macro avg       0.60      0.60      0.60     76222
weighted avg       0.82      0.82      0.82     76222



# Dump The Model

In [26]:
with open('modelDT.pkl','wb') as file:
    pickle.dump(model, file)