# HOMEWORK - MACHINE LEARNING

Create Machine Learning Model to Predict Insurance Company Offering Response

In [1]:
#!python3

#load library
import time #menghitung waktu
import pickle #tipe data menyimpan model

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier #metode belajar dengan model decision tree
from sklearn.model_selection import train_test_split #proses belajar
from sklearn.metrics import classification_report #hasil belajar

# DATA UNDERSTANDING

In [2]:
data = pd.read_csv('Prediction Insurance.csv')
data.head()

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28,0,> 2 Years,Yes,40454,26,217,1
1,2,Male,76,1,3,0,1-2 Year,No,33536,26,183,0
2,3,Male,47,1,28,0,> 2 Years,Yes,38294,26,27,1
3,4,Male,21,1,11,1,< 1 Year,No,28619,152,203,0
4,5,Female,29,1,41,1,< 1 Year,No,27496,152,39,0


In [3]:
data.shape

(381109, 12)

In [4]:
data['Driving_License'].unique()

array([1, 0], dtype=int64)

In [5]:
data['Region_Code'].unique()

array([28,  3, 11, 41, 33,  6, 35, 50, 15, 45,  8, 36, 30, 26, 16, 47, 48,
       19, 39, 23, 37,  5, 17,  2,  7, 29, 46, 27, 25, 13, 18, 20, 49, 22,
       44,  0,  9, 31, 12, 34, 21, 10, 14, 38, 24, 40, 43, 32,  4, 51, 42,
        1, 52], dtype=int64)

In [6]:
data['Previously_Insured'].unique()

array([0, 1], dtype=int64)

In [7]:
data['Vehicle_Age'].value_counts()

Vehicle_Age
1-2 Year     200316
< 1 Year     164786
> 2 Years     16007
Name: count, dtype: int64

# DATA PREPROCESSING

In [8]:
data.columns

Index(['id', 'Gender', 'Age', 'Driving_License', 'Region_Code',
       'Previously_Insured', 'Vehicle_Age', 'Vehicle_Damage', 'Annual_Premium',
       'Policy_Sales_Channel', 'Vintage', 'Response'],
      dtype='object')

In [9]:
#Drop Unnecessary Column
data = data.drop('id', axis=1)

#Number Encoding
data['Gender'] = data['Gender'].map({'Male':1, 'Female':0})
data['Vehicle_Age'] = data['Vehicle_Age'].map({'> 2 Years':3, '1-2 Year':2,'< 1 Year':1})
data['Vehicle_Damage'] = data['Vehicle_Damage'].map({'Yes':1, 'No':0})

#One-Hot Encoding
df_region = pd.get_dummies(data['Region_Code'],dtype=int, prefix = 'reg')

#df = data[['Gender','Age','Driving_License','Response','Previously_Insured', 'Vehicle_Age']].join(df_region)

df = data[['Gender','Age','Driving_License','Previously_Insured', 'Vehicle_Age','Vehicle_Damage','Annual_Premium','Vintage','Response']].merge(df_region, left_index=True, right_index=True)

df.head()

Unnamed: 0,Gender,Age,Driving_License,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Vintage,Response,reg_0,...,reg_43,reg_44,reg_45,reg_46,reg_47,reg_48,reg_49,reg_50,reg_51,reg_52
0,1,44,1,0,3,1,40454,217,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,76,1,0,2,0,33536,183,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,47,1,0,3,1,38294,27,1,0,...,0,0,0,0,0,0,0,0,0,0
3,1,21,1,1,1,0,28619,203,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,29,1,1,1,0,27496,39,0,0,...,0,0,0,0,0,0,0,0,0,0


# ML MODELLING AND EVALUATION

In [10]:
#split feature and label
x = df.drop('Response', axis=1) #inputan
y = df['Response'] #outputan

#train-test split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)

In [11]:
# ML Moelling with decision tree
from sklearn.tree import DecisionTreeClassifier # import decision tree dari sklearn

start = time.time()
model = DecisionTreeClassifier(random_state=0) # inisiasi object dengan nama dt

#hasil belajar
model.fit(x_train, y_train)
stop = time.time()
print(f"Training Time {stop-start} Sekon.....")

Training Time 7.782812118530273 Sekon.....


In [12]:
#model evaluation
y_predict = model.predict(x_test)
print(classification_report(y_predict, y_test))

              precision    recall  f1-score   support

           0       0.90      0.90      0.90     66433
           1       0.30      0.29      0.29      9789

    accuracy                           0.82     76222
   macro avg       0.60      0.59      0.60     76222
weighted avg       0.82      0.82      0.82     76222



In [14]:
#model export
with open('model_decisiontree_malik.pkl','wb') as file:
    pickle.dump(model, file)