# $$Health-Insurance-Cross-Sell-Prediction$$


## Importing the libraries

In [1]:
import numpy as np
import datetime
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

In [2]:
tf.__version__

'2.3.0'

## Importing the dataset

In [3]:
dataset = pd.read_csv('train.csv')
X = dataset.iloc[:, 1:-1].values
Y = dataset.iloc[:, -1].values

print(X.shape)
print(Y.shape)

(381109, 10)
(381109,)


In [4]:
# Visualising dataframe
dataset.head()

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28.0,0,> 2 Years,Yes,40454.0,26.0,217,1
1,2,Male,76,1,3.0,0,1-2 Year,No,33536.0,26.0,183,0
2,3,Male,47,1,28.0,0,> 2 Years,Yes,38294.0,26.0,27,1
3,4,Male,21,1,11.0,1,< 1 Year,No,28619.0,152.0,203,0
4,5,Female,29,1,41.0,1,< 1 Year,No,27496.0,152.0,39,0


In [5]:
# Checking Missing values
dataset.isna().sum()

id                      0
Gender                  0
Age                     0
Driving_License         0
Region_Code             0
Previously_Insured      0
Vehicle_Age             0
Vehicle_Damage          0
Annual_Premium          0
Policy_Sales_Channel    0
Vintage                 0
Response                0
dtype: int64

In [6]:
print(dataset['Driving_License'].value_counts())

1    380297
0       812
Name: Driving_License, dtype: int64


In [7]:
print(dataset['Previously_Insured'].value_counts())

0    206481
1    174628
Name: Previously_Insured, dtype: int64


In [8]:
pd.crosstab(dataset['Response'], dataset['Previously_Insured'])

Previously_Insured,0,1
Response,Unnamed: 1_level_1,Unnamed: 2_level_1
0,159929,174470
1,46552,158


## Encoding categorical data

In [9]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct= ColumnTransformer([('encoder', OneHotEncoder(),[0]),
                                 ('encoder1', OneHotEncoder(),[5]),
                                 ('encoder2', OneHotEncoder(),[6])], remainder ='passthrough')
X=np.array(ct.fit_transform(X))


## Encoding the Dependent Variable

In [10]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y= le.fit_transform(Y)
print(Y)

[1 0 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.01, random_state = 0)

In [12]:
print(X_train.shape)
print(X_train)

(377297, 14)
[[0.0 1.0 0.0 ... 31265.0 152.0 185]
 [0.0 1.0 1.0 ... 48490.0 139.0 160]
 [0.0 1.0 0.0 ... 34204.0 124.0 203]
 ...
 [0.0 1.0 0.0 ... 30400.0 160.0 118]
 [1.0 0.0 0.0 ... 34849.0 157.0 229]
 [1.0 0.0 0.0 ... 30680.0 152.0 81]]


In [13]:
print(X_test.shape)
print(X_test)

(3812, 14)
[[1.0 0.0 0.0 ... 38744.0 152.0 267]
 [1.0 0.0 1.0 ... 24929.0 124.0 156]
 [1.0 0.0 1.0 ... 113486.0 26.0 70]
 ...
 [0.0 1.0 1.0 ... 20110.0 124.0 127]
 [1.0 0.0 1.0 ... 2630.0 157.0 153]
 [1.0 0.0 1.0 ... 34383.0 26.0 120]]


In [14]:
print(Y_train.shape)
print(Y_train)

(377297,)
[0 0 1 ... 0 0 0]


In [15]:
print(Y_test.shape)
print(Y_test)

(3812,)
[0 0 0 ... 0 1 0]


## Feature Scaling

In [16]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [17]:
print(X_train)

[[-0.92137569  0.92137569 -1.05270752 ...  0.04057348  0.73739702
   0.36619876]
 [-0.92137569  0.92137569  0.94993146 ...  1.04147281  0.49757372
   0.06740057]
 [-0.92137569  0.92137569 -1.05270752 ...  0.21135102  0.22085452
   0.58133346]
 ...
 [-0.92137569  0.92137569 -1.05270752 ... -0.00968938  0.88498059
  -0.43458039]
 [ 1.0853336  -1.0853336  -1.05270752 ...  0.24883027  0.82963675
   0.89208358]
 [ 1.0853336  -1.0853336  -1.05270752 ...  0.00658068  0.73739702
  -0.87680172]]


In [18]:
print(X_test)

[[ 1.0853336  -1.0853336  -1.05270752 ...  0.47515845  0.73739702
   1.34625683]
 [ 1.0853336  -1.0853336   0.94993146 ... -0.32759477  0.22085452
   0.01959286]
 [ 1.0853336  -1.0853336   0.94993146 ...  4.81821895 -1.5870442
  -1.00827292]
 ...
 [-0.92137569  0.92137569  0.94993146 ... -0.60761415  0.22085452
  -0.32701304]
 [ 1.0853336  -1.0853336   0.94993146 ... -1.62333085  0.82963675
  -0.01626292]
 [ 1.0853336  -1.0853336   0.94993146 ...  0.22175224 -1.5870442
  -0.41067654]]


# $$Artificial Neural Network$$

### Defining the model

Simply define an object of the Sequential model.

In [19]:
model = tf.keras.models.Sequential()

### Adding a first fully-connected hidden layer

Layer hyper-parameters:
- number of units/neurons: 256
- activation function: ReLU
- input_shape: (14, )

In [20]:
model.add(tf.keras.layers.Dense(units=256, activation='relu', input_shape=(14, )))

### Adding a second layer with Dropout

Dropout is a Regularization technique where we randomly set neurons in a layer to zero. That way while training those neurons won't be updated. Because some percentage of neurons won't be updated the whole training process is long and we have less chance for overfitting.

In [21]:
model.add(tf.keras.layers.Dropout(0.2))

## Adding the output layer

- units: number of classes (2 i.e. Yes/no)
- activation: softmax

In [22]:
model.add(tf.keras.layers.Dense(units=2, activation='softmax'))

### Compiling the model

- Optimizer: Adam
- Loss: Sparse softmax (categorical) crossentropy 

In [23]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               3840      
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 514       
Total params: 4,354
Trainable params: 4,354
Non-trainable params: 0
_________________________________________________________________


### Training the model

In [25]:
model.fit(X_train, Y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fece8480340>

### Model evaluation and prediction

In [26]:
test_loss, test_accuracy = model.evaluate(X_test, Y_test)



In [27]:
print("Test accuracy: {}".format(test_accuracy))

Test accuracy: 0.8790661096572876
