In [None]:
from IPython.display import Image
import os

# Display the notebook's banner image from the attached input dataset.
Image("../input/crosssell/edd-cross-sell-and-upsell.png")

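* This notebook solves the problem with a deep learning technique (an artificial neural network).
* The class imbalance is corrected by resampling with SMOTETomek (SMOTE over-sampling combined with Tomek-link cleaning).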
# Please upvote this notebook if you like it, for further encouragement.

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

# Supress unnecessary warnings so that presentation looks clean
import warnings
warnings.filterwarnings("ignore")
from imblearn.combine import SMOTETomek
from imblearn.under_sampling import NearMiss

In [None]:
# Read train.csv of the health-insurance cross-sell dataset and preview
# the first rows.
df = pd.read_csv(
    "../input/health-insurance-cross-sell-prediction/train.csv"
)
df.head()

This EDA step checks whether the target variable (`Response`) is imbalanced, i.e. whether its class distribution is far from a 50/50 or 60/40 split.

In [None]:
# Bar chart of how many rows fall into each Response class.
# `Series.value_counts()` replaces the deprecated top-level `pd.value_counts`,
# and `sort_index()` orders the bars by class label so that each tick label is
# the class value itself rather than a position that merely happens to match.
count_classes = df['Response'].value_counts().sort_index()

count_classes.plot(kind='bar', rot=0)

plt.title("Response Class Distribution")
plt.xlabel("Response")
plt.ylabel("Frequency")
plt.show()

In [None]:
# Absolute number of rows per Response class.
df['Response'].value_counts()

In [None]:
# Count the missing values in every column.
df.isna().sum()

One hot encoding 

In [None]:
# One-hot encode the categorical predictors.
#
# `Response` is the target, so it is kept as a single numeric 0/1 column and is
# NOT one-hot encoded: dummy columns derived from the target (Response_0 /
# Response_1) would leak it into the correlation plot and into any feature
# selection done from `df`. Keeping it numeric also lets it be used directly
# as the label for resampling and training.
#
# The remaining columns of the raw frame (e.g. Region_Code) are dropped here,
# exactly as before.
categorical_features = ['Gender', 'Previously_Insured', 'Vehicle_Age',
                        'Vehicle_Damage', 'Driving_License']
numeric_features = ['Age', 'Annual_Premium', 'Policy_Sales_Channel', 'Vintage']

# Casting to `category` makes get_dummies encode the integer-coded columns too,
# producing names such as `Previously_Insured_0` / `Driving_License_1`.
encoded_features = pd.get_dummies(df[categorical_features].astype('category'))

df = pd.concat([df[numeric_features + ['Response']], encoded_features], axis=1)

# Correlation plot

In [None]:
# Annotated heatmap of the pairwise Pearson correlations between the
# columns of the encoded frame.
plt.figure(figsize=(12, 10))
cor = df.corr(method="pearson")
sns.heatmap(data=cor, annot=True)
plt.show()

In [None]:
# Independent features: the four numeric columns plus every one-hot column
# derived from the categorical predictors (15 columns in total).
X = df[[
    'Age',
    'Annual_Premium',
    'Policy_Sales_Channel',
    'Vintage',
    'Gender_Female',
    'Gender_Male',
    'Previously_Insured_0',
    'Previously_Insured_1',
    'Vehicle_Age_1-2 Year',
    'Vehicle_Age_< 1 Year',
    'Vehicle_Age_> 2 Years',
    'Vehicle_Damage_No',
    'Vehicle_Damage_Yes',
    'Driving_License_0',
    'Driving_License_1',
]]
# Target, kept as a one-column frame.
y = df[["Response"]]

# Resampling with SMOTETomek to handle the imbalanced classes

In [None]:
# Rebalance the classes with SMOTETomek (seeded for reproducibility).
# `fit_resample` is the current imbalanced-learn API; the older `fit_sample`
# alias was deprecated and later removed, so calling it raises AttributeError
# on recent versions.
smk = SMOTETomek(random_state=42)
X_res, y_res = smk.fit_resample(X, y)

After resampling, classes 0 and 1 are (approximately) equally distributed.

In [None]:
# Shapes of the resampled feature matrix and target.
X_res.shape,y_res.shape

In [None]:
# Class counts after resampling.
y_res.value_counts()

In [None]:
# Stratified k-fold splitter with two folds, no shuffling.
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=2, shuffle=False, random_state=None)
# Number of splitting iterations the splitter will produce.
skf.get_n_splits(X_res, y_res)

In [None]:
# Walk through the stratified folds and materialise the train/test subsets.
# `skf.split` yields positional row indices, so the DataFrames must be indexed
# with `.iloc`; plain `X[train_index]` is a column lookup and raises KeyError.
# NOTE: X_train / X_test / y_train / y_test hold only the last fold after the
# loop, and are reassigned by the train_test_split cell that follows.
for train_index, test_index in skf.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the resampled data as the test set (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X_res,
    y_res,
    test_size=0.1,
    random_state=0,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features: the scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# ANN - building the deep learning model

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout 

In [None]:
# Feed-forward network: two ReLU hidden layers of 6 units each (He-uniform
# weight initialisation) followed by one sigmoid output unit.
classifier = Sequential([
    # Input layer + first hidden layer; expects 15 input features.
    Dense(units=6, kernel_initializer='he_uniform', activation='relu', input_dim=15),
    # Second hidden layer.
    Dense(units=6, kernel_initializer='he_uniform', activation='relu'),
    # Output layer.
    Dense(units=1, kernel_initializer='glorot_uniform', activation='sigmoid'),
])

# Compile the ANN: Adam optimiser, binary cross-entropy loss, accuracy metric.
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Show the summary of the compiled model.
classifier.summary()

In [None]:
# Train for 25 epochs in mini-batches of 10, holding out 33% of the training
# data for validation. The returned History object is kept for plotting.
model_history = classifier.fit(
    X_train,
    y_train,
    validation_split=0.33,
    batch_size=10,
    epochs=25,
)

# Author's observation: low bias, high variance.

In [None]:
# Training vs. validation accuracy per epoch.
# The second curve comes from the `validation_split` passed to `fit`, not from
# the held-out test set, so it is labelled "validation" rather than "test".
print(model_history.history.keys())  # metrics recorded during training

plt.plot(model_history.history['accuracy'])
plt.plot(model_history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
# The second curve comes from the `validation_split` passed to `fit`, not from
# the held-out test set, so it is labelled "validation" rather than "test".
plt.plot(model_history.history['loss'])
plt.plot(model_history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Part 3 - Making the predictions and evaluating the model

# Score the test set and turn the sigmoid outputs into boolean class
# predictions using a 0.5 threshold.
y_pred = classifier.predict(X_test) > 0.5

In [None]:
# Inspect the thresholded (boolean) test-set predictions.
y_pred 

In [None]:
# Confusion matrix of true vs. predicted classes on the test set.
from sklearn.metrics import confusion_matrix

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

In [None]:
# Accuracy on the held-out test set.
# Arguments follow scikit-learn's documented (y_true, y_pred) order; the value
# is unchanged because accuracy is symmetric, but the call now matches the
# convention used by the other metric functions.
from sklearn.metrics import accuracy_score

score = accuracy_score(y_test, y_pred)
score

In [None]:
# Inspect the resampled feature matrix.
X_res

In [None]:
# Predict for a single hand-built customer record.
# The values follow the column order of `X`:
# Age, Annual_Premium, Policy_Sales_Channel, Vintage, Gender_Female, Gender_Male,
# Previously_Insured_0, Previously_Insured_1, Vehicle_Age_1-2 Year,
# Vehicle_Age_< 1 Year, Vehicle_Age_> 2 Years, Vehicle_Damage_No,
# Vehicle_Damage_Yes, Driving_License_0, Driving_License_1
sample_customer = [[44, 40454, 26, 217, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1]]

# `model_history` is the History object returned by `fit` and has no `predict`
# method; the trained model is `classifier`. The record must also go through
# the same fitted scaler that was applied to the training data.
predictedcarinsurance = classifier.predict(sc.transform(sample_customer))
predictedcarinsurance