# $$Health-Insurance-Cross-Sell-Prediction$$


## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('train.csv')
X = dataset.iloc[:, 1:-1].values
Y = dataset.iloc[:, -1].values

print(X.shape)
print(Y.shape)

(381109, 10)
(381109,)


In [3]:
# Visualising dataframe
dataset.head()

Unnamed: 0,id,Gender,Age,Driving_License,Region_Code,Previously_Insured,Vehicle_Age,Vehicle_Damage,Annual_Premium,Policy_Sales_Channel,Vintage,Response
0,1,Male,44,1,28.0,0,> 2 Years,Yes,40454.0,26.0,217,1
1,2,Male,76,1,3.0,0,1-2 Year,No,33536.0,26.0,183,0
2,3,Male,47,1,28.0,0,> 2 Years,Yes,38294.0,26.0,27,1
3,4,Male,21,1,11.0,1,< 1 Year,No,28619.0,152.0,203,0
4,5,Female,29,1,41.0,1,< 1 Year,No,27496.0,152.0,39,0


In [4]:
# Checking Missing values
dataset.isna().sum()

id                      0
Gender                  0
Age                     0
Driving_License         0
Region_Code             0
Previously_Insured      0
Vehicle_Age             0
Vehicle_Damage          0
Annual_Premium          0
Policy_Sales_Channel    0
Vintage                 0
Response                0
dtype: int64

## Encoding categorical data

In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct= ColumnTransformer([('encoder', OneHotEncoder(),[0]),
                                 ('encoder1', OneHotEncoder(),[5]),
                                 ('encoder2', OneHotEncoder(),[6])], remainder ='passthrough')
X=np.array(ct.fit_transform(X))


## Encoding the Dependent Variable

In [6]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
Y= le.fit_transform(Y)
print(Y)

[1 0 1 ... 0 0 0]


## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.10, random_state = 0)

In [8]:
print(X_train.shape)
print(X_train)

(342998, 14)
[[1.0 0.0 0.0 ... 44317.0 26.0 25]
 [1.0 0.0 1.0 ... 2630.0 124.0 64]
 [1.0 0.0 0.0 ... 38964.0 152.0 247]
 ...
 [0.0 1.0 0.0 ... 30400.0 160.0 118]
 [1.0 0.0 0.0 ... 34849.0 157.0 229]
 [1.0 0.0 0.0 ... 30680.0 152.0 81]]


In [9]:
print(X_test.shape)
print(X_test)

(38111, 14)
[[1.0 0.0 0.0 ... 38744.0 152.0 267]
 [1.0 0.0 1.0 ... 24929.0 124.0 156]
 [1.0 0.0 1.0 ... 113486.0 26.0 70]
 ...
 [0.0 1.0 0.0 ... 31147.0 152.0 178]
 [1.0 0.0 1.0 ... 31947.0 8.0 166]
 [0.0 1.0 1.0 ... 32448.0 11.0 25]]


In [10]:
print(Y_train.shape)
print(Y_train)

(342998,)
[1 0 0 ... 0 0 0]


In [11]:
print(Y_test.shape)
print(Y_test)

(38111,)
[0 0 0 ... 0 0 0]


## Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [13]:
print(X_train)

[[ 1.08522337 -1.08522337 -1.05205302 ...  0.79802947 -1.58729499
  -1.54405244]
 [ 1.08522337 -1.08522337  0.95052244 ... -1.62201915  0.22071153
  -1.07810624]
 [ 1.08522337 -1.08522337 -1.05205302 ...  0.48727263  0.73728482
   1.10825669]
 ...
 [-0.92146928  0.92146928 -1.05205302 ... -0.00989186  0.88487719
  -0.43294996]
 [ 1.08522337 -1.08522337 -1.05205302 ...  0.24838521  0.82953005
   0.8932046 ]
 [ 1.08522337 -1.08522337 -1.05205302 ...  0.00636293  0.73728482
  -0.87500149]]


In [14]:
print(X_test)

[[ 1.08522337 -1.08522337 -1.05205302 ...  0.47450101  0.73728482
   1.34720346]
 [ 1.08522337 -1.08522337  0.95052244 ... -0.32749894  0.22071153
   0.02104889]
 [ 1.08522337 -1.08522337  0.95052244 ...  4.81348617 -1.58729499
  -1.00642221]
 ...
 [-0.92146928  0.92146928 -1.05205302 ...  0.03347361  0.73728482
   0.28389034]
 [ 1.08522337 -1.08522337  0.95052244 ...  0.07991588 -1.91937783
   0.14052228]
 [-0.92146928  0.92146928  0.95052244 ...  0.10900035 -1.86403069
  -1.54405244]]


# $$Naive-Bayes$$

In [31]:
# Training the Naive Bayes model on the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, Y_train)

GaussianNB()

## Predicting a new result

In [32]:
print(classifier.predict(sc.transform([X_test[1]])))

[0]


## Predicting the Test set results

In [33]:
Y_pred = classifier.predict(X_test)
print(np.concatenate((Y_pred.reshape(len(Y_pred),1), Y_test.reshape(len(Y_test),1)),1))

[[0 0]
 [1 0]
 [1 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


## Making the Confusion Matrix

In [34]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(Y_test, Y_pred)
print(cm)
accuracy_score(Y_test, Y_pred)

[[20284 13110]
 [  131  4586]]


0.6525675001967935