In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
!pip install -U seaborn
import seaborn as sns
%matplotlib inline
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Exploring the dataset

In [None]:
# Read train.csv into `df`.
train_csv_path='../input/health-insurance-cross-sell-prediction/train.csv'
df=pd.read_csv(train_csv_path)

In [None]:
# Read test.csv into `df_test`.
test_csv_path='../input/health-insurance-cross-sell-prediction/test.csv'
df_test=pd.read_csv(test_csv_path)

In [None]:
# Print the column-level summary of the training frame.
df.info()

In [None]:
# Show the first rows of the training frame.
df.head()

In [None]:
# Show descriptive statistics of the training frame.
df.describe()

# Preparing data for fitting the model

### Creating Bins

In [None]:
# Cut four numeric training columns into 5 bins each, labelled A1..A5.
bin_labels=['A1','A2','A3','A4','A5']
age,rc,ap,vin=(
    pd.cut(df[column],bins=5,labels=bin_labels)
    for column in ('Age','Region_Code','Annual_Premium','Vintage')
)

In [None]:
def _bin_like_train(train_col, test_col, n_bins=5, labels=('A1','A2','A3','A4','A5')):
    """Bin ``test_col`` with the interval edges that ``pd.cut`` derives from ``train_col``.

    With an integer ``bins``, ``pd.cut`` computes its edges from the data it is
    given. Cutting the test frame on its own would therefore give the same
    label (e.g. 'A3') a different numeric range than in the training frame.

    Parameters
    ----------
    train_col : pd.Series
        Training column the bin edges are derived from.
    test_col : pd.Series
        Test column to be binned.
    n_bins : int
        Number of bins requested from ``pd.cut`` on ``train_col``.
    labels : sequence of str
        One label per bin, in ascending bin order.

    Returns
    -------
    pd.Series
        Categorical series aligned with ``test_col``.
    """
    _, edges = pd.cut(train_col, bins=n_bins, retbins=True)
    # Open the outer edges so test values outside the training range still fall
    # into the first/last bin instead of becoming NaN.
    edges[0], edges[-1] = -np.inf, np.inf
    return pd.cut(test_col, bins=edges, labels=list(labels))

# Bin the test columns using the training frame's edges for the same columns.
age2=_bin_like_train(df['Age'],df_test['Age'])
rc2=_bin_like_train(df['Region_Code'],df_test['Region_Code'])
ap2=_bin_like_train(df['Annual_Premium'],df_test['Annual_Premium'])
vin2=_bin_like_train(df['Vintage'],df_test['Vintage'])

### Adding the binned columns to our datasets

In [None]:
# Attach the binned series to the training frame under their short names.
for column_name,binned in (('age',age),('rc',rc),('ap',ap),('vin',vin)):
    df[column_name]=binned

In [None]:
# Attach the binned series to the test frame under their '*2' names.
for column_name,binned in (('age2',age2),('rc2',rc2),('ap2',ap2),('vin2',vin2)):
    df_test[column_name]=binned

### Creating dummy columns

In [None]:
# One-hot encode the binned columns together with the raw categorical columns.
# NOTE(review): 'Vehicle_Damage' is selected twice in both lists, so its dummy
# columns are presumably duplicated in the output. Later cells may depend on
# the resulting column count - confirm before removing the duplicate.
train_dummy_cols=['age','rc','ap','vin','Gender','Vehicle_Age','Vehicle_Damage','Vehicle_Damage']
test_dummy_cols=['age2','rc2','ap2','vin2','Gender','Vehicle_Age','Vehicle_Damage','Vehicle_Damage']
df_dum=pd.get_dummies(df[train_dummy_cols])
df_dum2=pd.get_dummies(df_test[test_dummy_cols])

In [None]:
# Append the dummy columns to the right of each frame.
df=pd.concat(objs=[df,df_dum],axis='columns')
df_test=pd.concat(objs=[df_test,df_dum2],axis='columns')

In [None]:
# Remove the raw columns and their binned versions; only the dummy columns
# created from them (plus the untouched columns) remain.
raw_columns=['Gender','Age','Region_Code','Vehicle_Age','Vehicle_Damage','Annual_Premium','Vintage']
df.drop(columns=raw_columns+['age','rc','ap','vin'],inplace=True)
df_test.drop(columns=raw_columns+['age2','rc2','ap2','vin2'],inplace=True)

# Scaling the data

In [None]:
from sklearn.preprocessing import StandardScaler as sc

In [None]:
scaler=sc()
# Learn the scaling statistics from the training features only.
# Plain float arrays are passed (not DataFrames) because the train and test
# dummy columns carry different names ('age_*' vs 'age2_*').
scaled_df=scaler.fit_transform(df.drop(['id','Response'],axis=1).to_numpy(dtype='float64'))
# Re-use the training statistics on the test features; calling fit_transform
# here would re-fit the scaler on the test data and scale it differently.
# NOTE(review): assumes both frames hold the same feature columns in the same
# order once 'id'/'Response' are removed - confirm.
scaled_dft=scaler.transform(df_test.drop('id',axis=1).to_numpy(dtype='float64'))
assert scaled_dft.shape[1]==scaled_df.shape[1], "train/test feature counts differ"

### Dividing the data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 27% of the scaled training rows, with a fixed seed.
response=df['Response']
xtr,xte,ytr,yte=train_test_split(
    scaled_df,
    response,
    test_size=0.27,
    random_state=42,
)

# Selecting a model with tuned hyperparameters

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Seed shared by the estimator and the search so repeated runs give the same result.
search_seed=42
sgc=SGDClassifier(penalty='l1',loss='modified_huber',early_stopping=True,random_state=search_seed)
# Candidate values for each hyperparameter explored by the search.
model_params={'alpha':[0.0012,0.0011,0.0013],
              'learning_rate':['invscaling'],
              'max_iter':[690,700,710],
              'validation_fraction':[0.46,0.47,0.48],
              'eta0':[0.65,0.66,0.64]}
# The grid is finite (every entry is a list), so never request more
# iterations than there are distinct combinations.
n_candidates=1
for candidate_values in model_params.values():
    n_candidates*=len(candidate_values)
ran=RandomizedSearchCV(sgc,param_distributions=model_params,cv=5,n_jobs=-1,verbose=2,
                       n_iter=min(100,n_candidates),random_state=search_seed)

In [None]:
# Run the search on all scaled training rows against df['Response'].
ran.fit(scaled_df,df['Response'])

In [None]:
# Show the best parameter combination found by the search.
ran.best_params_

In [None]:
# Show the cross-validated score of the best parameter combination.
ran.best_score_

In [None]:
# Predict on the scaled test features with the fitted search object.
y_pred=ran.predict(scaled_dft)

In [None]:
# Pair each test id with its predicted response.
submission_columns={'id':df_test['id'],'Response':y_pred}
submissions=pd.DataFrame(submission_columns)

# Building our Neural-Net model

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks

In [None]:
# Take the input width from the training matrix instead of hard-coding it, so
# the network keeps working if the number of feature columns changes.
n_features=xtr.shape[1]

# First block: Dense -> BatchNormalization (no dropout).
network_layers=[layers.Dense(units=1024,activation='relu',input_shape=[n_features]),
                layers.BatchNormalization()]
# Six identical hidden blocks: Dense -> Dropout -> BatchNormalization.
for _ in range(6):
    network_layers+=[layers.Dense(units=1024,activation='relu'),
                     layers.Dropout(0.27),
                     layers.BatchNormalization()]
# Single sigmoid unit for the binary output.
network_layers.append(layers.Dense(units=1,activation='sigmoid'))

model=keras.Sequential(network_layers)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Early stopping: an improvement must be at least 0.001 to count, training
# stops after 20 epochs without one, and the best weights are restored
# (the monitored quantity is left at the Keras default).
early_stop=callbacks.EarlyStopping(
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
)

# Train on the training split and evaluate on the held-out split each epoch.
fit_options=dict(
    validation_data=(xte, yte),
    batch_size=512,
    epochs=100,
    callbacks=[early_stop],
)
history = model.fit(xtr, ytr, **fit_options)

In [None]:
# Collect the per-epoch metrics recorded during fitting into a DataFrame,
# plot training vs. validation loss and report the lowest validation loss.
history_df = pd.DataFrame(history.history)
loss_curves = history_df[['loss', 'val_loss']]
loss_curves.plot();
lowest_val_loss = loss_curves['val_loss'].min()
print("Minimum validation loss: {}".format(lowest_val_loss))

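## Result: 87.6% accuracy

Note: accuracy alone can be misleading if `Response` is imbalanced — compare it against the majority-class baseline before drawing conclusions.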