In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
# Read the raw churn dataset; row 0 of the CSV supplies the column names.
churn = pd.read_csv(
    '../input/churn-modelling/Churn_Modelling.csv',
    header=0,
)
churn.head()

Let's drop RowNumber, CustomerId, and Surname, as they wouldn't contribute to our analysis.

In [None]:
# Remove the identifier-like columns, which carry no predictive signal.
churn = churn.drop(columns=['RowNumber', 'CustomerId', 'Surname'])
churn.head()

The Exited column is our target, so let's remove it from the features and store it separately.

In [None]:
# Keep the label column on its own, then leave only the features in `churn`.
target = churn['Exited']
churn = churn.drop(columns='Exited')
churn.head()

In [None]:
# Preview the first few values of the separated target column.
target.head()

In [None]:
# Show column dtypes and non-null counts for the feature frame.
churn.info()

There are no missing entries or null values. However, some fields have an improper data type: HasCrCard and IsActiveMember should be boolean values instead of int (Gender is a text category and is encoded separately below).

In [None]:
# Cast the two 0/1 flag columns to bool, then re-check the dtypes.
churn = churn.astype(dict.fromkeys(['HasCrCard', 'IsActiveMember'], bool))
churn.info()

In [None]:
# Cast the target column to bool as well.
target=target.astype(bool)

In [None]:
# Summary statistics for the feature frame.
churn.describe()

Let's focus on the categorical variables. We have to convert them to numerical variables for our model to work with them. This can be done using the get_dummies function.

In [None]:
# One-hot encode the two text columns; each category becomes its own
# indicator column named "<column>_<category>".
churn_updated = pd.get_dummies(
    data=churn,
    prefix=['Geography', 'Gender'],
    columns=['Geography', 'Gender'],
)
churn_updated.head()

In [None]:
# Inspect the dtypes of the frame after one-hot encoding.
churn_updated.info()

As we can see, these new columns are of an integer type. Let's update them to bool; otherwise our model would treat them as numbers, which could impact its performance.

In [None]:
# Cast every one-hot indicator column to bool, then re-check the dtypes.
churn_updated = churn_updated.astype(dict.fromkeys(
    [
        'Geography_France',
        'Geography_Germany',
        'Geography_Spain',
        'Gender_Female',
        'Gender_Male',
    ],
    bool,
))
churn_updated.info()

Now things look good. We can split the data into train, dev and test sets to get started with our model design.

In [None]:
# Positional split of the feature rows: first 8000 for training, the next
# 1000 for the dev set and the following 1000 for the test set.
X_train = churn_updated.iloc[:8000]
X_dev = churn_updated.iloc[8000:9000]
X_test = churn_updated.iloc[9000:10000]

print(X_train.shape,'\n',X_dev.shape,'\n',X_test.shape)

In [None]:
# Slice the labels by position with the same boundaries as the feature split.
Y_train = target.iloc[:8000]
Y_dev = target.iloc[8000:9000]
Y_test = target.iloc[9000:10000]

In [None]:
# Confirm the label splits have the expected lengths.
print(Y_train.shape,'\n',Y_dev.shape,'\n',Y_test.shape)

Let's use scikit-learn's StandardScaler to bring all the features to a comparable scale. This helps gradient descent converge faster.

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to the dev and test splits. Re-fitting the scaler on
# each split would standardize every split with its own statistics, so the
# same raw value would map to different model inputs and the held-out data
# would leak into preprocessing.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_dev = sc.transform(X_dev)
X_test = sc.transform(X_test)

In [None]:
# Sanity check: the test features and labels should have the same row count.
print(X_test.shape,'\n')
print(Y_test.shape,'\n')

The following step converts all the data into NumPy arrays (this is just a precautionary measure).

In [None]:
# Coerce every split to a NumPy array before handing it to Keras.
X_train, Y_train = np.asarray(X_train), np.asarray(Y_train)
X_dev, Y_dev = np.asarray(X_dev), np.asarray(Y_dev)
X_test, Y_test = np.asarray(X_test), np.asarray(Y_test)

Now that we have data in the right form, lets import the required packages and get started with building models!!!

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow import constant, float32
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# Model 1: a small fully connected network for binary churn prediction.
# Two ReLU hidden layers (64 and 32 units) feed a single sigmoid unit whose
# output is read as the probability that the customer exits.

classifier = Sequential()

# Take the input width from the training matrix instead of hard-coding 13, so
# the model keeps working if the set of feature columns changes.
classifier.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
classifier.add(Dense(32, activation='relu'))
classifier.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy is the loss that matches a sigmoid output on a 0/1
# target; mean squared error is a regression loss and gives weak gradients
# once the sigmoid saturates.
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Print the layer-by-layer summary of model 1.
classifier.summary()

In [None]:
# Train model 1 for 100 epochs and keep the returned training history.
history = classifier.fit(x=X_train, y=Y_train, epochs=100)

In [None]:
# Re-check that the test features and labels still line up after training.
print(X_test.shape,'\n')
print(Y_test.shape,'\n')

In [None]:
# Evaluate model 1 on the dev set (reports the loss and the accuracy metric).
classifier.evaluate(X_dev,Y_dev)

We note that the <b>accuracy of model 1 on the training set is 90% and on the dev set is 85%.</b> We can be sure that the model is not impacted by overfitting. However, the model can do much better on the training set.

Let's create different models to work with the available data, so that we can select the best one.


<h3>Model 2</h3>

In [None]:
# Model 2: same layout as the first model but one hidden layer deeper
# (64 -> 64 -> 32 ReLU units, then a single sigmoid output unit).

classifier2 = keras.Sequential()

# Derive the input width from the training matrix rather than hard-coding 13,
# so the model follows any change in the number of feature columns.
classifier2.add(keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
classifier2.add(keras.layers.Dense(64, activation='relu'))
classifier2.add(keras.layers.Dense(32, activation='relu'))
classifier2.add(keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# Compile model 2. Binary cross-entropy is the loss that matches a sigmoid
# output on a 0/1 target; mean squared error is a regression loss and trains
# poorly once the sigmoid saturates.

classifier2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train model 2 for 100 epochs and keep the returned training history.
history2 = classifier2.fit(x=X_train, y=Y_train, epochs=100)

In [None]:
# Evaluate model 2 on the dev set. This must use classifier2: evaluating
# `classifier` here would just report model 1's dev score a second time.
classifier2.evaluate(X_dev, Y_dev)

<b>Model 2 has a training set accuracy of 96% and dev set accuracy of 83.9%</b>

<h3>Model 3</h3>

In [None]:
# Model 3: same depth as model 1 (64 -> 32 -> 1) but with sigmoid activations
# in the hidden layers instead of ReLU.
classifier3 = keras.Sequential()

# Derive the input width from the training matrix rather than hard-coding 13.
classifier3.add(keras.layers.Dense(64, activation='sigmoid', input_shape=(X_train.shape[1],)))
classifier3.add(keras.layers.Dense(32, activation='sigmoid'))
classifier3.add(keras.layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy is the loss that matches a sigmoid output on a 0/1
# target; mean squared error is a regression loss.
classifier3.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train model 3 for 100 epochs.
classifier3.fit(x=X_train, y=Y_train, epochs=100)


In [None]:
# Evaluate model 3 on the dev set (reports the loss and the accuracy metric).
classifier3.evaluate(X_dev,Y_dev)

<b>Using a model that uses Sigmoid function impacts the performance of the model on the training set too!

Training set accuracy: 87%
Dev set accuracy: 84%
</b>    

<b>Lets go with model one, as its dev set accuracy seems to be the highest</b>

In [None]:
# Run the selected model (model 1) on the test features and check what kind
# of object predict() returns.
prediction=classifier.predict(X_test)
print(type(prediction))

In [None]:
# Threshold the model's sigmoid outputs at 0.5 in one vectorized step instead
# of a Python loop over rows. Values <= 0.5 become 0 (False) and everything
# else becomes 1 (True). Assigning through prediction[...] updates the
# existing array in place, so its dtype and shape stay as they were.
prediction[...] = ~(prediction <= 0.5)

prediction[0:5]