### Importing Required Libraries

In [1]:
import tensorflow as tf
import keras

Using TensorFlow backend.


In [2]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense

In [3]:
import numpy as np
import pandas as pd

### Reading the file using Pandas

In [4]:
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
# Checking Null Values
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [6]:
# Finding dtype of columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [7]:
# Separating Target Variable and dropping not required columns.
X = df.drop(labels=['CustomerId', 'Surname', 'RowNumber', 'Exited'], axis = 1)
y = df['Exited']

In [8]:
print(X.shape, y.shape)

(10000, 10) (10000,)


### Label Encoding

#### I'm taking this is an optional

In [9]:
#from sklearn.preprocessing import LabelEncoder
#label = LabelEncoder()

In [10]:
#X['Geography'] = label.fit_transform(X['Geography'])

In [11]:
#X['Gender'] = label.fit_transform(X['Gender'])
#X.head()

### Getting Dummies

In [12]:
X = pd.get_dummies(X, drop_first=True, columns=['Geography', 'Gender'])
X.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,1,0


### Train Test Split

In [13]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

### Feature Scaling

In [14]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [15]:
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(8000, 11) (8000,)
(2000, 11) (2000,)


### Building ANN

In [17]:
model = Sequential()
model.add(Flatten())
model.add(Dense(X.shape[1], activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation = 'sigmoid'))

#### 1) Optimizer is Adam

In [18]:
model.compile(optimizer='adam', loss = 'binary_crossentropy', metrics=['accuracy'])

In [19]:
model.fit(X_train, y_train.to_numpy(),epochs = 10)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1558a6a6888>

In [20]:
prediction = model.predict_classes(X_test)

In [21]:
model.evaluate(X_test, y_test.to_numpy())



[0.34102217948436736, 0.856]

In [22]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [23]:
print("Accuracy Score: ", accuracy_score(y_test, prediction))
print("Precision Score: ", precision_score(y_test, prediction))
print("Recall Score: ", recall_score(y_test, prediction))
print("F1 Score: ", f1_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))

Accuracy Score:  0.856
Precision Score:  0.6778115501519757
Recall Score:  0.5506172839506173
F1 Score:  0.6076294277929156
Confusion Matrix:
 [[1489  106]
 [ 182  223]]


#### 2) Optimizer is Adagrad

In [24]:
model.compile(optimizer='adagrad', loss = 'binary_crossentropy', metrics=['accuracy'])

In [25]:
model.fit(X_train, y_train.to_numpy(),epochs = 10)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1558a88b388>

In [26]:
prediction = model.predict_classes(X_test)

In [27]:
model.evaluate(X_test, y_test.to_numpy())



[0.33432381176948545, 0.858]

In [28]:
print("Accuracy Score: ", accuracy_score(y_test, prediction))
print("Precision Score: ", precision_score(y_test, prediction))
print("Recall Score: ", recall_score(y_test, prediction))
print("F1 Score: ", f1_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))

Accuracy Score:  0.858
Precision Score:  0.7168458781362007
Recall Score:  0.49382716049382713
F1 Score:  0.5847953216374268
Confusion Matrix:
 [[1516   79]
 [ 205  200]]


#### 3) Optimizer is SGD(Stochastic Gradient Descent)

In [29]:
model.compile(optimizer='SGD', loss = 'binary_crossentropy', metrics=['accuracy'])

In [30]:
model.fit(X_train, y_train.to_numpy(),epochs = 10)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x15585b99408>

In [31]:
prediction = model.predict_classes(X_test)

In [32]:
model.evaluate(X_test, y_test.to_numpy())



[0.333136519908905, 0.8615]

In [33]:
print("Accuracy Score: ", accuracy_score(y_test, prediction))
print("Precision Score: ", precision_score(y_test, prediction))
print("Recall Score: ", recall_score(y_test, prediction))
print("F1 Score: ", f1_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))

Accuracy Score:  0.8615
Precision Score:  0.7269503546099291
Recall Score:  0.5061728395061729
F1 Score:  0.596797671033479
Confusion Matrix:
 [[1518   77]
 [ 200  205]]


#### 4) Optimizer is RMSprop

In [34]:
model.compile(optimizer='RMSprop', loss = 'binary_crossentropy', metrics=['accuracy'])

In [35]:
model.fit(X_train, y_train.to_numpy(),epochs = 10)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1558b4cf648>

In [36]:
prediction = model.predict_classes(X_test)

In [37]:
model.evaluate(X_test, y_test.to_numpy())



[0.33438518905639647, 0.86]

In [38]:
print("Accuracy Score: ", accuracy_score(y_test, prediction))
print("Precision Score: ", precision_score(y_test, prediction))
print("Recall Score: ", recall_score(y_test, prediction))
print("F1 Score: ", f1_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))

Accuracy Score:  0.86
Precision Score:  0.7323420074349443
Recall Score:  0.48641975308641977
F1 Score:  0.5845697329376854
Confusion Matrix:
 [[1523   72]
 [ 208  197]]


### By Comparing both accuaracy score and F1 score we can clearly identify that the model is over fitting
### To eliminate the over fitting we use dropout in the hidden layers

Dropout is a regularization technique patented by Google for reducing overfitting in neural networks by preventing complex co-adaptations on training data. It is a very efficient way of performing model averaging with neural networks. The term "dropout" refers to dropping out units (both hidden and visible) in a neural network

In [39]:
# Importing the Dropout()
from tensorflow.keras.layers import Dropout

### Activation Relu

In [40]:
D_model = Sequential()
D_model.add(Dropout(0.2))
D_model.add(Flatten())
D_model.add(Dropout(0.5))
D_model.add(Dense(X.shape[1], activation='relu'))
D_model.add(Dropout(0.5))
D_model.add(Dense(128, activation='relu'))
D_model.add(Dropout(0.5))
D_model.add(Dense(1, activation = 'sigmoid'))

In [41]:
D_model.compile(optimizer='SGD', loss = 'binary_crossentropy', metrics=['accuracy'])

In [42]:
D_model.fit(X_train, y_train.to_numpy(),epochs = 10)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1558d027088>

In [43]:
prediction = D_model.predict_classes(X_test)

In [44]:
D_model.evaluate(X_test, y_test.to_numpy())



[0.48650478529930113, 0.7975]

In [45]:
print("Accuracy Score: ", accuracy_score(y_test, prediction))
print("Precision Score: ", precision_score(y_test, prediction))
print("Recall Score: ", recall_score(y_test, prediction))
print("F1 Score: ", f1_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))

Accuracy Score:  0.7975
Precision Score:  0.0
Recall Score:  0.0
F1 Score:  0.0
Confusion Matrix:
 [[1595    0]
 [ 405    0]]


  _warn_prf(average, modifier, msg_start, len(result))


### Activation is Softmax

In [46]:
D_model = Sequential()
D_model.add(Flatten())
D_model.add(Dense(X.shape[1], activation='softmax'))
D_model.add(Dropout(0.5))
D_model.add(Dense(128, activation='softmax'))
D_model.add(Dropout(0.5))
D_model.add(Dense(1, activation = 'sigmoid'))

In [47]:
D_model.compile(optimizer='SGD', loss = 'binary_crossentropy', metrics=['accuracy'])

In [48]:
D_model.fit(X_train, y_train.to_numpy(),epochs = 10)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1558ea5b2c8>

In [49]:
prediction = D_model.predict_classes(X_test)

In [50]:
D_model.evaluate(X_test, y_test.to_numpy())



[0.503883594751358, 0.7975]

In [51]:
print("Accuracy Score: ", accuracy_score(y_test, prediction))
print("Precision Score: ", precision_score(y_test, prediction))
print("Recall Score: ", recall_score(y_test, prediction))
print("F1 Score: ", f1_score(y_test, prediction))
print("Confusion Matrix:\n", confusion_matrix(y_test, prediction))

Accuracy Score:  0.7975
Precision Score:  0.0
Recall Score:  0.0
F1 Score:  0.0
Confusion Matrix:
 [[1595    0]
 [ 405    0]]


  _warn_prf(average, modifier, msg_start, len(result))
