<a href="https://colab.research.google.com/github/obeabi/DeepLearning/blob/master/BankChurn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Written by Abiola Obembe
## Title : Bank Churn
### Date : 2020-09-04
### Objective: Neural network model for predicting customer churn

In [9]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import seaborn as sns
%matplotlib inline

# Confirmation printed once every import above has succeeded.
# NOTE(review): the message misspells "libraries" and says "installed" although
# this cell only imports; the runtime string is left unchanged here.
print("libraies installed successfully!")

libraies installed successfully!


In [10]:
# Report which TensorFlow release this notebook is running against
print(f" The tensorflow version is {tf.__version__}")

 The tensorflow version is 2.3.0


## Step 1: Data Pre-Processing

### Import dataset

In [12]:
# Load the customer records from the CSV file in the working directory
dataset = pd.read_csv(filepath_or_buffer="Churn_Modelling.csv")

# Preview the first five rows
dataset.head(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Data Cleaning

In [13]:
# Separate the independent variables (features) from the dependent variable (target).
# Features: every column from position 3 up to, but not including, the last one.
X = dataset.iloc[:, 3:-1].values
# Target: the last column.
y = dataset.iloc[:, -1].values

# Show both shapes, one per line
print(X.shape, y.shape, sep="\n")

(10000, 10)
(10000,)


In [20]:
# Count the null entries in every column, then report the per-column counts
# followed by the grand total over the whole table
missing_values = dataset.isnull().sum()
print(missing_values)
print(f"The total number of missing values is {missing_values.sum()}")

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64
The total number of missing values is 0


In [14]:
# Show the raw feature matrix; NumPy abbreviates the middle rows and columns with "..."
print(X)


[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]


In [15]:
# Show the target vector (the last column of the dataset); NumPy abbreviates it with "..."
print(y)

[1 0 1 ... 1 1 0]


### Encoding categorical variables

In [21]:
# Encoding the Independent Variable
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the two categorical feature columns (positions 1 and 2 of the
# feature matrix, i.e. Geography and Gender). Every other column is passed
# through unchanged and placed after the encoded columns.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1, 2])], remainder='passthrough')

# Encode the raw feature columns taken straight from `dataset` instead of the
# current value of `X`. Feeding `X` back into itself made this cell
# non-idempotent: running it a second time would one-hot encode columns 1 and 2
# of the already-encoded matrix. On a fresh top-to-bottom run the result is the
# same as before, because `X` holds exactly these columns at this point.
X = np.array(ct.fit_transform(dataset.iloc[:, 3:-1].values))
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


In [26]:
# Show the shape of the encoded feature matrix, then its first six columns
# (the one-hot columns followed by the first passthrough column)
print(X.shape, X[:, :6], sep="\n")

(10000, 13)
[[1.0 0.0 0.0 1.0 0.0 619]
 [0.0 0.0 1.0 1.0 0.0 608]
 [1.0 0.0 0.0 1.0 0.0 502]
 ...
 [1.0 0.0 0.0 1.0 0.0 709]
 [0.0 1.0 0.0 0.0 1.0 772]
 [1.0 0.0 0.0 1.0 0.0 792]]


### Split dataset into Training and Testing set

In [27]:
# Divide the data into a training set and a test set
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state is fixed at 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Report the shape of each of the four resulting arrays, one per line
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep="\n")

(8000, 13)
(2000, 13)
(8000,)
(2000,)


### Feature Scaling

In [28]:
# Standardise the features
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted scaler
# to both the training and the test features
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
print(X_train)

[[-0.99850112  1.71490137 -0.57273139 ... -1.55337352  0.97725852
   0.42739449]
 [ 1.00150113 -0.58312392 -0.57273139 ... -1.55337352 -1.02327069
  -1.02548708]
 [-0.99850112  1.71490137 -0.57273139 ...  0.64376017  0.97725852
  -0.94479772]
 ...
 [ 1.00150113 -0.58312392 -0.57273139 ...  0.64376017  0.97725852
  -0.14096853]
 [ 1.00150113 -0.58312392 -0.57273139 ...  0.64376017  0.97725852
   0.01781218]
 [-0.99850112  1.71490137 -0.57273139 ...  0.64376017 -1.02327069
  -1.15822478]]


## Step 2: Build the ANN

### Initialize ANN

In [None]:
# Initialize ANN network
# Fix TensorFlow's global random seed before any layer is created so that the
# randomly initialised weights are repeatable across "Restart & Run All"
# (the value 1 matches the random_state used for the train/test split).
tf.random.set_seed(1)

# Empty sequential container; layers are appended to it in the cells below
ann = tf.keras.models.Sequential()

### Add input and first hidden layer

In [None]:
# First hidden layer: fully connected, n_H1 units, ReLU activation.
# No input shape is declared here.
n_H1 = 6
ann.add(tf.keras.layers.Dense(n_H1, activation='relu'))

### Add second hidden layer

In [None]:
# Second hidden layer: fully connected, n_H2 units, ReLU activation
n_H2 = 3
ann.add(tf.keras.layers.Dense(n_H2, activation='relu'))

### Add the output layer

In [None]:
# Output layer: a single sigmoid unit, giving one value between 0 and 1 per
# customer for the binary target
ann.add(tf.keras.layers.Dense(1, activation='sigmoid'))

## Train the ANN