In [2]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv('bank-churn.csv')

checking data types of every column to feed into model

In [4]:
df.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

Removing customerId and surname as it is useless for the prediction

In [13]:
df1 = df.drop(columns=['CustomerId', 'Surname'])

In [14]:
df1.dtypes

RowNumber            int64
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [15]:
for column in df1:
    if df1[column].dtype == 'object':
        print(f'{column}: {df1[column].unique()}')

Geography: ['France' 'Spain' 'Germany']
Gender: ['Female' 'Male']


In [20]:
df1['Gender'].replace({'Female':1,'Male':0},inplace=True)

In [21]:
df2 = pd.get_dummies(data=df1, columns=['Geography'])
df2

Unnamed: 0,RowNumber,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,1,619,1,42,2,0.00,1,1,1,101348.88,1,1,0,0
1,2,608,1,41,1,83807.86,1,0,1,112542.58,0,0,0,1
2,3,502,1,42,8,159660.80,3,1,0,113931.57,1,1,0,0
3,4,699,1,39,1,0.00,2,0,0,93826.63,0,1,0,0
4,5,850,1,43,2,125510.82,1,1,1,79084.10,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,771,0,39,5,0.00,2,1,0,96270.64,0,1,0,0
9996,9997,516,0,35,10,57369.61,1,1,1,101699.77,0,1,0,0
9997,9998,709,1,36,7,0.00,1,0,1,42085.58,1,1,0,0
9998,9999,772,0,42,3,75075.31,2,1,0,92888.52,1,0,1,0


In [23]:
cols_to_scale = ['CreditScore','Age','Tenure', 'Balance', 'EstimatedSalary']

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df2[cols_to_scale] = scaler.fit_transform(df2[cols_to_scale])
df2

Unnamed: 0,RowNumber,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,1,0.538,1,0.324324,0.2,0.000000,1,1,1,0.506735,1,1,0,0
1,2,0.516,1,0.310811,0.1,0.334031,1,0,1,0.562709,0,0,0,1
2,3,0.304,1,0.324324,0.8,0.636357,3,1,0,0.569654,1,1,0,0
3,4,0.698,1,0.283784,0.1,0.000000,2,0,0,0.469120,0,1,0,0
4,5,1.000,1,0.337838,0.2,0.500246,1,1,1,0.395400,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,0.842,0,0.283784,0.5,0.000000,2,1,0,0.481341,0,1,0,0
9996,9997,0.332,0,0.229730,1.0,0.228657,1,1,1,0.508490,0,1,0,0
9997,9998,0.718,1,0.243243,0.7,0.000000,1,0,1,0.210390,1,1,0,0
9998,9999,0.844,0,0.324324,0.3,0.299226,2,1,0,0.464429,1,0,1,0


In [24]:
df2.dtypes

RowNumber              int64
CreditScore          float64
Gender                 int64
Age                  float64
Tenure               float64
Balance              float64
NumOfProducts          int64
HasCrCard              int64
IsActiveMember         int64
EstimatedSalary      float64
Exited                 int64
Geography_France       uint8
Geography_Germany      uint8
Geography_Spain        uint8
dtype: object

In [26]:
for col in df2:
    if col == 'CreditScore':
        continue
    print(f'{col}: {df2[col].unique()}')

RowNumber: [    1     2     3 ...  9998  9999 10000]
Gender: [1 0]
Age: [0.32432432 0.31081081 0.28378378 0.33783784 0.35135135 0.43243243
 0.14864865 0.12162162 0.17567568 0.08108108 0.21621622 0.09459459
 0.22972973 0.36486486 0.54054054 0.18918919 0.27027027 0.37837838
 0.24324324 0.2027027  0.2972973  0.44594595 0.58108108 0.41891892
 0.25675676 0.01351351 0.64864865 0.51351351 0.10810811 0.04054054
 0.5        0.77027027 0.05405405 0.16216216 0.13513514 0.63513514
 0.40540541 0.45945946 0.52702703 0.74324324 0.39189189 0.48648649
 0.72972973 0.02702703 0.66216216 0.82432432 0.59459459 0.47297297
 0.83783784 0.55405405 0.67567568 0.06756757 0.56756757 0.7027027
 0.60810811 0.62162162 0.         0.86486486 0.68918919 0.75675676
 0.71621622 0.78378378 0.7972973  0.94594595 0.90540541 0.89189189
 0.81081081 0.85135135 1.         0.87837838]
Tenure: [0.2 0.1 0.8 0.7 0.4 0.6 0.3 1.  0.5 0.9 0. ]
Balance: [0.         0.33403148 0.63635718 ... 0.22865702 0.29922631 0.51870777]
NumOfProduc

Splitting data for training

In [29]:
X = df2.drop('Exited', axis=1)
y = df2['Exited']

0       1
1       0
2       1
3       0
4       0
       ..
9995    0
9996    0
9997    1
9998    1
9999    0
Name: Exited, Length: 10000, dtype: int64

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=5)

In [40]:
X_train.shape

(8000, 13)

Making ANN using Keras

In [37]:
len(X_train.columns)

13

In [48]:
model = keras.Sequential([
    keras.layers.Dense(13, input_shape=(13,), activation='relu'),
    keras.layers.Dense(9, activation="relu"),
    keras.layers.Dense(2, activation='sigmoid')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=["accuracy"])
model.fit(X_train, y_train, epochs=40)
model.evaluate(X_test, y_test)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


[0.6229037046432495, 0.7975000143051147]