In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Show which TensorFlow version this notebook was run with.
tf.__version__

'2.0.0'

In [3]:
# Load the churn data set. The first three columns are left out of the
# feature matrix, and the final column is used as the target.
dataset = pd.read_csv('Churn_Modelling.csv')
X, y = dataset.iloc[:, 3:-1].values, dataset.iloc[:, -1].values

In [9]:
# Inspect the target vector (the last column of the dataset).
y

array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

In [10]:
# Inspect the raw feature matrix (dataset columns 3 up to, but excluding, the last).
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [11]:
# Look at a single feature row (index 1) to see every column's raw value.
X[1]

array([608, 'Spain', 'Female', 41, 1, 83807.86, 1, 0, 1, 112542.58],
      dtype=object)

Encoding the categorical variables

In [12]:
from sklearn.preprocessing import LabelEncoder

# Column 2 holds the gender strings; replace them in place with integer codes.
# LabelEncoder numbers the classes in sorted order, so 'Female' -> 0 and 'Male' -> 1.
le = LabelEncoder()
le.fit(X[:, 2])
X[:, 2] = le.transform(X[:, 2])
X

array([[619, 'France', 0, ..., 1, 1, 101348.88],
       [608, 'Spain', 0, ..., 0, 1, 112542.58],
       [502, 'France', 0, ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 0, ..., 0, 1, 42085.58],
       [772, 'Germany', 1, ..., 1, 0, 92888.52],
       [792, 'France', 0, ..., 1, 0, 38190.78]], dtype=object)

In [16]:
# Re-check row 1: the gender entry (index 2) should now be an integer code.
X[1]

array([608, 'Spain', 0, 41, 1, 83807.86, 1, 0, 1, 112542.58], dtype=object)

In [14]:
# Label encoding (mapping each category to an integer) is appropriate when the
# categorical variable is ordinal, e.g. age groups where ages 0-9 map to 0,
# 10-19 map to 1, and so on, or when it has only two categories, as with the
# gender column encoded above.
# One-hot encoding should be used for every other categorical variable, i.e.
# wherever no such ordering is present.

In [17]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the column at index 1 (the country names) and pass every other
# column through unchanged. The encoded indicator columns are placed first in
# the result, ahead of the passed-through columns.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))

In [19]:
# Re-check row 1: the country string is replaced by one-hot indicator columns at the front.
X[1]

array([0.0, 0.0, 1.0, 608, 0, 41, 1, 83807.86, 1, 0, 1, 112542.58],
      dtype=object)

In [20]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

Feature scaling (required for deep learning)

In [21]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, so that nothing
# about the test set leaks into preprocessing, then apply that same scaling
# to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [22]:
# Inspect one training row after standardization.
X_train[1]

array([-1.01460667,  1.75486502, -0.57369368, -2.30455945,  0.91601335,
        0.30102557, -1.37744033, -0.00631193, -0.92159124,  0.64259497,
        0.9687384 , -0.74866447])

Building the ANN

In [23]:
# Seed TensorFlow and NumPy before the network is created so that weight
# initialisation and batch shuffling are repeatable when the notebook is run
# top to bottom from a fresh kernel (hardware-level nondeterminism may still
# cause small differences).
tf.random.set_seed(0)
np.random.seed(0)

# Fully connected binary classifier:
#   - two hidden layers of 6 ReLU units each,
#   - one sigmoid output unit, so every prediction lies between 0 and 1.
# No input shape is given; Keras infers it from the data on the first fit call.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

Compiling the ANN

In [24]:
# Optimizer: Adam, an adaptive variant of stochastic gradient descent.
# Loss: binary cross-entropy, the match for a two-class problem with a single
# sigmoid output. With more than two classes the usual pairing is a softmax
# output layer with categorical cross-entropy.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [27]:
# Train for 10 passes over the training set, updating weights after every
# mini-batch of 32 rows. The returned History object is the cell's output.
ann.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
)

Train on 8000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x144c3e3e288>

Prediction

In [28]:
# Score the held-out (already scaled) test rows; the sigmoid output layer
# yields one value between 0 and 1 per row.
y_pred = ann.predict(X_test)

In [29]:

# Inspect the raw model outputs before thresholding.
y_pred


array([[0.25847542],
       [0.32400507],
       [0.12266552],
       ...,
       [0.22186196],
       [0.16148964],
       [0.22995242]], dtype=float32)

In [31]:
# Turn the model outputs into hard class labels: True where the output is
# above 0.5. Note that this overwrites the raw outputs held in y_pred.
y_pred = np.greater(y_pred, 0.5)
y_pred


array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [34]:
# Print predictions next to the true labels, one row per test sample
# (left column: predicted label, right column: actual label).
print(np.hstack((y_pred.reshape(-1, 1), y_test.reshape(-1, 1))))

[[0 0]
 [0 1]
 [0 0]
 ...
 [0 0]
 [0 0]
 [0 0]]


In [32]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix layout: rows are the actual classes, columns the predicted
# classes. The accuracy score is left as the last expression so the notebook
# displays it.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

[[1510   85]
 [ 189  216]]


0.863

In [33]:
# Predict for one new customer. predict() expects a 2-D array (one row per
# sample), hence the double brackets. The row has to follow the encoded column
# layout used for training: the three one-hot country indicators first, then
# the remaining features in their original order. It is standardized with the
# scaler fitted on the training set before it reaches the network.
single_customer = [[1, 0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]]
single_output = ann.predict(sc.transform(single_customer))
print(single_output > 0.5)  # boolean: True when the model output exceeds 0.5

[[False]]
