In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Session-wide TensorFlow options; the resulting session is handed to Keras below.
# log_device_placement logs the device each operation ran on
# (nothing gets printed in Jupyter, only when run standalone).
config = tf.ConfigProto(log_device_placement=True)
# Dynamically grow the memory used on the GPU.
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Make this TensorFlow session the default session for Keras.
set_session(sess)


Using TensorFlow backend.


In [2]:
# Load the raw churn dataset and peek at its last five rows.
df = pd.read_csv(
    filepath_or_buffer="../Section 39 - Artificial Neural Networks (ANN)/Churn_Modelling.csv"
)
df.tail(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.0,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.0,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1
9999,10000,15628319,Walker,792,France,Female,28,4,130142.79,1,1,0,38190.78,0


In [3]:
# Keep only the modelling columns: ten features followed by the target
# ('Exited'). RowNumber, CustomerId and Surname are left out.
_df = df[[
    'CreditScore',
    'Geography',
    'Gender',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'HasCrCard',
    'IsActiveMember',
    'EstimatedSalary',
    'Exited',
]]
_df.shape

(10000, 11)

In [4]:
# Feature matrix = every column but the last; target vector = the last column.
X, y = _df.iloc[:, :-1].values, _df.iloc[:, -1].values

# Sanity check: shapes plus the first three rows of each.
X.shape, X[:3], y.shape, y[:3]

((10000, 10), array([[619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
        [608, 'Spain', 'Female', 41, 1, 83807.86, 1, 0, 1, 112542.58],
        [502, 'France', 'Female', 42, 8, 159660.8, 3, 1, 0, 113931.57]],
       dtype=object), (10000,), array([1, 0, 1], dtype=int64))

In [5]:
# Number of rows per country in the Geography column.
_df.Geography.value_counts()

France     5014
Germany    2509
Spain      2477
Name: Geography, dtype: int64

In [6]:
# Replace the Geography strings in column 1 with integer codes.
label_encoder = LabelEncoder()
label_encoder.fit(X[:, 1])
X[:, 1] = label_encoder.transform(X[:, 1])
X

array([[619, 0, 'Female', ..., 1, 1, 101348.88],
       [608, 2, 'Female', ..., 0, 1, 112542.58],
       [502, 0, 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 0, 'Female', ..., 0, 1, 42085.58],
       [772, 1, 'Male', ..., 1, 0, 92888.52],
       [792, 0, 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [7]:
# Replace the Gender strings in column 2 with integer codes.
# The same encoder object is refit here, so afterwards it holds the Gender classes.
label_encoder.fit(X[:, 2])
X[:, 2] = label_encoder.transform(X[:, 2])
X

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

In [8]:
# One-hot encode column 1 (the integer Geography codes). The encoder returns
# a sparse matrix, so convert it to a dense array before inspecting it.
hot_encoder = OneHotEncoder(categorical_features=[1])
hot_encoder.fit(X)
X = hot_encoder.transform(X).toarray()
X.shape, X[0]

((10000, 12),
 array([1.0000000e+00, 0.0000000e+00, 0.0000000e+00, 6.1900000e+02,
        0.0000000e+00, 4.2000000e+01, 2.0000000e+00, 0.0000000e+00,
        1.0000000e+00, 1.0000000e+00, 1.0000000e+00, 1.0134888e+05]))

In [9]:
# Drop column 0 so the matrix goes from 12 to 11 columns. Judging by the
# preceding cell's output, column 0 is the first one-hot Geography dummy;
# presumably it is removed to avoid the dummy variable trap - TODO confirm.
# NOTE(review): this cell is not idempotent - running it again removes one
# more column from X.
X = X[:, 1:]
X.shape

(10000, 11)

In [10]:
# Hold out 20% of the rows as a test set. A fixed random_state makes the
# split - and every metric computed from it further down - reproducible
# across kernel restarts instead of changing on each run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((8000, 11), (8000,), (2000, 11), (2000,))

In [11]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so the test set does not influence the
# scaling statistics.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras import backend as K
# import tensorflow as tf
# sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# sess

In [13]:
# Network layout: 11 inputs -> 6 -> 6 -> 1.
#
# `units` (formerly output_dim) is the number of nodes in a layer. There is no
# fixed rule for choosing it; a common rule of thumb is the average of the
# number of input nodes and output nodes:
#   - independent variables (input layer)  = 11
#   - dependent variable   (output layer)  = 1 (binary 0/1 needs a single node)
#   - hidden layer size                    = (11 + 1) / 2 = 6
# `input_dim` is the number of independent (input) variables.
# `kernel_initializer` controls how the weights are initialised.
classifier = Sequential([
    # 1st hidden layer; also declares the input size.
    Dense(units=6,
          input_dim=11,
          activation='relu',
          kernel_initializer='uniform'),
    # 2nd hidden layer; input_dim is not needed because the previous layer
    # already fixes the incoming shape.
    Dense(units=6,
          activation='relu',
          kernel_initializer='uniform'),
    # Output layer: one sigmoid node because the outcome is binary.
    Dense(units=1,
          activation='sigmoid',
          kernel_initializer='uniform'),
])
# For more than two categories use softmax with one output node per category,
# with one-hot encoded targets (e.g. three classes => 1 0 0 / 0 1 0 / 0 0 1).

In [14]:
# Compile the network.
#   - metrics accepts a list, so several metrics can be tracked at once
#   - binary_crossentropy is the loss for a binary dependent variable; with
#     more than two classes use categorical_crossentropy instead
#   - 'adam' is a variant of stochastic gradient descent
classifier.compile(loss="binary_crossentropy",
                   optimizer="adam",
                   metrics=["accuracy"])

# Train the network.
#   - batch_size: number of samples passed through the ANN per weight update
#   - epochs: number of full passes over the training data
classifier.fit(X_train, y_train,
               epochs=3,
               batch_size=10)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x197664a70b8>

In [15]:
# predict() returns, for every test customer, the probability of leaving the
# bank - not a hard 0/1 class label.
y_pred = classifier.predict(x=X_test)
y_pred

array([[0.06881738],
       [0.07774834],
       [0.06408882],
       ...,
       [0.38106424],
       [0.06657935],
       [0.49437112]], dtype=float32)

In [16]:
# Turn the predicted probabilities into True / False decisions with a 0.5
# cut-off (True when the probability is larger than 0.5).
# The right threshold depends on the need: a high-stakes task such as deciding
# whether a tumour is malignant may call for a different cut-off, whereas for
# predicting the probability of a user action 0.5 is fine.
y_pred = np.greater(y_pred, 0.5)
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [17]:
from sklearn.metrics import confusion_matrix
# Confusion matrix of true labels against thresholded predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[1624,   18],
       [ 310,   48]], dtype=int64)

In [18]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

In [21]:
class testnjob():
    """Cross-validate the churn ANN with scikit-learn, using parallel workers.

    The cross-validation runs inside ``__init__``; the per-fold scores are
    stored in ``self.accuracies`` and can be printed with
    ``print_accuracies()``.
    """

    def __init__(self, X_train, y_train, batch_size=10, epochs=3, cv=10, n_jobs=-1):
        """Wrap the Keras model for scikit-learn and run k-fold cross-validation.

        Parameters
        ----------
        X_train, y_train : training features and labels passed to
            ``cross_val_score``.
        batch_size : samples per weight update (default 10).
        epochs : full passes over each training fold (default 3).
        cv : number of folds (default 10).
        n_jobs : number of parallel workers; -1 uses all processors.
        """
        # The keyword must be `epochs`: the Keras 2 wrapper only forwards
        # arguments that `Sequential.fit` names explicitly, so the legacy
        # `nb_epoch` is silently dropped and every fold would train for the
        # default single epoch.
        classifier = KerasClassifier(build_fn=self.build_classifier,
                                     batch_size=batch_size,
                                     epochs=epochs)
        self.accuracies = cross_val_score(estimator=classifier,
                                          X=X_train,
                                          y=y_train,
                                          cv=cv,
                                          n_jobs=n_jobs)

    def build_classifier(self):
        """Build and compile the 11 -> 6 -> 6 -> 1 network used for each fold."""
        classifier = Sequential()
        # 1st hidden layer; input_dim declares the 11 input features.
        classifier.add(Dense(input_dim=11,
                             units=6,
                             kernel_initializer='uniform',
                             activation='relu'))
        # 2nd hidden layer.
        classifier.add(Dense(units=6,
                             kernel_initializer='uniform',
                             activation="relu"))
        # Output layer: a single sigmoid node for the binary outcome.
        classifier.add(Dense(units=1,
                             kernel_initializer='uniform',
                             activation="sigmoid"))
        classifier.compile(optimizer="adam",
                           loss="binary_crossentropy",
                           metrics=["accuracy"])
        return classifier

    def message_print(self, msg):
        """Print ``msg`` framed by a line of '#' characters above and below."""
        print("##################################")
        print(msg)
        print("##################################")

    def print_accuracies(self):
        """Print the per-fold accuracies, then their mean and standard deviation."""
        self.message_print('ACCURACIES:' + str(self.accuracies))
        self.message_print('MEAN:' + str(np.mean(self.accuracies)))
        self.message_print('STD:' + str(np.std(self.accuracies)))

In [None]:
if __name__ == "__main__":
    # X_train and y_train must already exist at this point, i.e. the
    # preprocessing (encoding, split, scaling) has to be done beforehand.
    # Constructing testnjob runs the cross-validation immediately; the second
    # call prints the per-fold accuracies, their mean and their std.
    # NOTE(review): the __main__ guard is presumably here because the
    # cross-validation uses n_jobs=-1 (parallel workers) - confirm.
    test_obj = testnjob(X_train, y_train)
    test_obj.print_accuracies()