# Artificial Neural Network

## Part 1 - Data Preprocessing

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
dataset.groupby(['Exited'])['Exited'].count()

Exited
0    7963
1    2037
Name: Exited, dtype: int64

In [6]:
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [7]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()
X = X[:, 1:]

In [8]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = 0.2, 
                                                    random_state = 0)

In [9]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Part 2 - Create the ANN!

In [35]:
# Importing the Keras libraries and packages
import keras
from keras.utils import plot_model
from keras.models import Model,Sequential,load_model
from keras.layers import Input, Flatten, Dense, Dropout
from keras.layers.merge import concatenate
from keras import backend as K

In [11]:
def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [12]:
# Initialising the ANN
classifier = Sequential()

In [13]:
# Adding the input layer and the first hidden layer
classifier.add(Dense(units = X_train.shape[1], 
                     kernel_initializer = 'uniform', 
                     activation = 'relu', 
                     input_dim = X_train.shape[1]))

In [14]:
# Adding the second hidden layer
classifier.add(Dense(units = 6, 
                     kernel_initializer = 'uniform', 
                     activation = 'relu'))

In [15]:
# Adding a Dropout
classifier.add(Dropout(rate = 0.2))

In [16]:
# Adding the output layer
classifier.add(Dense(units = 1, 
                     kernel_initializer = 'uniform', 
                     activation = 'sigmoid'))

In [20]:
# Compiling the ANN
classifier.compile(optimizer = 'adamax', 
                   loss = 'binary_crossentropy', 
                   metrics = ['accuracy',f1])

In [18]:
X_train.shape

(8000, 11)

In [22]:
# Fitting the ANN to the Training set
classifier.fit(X_train,
               y_train,
               batch_size = 32,
               epochs = 5,
               validation_data=(X_test, y_test))

Train on 8000 samples, validate on 2000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f487588cf60>

In [23]:
# Fitting the ANN to the Training set
history = classifier.fit(X_train, 
                         y_train, 
                         batch_size = 32, 
                         epochs = 10,
                         validation_data=(X_test, y_test))

Train on 8000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
his_df = pd.DataFrame(history.history)

In [25]:
his_df

Unnamed: 0,val_loss,val_acc,val_f1,loss,acc,f1
0,0.353921,0.868,0.536606,0.375675,0.846,0.44865
1,0.351593,0.864,0.529951,0.374749,0.848625,0.456906
2,0.34868,0.8645,0.532045,0.370067,0.847625,0.470491
3,0.349196,0.864,0.547192,0.374166,0.846625,0.466117
4,0.348015,0.865,0.543843,0.370053,0.847,0.462003
5,0.346577,0.864,0.548771,0.366701,0.84525,0.448937
6,0.34581,0.865,0.551507,0.369694,0.84725,0.475291
7,0.346061,0.865,0.556417,0.365025,0.848875,0.46614
8,0.343662,0.8635,0.53812,0.366169,0.849,0.480222
9,0.34393,0.865,0.554526,0.364747,0.847875,0.485747


## Keras Functional API

In [28]:
input_shape = X_train.shape[1]

In [31]:
input_layer = Input(shape=(input_shape,))

hidden1 = Dense(units = X_train.shape[1],kernel_initializer = 'uniform',
                activation = 'relu')(input_layer)
hidden2 = Dense(units = 6,kernel_initializer = 'uniform',
                activation = 'relu')(hidden1)
dropout = Dropout(rate=0.2)(hidden2)
output_layer = Dense(units = 1,kernel_initializer = 'uniform',
                activation = 'sigmoid')(dropout)

model = Model(inputs=input_layer, outputs=output_layer)

# Compiling the ANN
model.compile(optimizer = 'adamax', loss = 'binary_crossentropy', metrics = ['accuracy',f1])

# summarize layers
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 11)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 11)                132       
_________________________________________________________________
dense_5 (Dense)              (None, 6)                 72        
_________________________________________________________________
dropout_2 (Dropout)          (None, 6)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 7         
Total params: 211
Trainable params: 211
Non-trainable params: 0
_________________________________________________________________
None


In [32]:
plot_model(model, to_file='model.png')

In [33]:
plot_model(classifier, to_file='classifier.png')

In [34]:
model.fit(X_train, 
          y_train,
          batch_size = 32,
          epochs = 10,
          validation_data=(X_test, y_test))

Train on 8000 samples, validate on 2000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f4874c48a90>

## Save and Load Model

In [36]:
model.save("Model.h5")

In [37]:
loaded_model = load_model("Model.h5")

ValueError: Unknown metric function:f1

## Part 3 - Making predictions and evaluating the model

In [20]:
classifier.predict(X_test)

array([[0.22698179],
       [0.31629607],
       [0.1657943 ],
       ...,
       [0.1703078 ],
       [0.15399894],
       [0.15634158]], dtype=float32)

In [21]:
# Predicting the Test set results
y_pred = classifier.predict_classes(X_test)
y_pred

array([[0],
       [0],
       [0],
       ...,
       [0],
       [0],
       [0]], dtype=int32)

In [22]:
np.unique(y_pred)

array([0, 1], dtype=int32)

#### Predicting a single new observation
"""Predict if the customer with the following informations will leave the bank:
Geography: France
Credit Score: 600
Gender: Male
Age: 40
Tenure: 3
Balance: 60000
Number of Products: 2
Has Credit Card: Yes
Is Active Member: Yes
Estimated Salary: 50000"""

In [23]:
new_prediction = classifier.predict_classes(sc.transform(np.array([[0.0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])))
new_prediction

array([[0]], dtype=int32)

In [24]:
# Evaluating the Keras Model
from sklearn.metrics import (confusion_matrix, accuracy_score, precision_score, 
                             recall_score, f1_score)

In [25]:
print('Confusion Matrix for ANN: \n',confusion_matrix(y_test, classifier.predict_classes(X_test)))
print('Accuracy for ANN: \n',accuracy_score(y_test, classifier.predict_classes(X_test)))
print('Precision for ANN: \n',precision_score(y_test, classifier.predict_classes(X_test)))
print('Recall for ANN: \n',recall_score(y_test, classifier.predict_classes(X_test)))
print('f1_score for ANN: \n',f1_score(y_test, classifier.predict_classes(X_test)))

Confusion Matrix for ANN: 
 [[1546   49]
 [ 265  140]]
Accuracy for ANN: 
 0.843
Precision for ANN: 
 0.7407407407407407
Recall for ANN: 
 0.345679012345679
f1_score for ANN: 
 0.4713804713804714


## Part 4 - Hyperparameter tuning for the Neural Network

In [26]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [43]:
# Function to get the Keras Model
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units = 6, input_dim=11, activation = 'relu'))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', 
                       loss = 'binary_crossentropy', 
                       metrics = ['accuracy'])
    return classifier

In [44]:
model = KerasClassifier(build_fn=build_classifier, verbose=0)

### Tune for batch_size and number of epochs

In [45]:
# define the grid search parameters for batch_size and epochs
batch_size = [100,200,500]
epochs = [1,2,3]

In [46]:
param_grid_1 = dict(batch_size=batch_size, epochs=epochs)
param_grid_1

{'batch_size': [100, 200, 500], 'epochs': [1, 2, 3]}

In [47]:
grid = GridSearchCV(estimator=model, 
                    param_grid=param_grid_1, 
                    #cv=5,
                    n_jobs=-1)

In [None]:
grid_result = grid.fit(X_train, y_train)

In [None]:
grid_result.best_score_

In [None]:
grid_result.best_params_

In [None]:
# Function to get the Keras Model
def build_classifier2(optimizer = 'adam'):
    classifier = Sequential()
    classifier.add(Dense(units = 6, activation = 'relu', input_dim=11))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, 
                       loss = 'binary_crossentropy', 
                       metrics = ['accuracy'])
    return classifier

In [None]:
model2 = KerasClassifier(build_fn=build_classifier2, verbose=0)

In [53]:
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']

In [54]:
param_grid_2 = dict(optimizer=optimizer)
param_grid_2

{'optimizer': ['SGD',
  'RMSprop',
  'Adagrad',
  'Adadelta',
  'Adam',
  'Adamax',
  'Nadam']}

In [None]:
grid2 = GridSearchCV(estimator=model2, 
                    param_grid=param_grid_2, 
                    #cv=5,
                    n_jobs=-1)

In [None]:
grid_result2 = grid2.fit(X_train, 
                         y_train, 
                         batch_size=grid_result.best_params_['batch_size'],
                         epochs=grid_result.best_params_['epochs'])

In [None]:
print("The best optimizer is: ",grid_result2.best_params_['optimizer'])

In [None]:
# Function to get the Keras Model
def build_classifier3(learning_rate=0.01):
    classifier = Sequential()
    classifier.add(Dense(units = 6, activation = 'relu', input_dim=11))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    optimizer = Adam(lr=learning_rate)
    classifier.compile(optimizer = optimizer, 
                       loss = 'binary_crossentropy', 
                       metrics = ['accuracy'])
    return classifier

In [None]:
model3 = KerasClassifier(build_fn=build_classifier3, verbose=0)

In [None]:
learning_rate = [0.0001, 0.001, 0.01, 0.1]

In [None]:
param_grid_3 = dict(learning_rate=learning_rate)
param_grid_3

In [None]:
grid3 = GridSearchCV(estimator=model3, 
                    param_grid=param_grid_3, 
                    #cv=5,
                    n_jobs=-1)

In [None]:
grid_result3 = grid3.fit(X_train, 
                         y_train, 
                         batch_size=grid_result.best_params_['batch_size'],
                         epochs=grid_result.best_params_['epochs'])

In [None]:
print("The optimum learning rate for the best optimizer is: ",grid_result3.best_params_['learning_rate'])

In [None]:
# Function to get the Keras Model
def build_classifier4(init_mode='uniform'):
    classifier = Sequential()
    classifier.add(Dense(units = 6, , input_dim=11, kernel_initializer=init_mode, activation = 'relu'))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    optimizer = Adam(lr=learning_rate)
    classifier.compile(optimizer = optimizer, 
                       loss = 'binary_crossentropy', 
                       metrics = ['accuracy'])
    return classifier

In [None]:
model4 = KerasClassifier(build_fn=build_classifier4, verbose=0)

In [None]:
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']

In [None]:
param_grid_4 = dict(init_mode=init_mode)
param_grid_4

In [None]:
grid4 = GridSearchCV(estimator=model4, 
                    param_grid=param_grid_4, 
                    #cv=5,
                    n_jobs=-1)

In [None]:
grid_result4 = grid4.fit(X_train, 
                         y_train, 
                         batch_size=grid_result.best_params_['batch_size'],
                         epochs=grid_result.best_params_['epochs'])

In [None]:
print("The best weight initialization method is: ",grid_result4.best_params_['init_mode'])

In [None]:
# Function to get the Keras Model
def build_classifier5(activation1='relu', activation2='relu'):
    classifier = Sequential()
    classifier.add(Dense(units = 6, input_dim=11, 
                         kernel_initializer=init_mode, 
                         activation = activation1))
    classifier.add(Dense(units = 6, kernel_initializer=init_mode, 
                         activation = activation2))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    optimizer = Adam(lr=learning_rate)
    classifier.compile(optimizer = optimizer, 
                       loss = 'binary_crossentropy', 
                       metrics = ['accuracy'])
    return classifier

In [None]:
model5 = KerasClassifier(build_fn=build_classifier5, verbose=0)

In [None]:
activation1 = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
activation2 = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']

In [None]:
param_grid_5 = dict(activation=activation)
param_grid_5

In [None]:
grid5 = GridSearchCV(estimator=model5, 
                    param_grid=param_grid_5, 
                    #cv=5,
                    n_jobs=-1)

In [None]:
grid_result5 = grid5.fit(X_train, 
                         y_train, 
                         batch_size=grid_result.best_params_['batch_size'],
                         epochs=grid_result.best_params_['epochs'])

In [None]:
print("The best layer activation is: ",grid_result5.best_params_['activation'])

In [None]:
# Function to get the Keras Model
def build_classifier6(neurons=1):
    classifier = Sequential()
    classifier.add(Dense(neurons, 
                         input_dim=11, 
                         kernel_initializer=init_mode, 
                         activation = activation))
    classifier.add(Dense(units = 1, activation = 'sigmoid'))
    optimizer = Adam(lr=learning_rate)
    classifier.compile(optimizer = optimizer, 
                       loss = 'binary_crossentropy', 
                       metrics = ['accuracy'])
    return classifier

In [None]:
model6 = KerasClassifier(build_fn=build_classifier6, verbose=0)

In [None]:
neurons = [1, 5, 10, 15, 20, 25, 30]

In [None]:
param_grid_6= dict(neurons=neurons)
param_grid_6

In [None]:
grid6 = GridSearchCV(estimator=model6, 
                    param_grid=param_grid_6, 
                    #cv=5,
                    n_jobs=-1)

In [None]:
grid_result6 = grid6.fit(X_train, 
                         y_train, 
                         batch_size=grid_result.best_params_['batch_size'],
                         epochs=grid_result.best_params_['epochs'])

In [None]:
print("The optimum number of neurons in hidden layer is: ",grid_result6.best_params_['neurons'])

## Part 5 - Create the Tuned Keras Neural Network

## Part 6 - Make Predictions and Evaluate Tuned Model