## Data Pre-processing

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [2]:
X_df = dataset.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)
y = dataset.iloc[:, 13].values

In [3]:
X_df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.0,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.8,3,1,0,113931.57
3,699,France,Female,39,1,0.0,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1


In [4]:
y

array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

## Transform Categorical Variables

In [5]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
label_encoder_gender = LabelEncoder()
one_hot_encoder = OneHotEncoder(sparse=False)
geography_df = pd.DataFrame(one_hot_encoder.fit_transform(X_df['Geography'].values.reshape(-1, 1)), columns = one_hot_encoder.categories_)
geography_df.drop([geography_df.columns[0]], axis=1, inplace=True)
X_df['Gender'] = label_encoder_gender.fit_transform(X_df['Gender'])
label_gender_mapping = dict(zip(label_encoder_gender.transform(label_encoder_gender.classes_), label_encoder_gender.classes_))
X_df.rename(columns={'Gender': label_gender_mapping[1]}, inplace=True)
X_df = X_df.merge(geography_df, left_index=True, right_index=True)
X_df.drop(['Geography'], axis=1, inplace=True)
'''
only 2 dummy variables for the country: for example spain, france while not labeling germany 
since it can correspond to value zero from both spain and france
'''
#countries_data
X_df.head()

Unnamed: 0,CreditScore,Male,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,"(Germany,)","(Spain,)"
0,619,0,42,2,0.0,1,1,1,101348.88,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0.0,1.0


## Split Train and Test set

In [6]:
from sklearn.model_selection import train_test_split
X = X_df.to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

## Feature Scaling

In [7]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Artificial Neural Network Implementation

In [8]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

Using TensorFlow backend.


In [9]:
def build_classifier(optimizer='adam', nn_count = 6):
    #Initializing the ANN
    classifier = Sequential() #Define as a sequence of layers

    #add input layer and first hidden layer with dropout
    #randomly initialize weights close to zero (using kernel initializer parameter)
    #using 6 as output dimension as this is the average between input layers + output layers
    input_to_hidden_layer = Dense(units = nn_count, kernel_initializer="uniform", activation = 'relu', input_dim = 11)
    classifier.add(input_to_hidden_layer)
    #disable random neurons at every iteration to lower overfitting
    classifier.add(Dropout(rate = 0.1)) #disable 10% of the neurons at every iteration 
    
    #add second hidden layer
    second_hidden_layer = Dense(units = nn_count, kernel_initializer="uniform", activation = 'relu')
    classifier.add(second_hidden_layer)
    classifier.add(Dropout(rate = 0.1)) #disable 10% of the neurons at every iteration 

    #add output layer
    output_layer = Dense(units = 1, kernel_initializer="uniform", activation = 'sigmoid')
    classifier.add(output_layer)
    classifier.add(Dropout(rate = 0.1)) #disable 10% of the neurons at every iteration 
    
    #compiling ANN
    #set up with stochastic gradient descent, in the case of keras let's use adam optimizer
    #cost function (loss): Logarithmic loss function
    #metrics to evaluate the model
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier

## Fitting and Tunning the ANN to the training set 

In [2]:
#Implement K fold cross validation

# Grid search using K cross validation
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

#create hyperparameters to evaluate
hyperparams = {'batch_size': [32], 
              'epochs': [500], 
              'optimizer': ['adam', 'rmsprop'],
               'nn_count': [6, 8, 10]
             }

#k = 10 folds
#n_jobs = number of cpu's to do the job (-1 to use all cpu's in parallel)
model = KerasClassifier(build_fn=build_classifier)
grid_search = GridSearchCV(estimator = model, param_grid = hyperparams, scoring= 'accuracy', cv = 10)

grid_search = grid_search.fit(X_train, y_train)

NameError: name 'build_classifier' is not defined

In [None]:
best_parameters = grid_search.best_params_
best_parameters

In [None]:
best_accuracy = grid_search.best_score_
best_accuracy

## Evaluate Model

In [13]:
classifier = build_classifier(best_parameters['optimizer'], best_parameters['nn_count'])
classifier.fit(X_train, y_train, batch_size = best_parameters['batch_size'], epochs = best_parameters['epochs'])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 

Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500
Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 

Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500
Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500


<keras.callbacks.callbacks.History at 0x215d4f92c48>

In [14]:
y_pred = classifier.predict(X_test)
y_pred_binary = (y_pred > 0.5)

## Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_pred_binary, y_test)
cm

array([[1566,  289],
       [  29,  116]], dtype=int64)

In [16]:
accuracy_score(y_pred_binary, y_test)

0.841

## Predict specific test Entity

In [17]:
test_entity = np.array([[600, 1, 40, 3, 60000, 2, 1, 1, 50000, 0, 0]])
scaled_test_entity = sc.transform(test_entity)
entity_pred = classifier.predict(scaled_test_entity)
entity_pred

array([[0.10377847]], dtype=float32)