## Data Preprocesing

In [5]:
import pandas as pd

dataset = pd.read_csv('Datasets/Churn_Modelling.csv')

geography = pd.get_dummies(dataset['Geography'], drop_first = True)
dataset.drop(['Geography'], axis = 1, inplace = True)
dataset = pd.concat([dataset, geography], axis = 1)

gender = pd.get_dummies(dataset['Gender'], drop_first = True)
dataset.drop(['Gender'], axis = 1, inplace = True)
dataset = pd.concat([dataset, gender], axis = 1)

In [8]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Germany,Spain,Male
0,1,15634602,Hargrave,619,42,2,0.0,1,1,1,101348.88,1,0,0,0
1,2,15647311,Hill,608,41,1,83807.86,1,0,1,112542.58,0,0,1,0
2,3,15619304,Onio,502,42,8,159660.8,3,1,0,113931.57,1,0,0,0
3,4,15701354,Boni,699,39,1,0.0,2,0,0,93826.63,0,0,0,0
4,5,15737888,Mitchell,850,43,2,125510.82,1,1,1,79084.1,0,0,1,0


## Define Training Set & Test Set & Scale the Data

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

y = dataset['Exited'].values
dataset.drop('Exited', axis = 1, inplace = True)

X = dataset.iloc[:, 3:].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, random_state = 0)

sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.fit_transform(X_test)

## Create a Simple ANN

In [13]:
import keras
from keras.models import Sequential
from keras.layers import Dense

ann_classifier = Sequential()
ann_classifier.add(Dense(units = 6, activation = 'relu', kernel_initializer = 'uniform', input_dim = 11))
ann_classifier.add(Dense(units = 6, activation = 'relu', kernel_initializer = 'uniform'))
ann_classifier.add(Dense(units = 1, activation = 'sigmoid', kernel_initializer = 'uniform'))
ann_classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

ann_classifier.fit(X_train, y_train, batch_size = 24, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x2de9979d408>

## Measure Performance

In [19]:
from sklearn.metrics import accuracy_score, confusion_matrix

y_pred = ann_classifier.predict(X_test) > .5

print(f'confusion matrix:\n{confusion_matrix(y_test, y_pred)}')
print(f'accuray score : {accuracy_score(y_test, y_pred)}')

confusion matrix:
[[1573   22]
 [ 315   90]]
accuray score : 0.8315


## Apply K-Fold Cross Validation

In [29]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

def build_ann_classifier():
    ann_classifier = Sequential()
    ann_classifier.add(Dense(units = 6, activation = 'relu', kernel_initializer = 'uniform', input_dim = 11))
    ann_classifier.add(Dense(units = 6, activation = 'relu', kernel_initializer = 'uniform'))
    ann_classifier.add(Dense(units = 1, activation = 'sigmoid', kernel_initializer = 'uniform'))
    ann_classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return ann_classifier

ann_classifier_2 = KerasClassifier(build_fn = build_ann_classifier, batch_size = 10, epochs = 2)
accuracies = cross_val_score(estimator = ann_classifier_2, X = X_train, y = y_train, cv = 3)

Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2


In [32]:
print(f'accuracy mean: {accuracies.mean()}')
print(f'std deviation: {accuracies.std()}')


accuracy mean: 0.7959998846054077
std deviation: 0.003717187443768525
