# <span class="mark">Preparing model</span>

#### Import modules

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
import keras

Using TensorFlow backend.


In [3]:
from keras.models import Sequential
from keras.layers import Dense # Randomy initialize the width to small numbers
from keras.layers import Dropout # Randomy drop nerons

In [28]:
from keras.wrappers.scikit_learn import KerasClassifier

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [29]:
from sklearn.model_selection import cross_val_score # Model accuracy validation class

In [30]:
from sklearn.model_selection import GridSearchCV # Alternative to cross_val_score model validation

In [25]:
from sklearn.metrics import confusion_matrix

#### <span class="girk">y - inspected value, x - data model</span>

In [5]:
dataset = pd.read_csv('./Churn_Modelling.csv')

In [6]:
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [7]:
print(dataset[0:1])

   RowNumber  CustomerId   Surname  CreditScore Geography  Gender  Age  \
0          1    15634602  Hargrave          619    France  Female   42   

   Tenure  Balance  NumOfProducts  HasCrCard  IsActiveMember  EstimatedSalary  \
0       2      0.0              1          1               1        101348.88   

   Exited  
0       1  


#### <span class="girk">Encoding categorical data</span>

In [8]:
labelEncoder_X_1 = LabelEncoder()
X[:, 1] = labelEncoder_X_1.fit_transform(X[:, 1])

In [9]:
labelEncoder_X_2 = LabelEncoder()
X[:, 2] = labelEncoder_X_2.fit_transform(X[:, 2])

In [10]:
oneHotEncoder = OneHotEncoder(categorical_features = [1])
X = oneHotEncoder.fit_transform(X).toarray()

In [11]:
X = X[:, 1:] # avoid dummy variable trap

In [12]:
print(X[0:3,:5])

[[   0.    0.  619.    0.   42.]
 [   0.    1.  608.    0.   41.]
 [   0.    0.  502.    0.   42.]]


#### <span class="girk">Splitting dataset Training set and Test set</span>

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

#### <span class="girk">Feature scaling</span>

In [14]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [15]:
 print(X_test)

[[ 1.75486502 -0.57369368 -0.55204276 ...,  0.64259497  0.9687384
   1.61085707]
 [-0.5698444  -0.57369368 -1.31490297 ...,  0.64259497 -1.03227043
   0.49587037]
 [-0.5698444   1.74309049  0.57162971 ...,  0.64259497  0.9687384
  -0.42478674]
 ..., 
 [-0.5698444   1.74309049 -0.74791227 ...,  0.64259497 -1.03227043
   0.71888467]
 [ 1.75486502 -0.57369368 -0.00566991 ...,  0.64259497  0.9687384
  -1.54507805]
 [ 1.75486502 -0.57369368 -0.79945688 ...,  0.64259497 -1.03227043
   1.61255917]]


# <span class="mark">_Create ANN Classifier_</span>

#### <span class="girk">Predict test result</span>

In [16]:
classifier = Sequential() # init ANN 

In [17]:
# adding input layer and the first hidden layer
classifier.add(Dense(6, kernel_initializer="uniform", activation="relu", input_shape=(11,)))
classifier.add(Dropout(rate = 0.1)) # adding neron dropuot

In [18]:
# adding second hidden layer
classifier.add(Dense(6, kernel_initializer="uniform", activation="relu"))

In [19]:
# adding output layer
classifier.add(Dense(1, kernel_initializer="uniform", activation="sigmoid")) # softmax func for more than 2 categories

In [20]:
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [21]:
classifier.fit(X_train, y_train, batch_size=10, epochs=100) # Fit ANN to training set

Epoch 1/100
Epoch 2/100
Epoch 3/100

In [22]:
Y_pred = classifier.predict(X_test) # vector of prediction

In [23]:
Y_pred = (Y_pred > 0.5)

In [55]:
print(Y_pred)

[[False]
 [False]
 [False]
 ..., 
 [False]
 [False]
 [False]]


#### <span class="girk">Evaluate correletion</span>

In [26]:
cm = confusion_matrix(y_test, Y_pred)

In [27]:
print(cm)

[[1548   47]
 [ 264  141]]


#### <span class="girk">Evaluating the ANN </span>

In [46]:
def build_classifier(optimizer='adam', kernel_initializer="uniform", loss='binary_crossentropy'):
    classifier = Sequential()
    classifier.add(Dense(6, kernel_initializer=kernel_initializer, activation="relu", input_shape=(11,)))
    classifier.add(Dense(6, kernel_initializer=kernel_initializer, activation="relu"))
    classifier.add(Dense(1, kernel_initializer=kernel_initializer, activation="sigmoid"))
    classifier.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return classifier

In [32]:
classifier2 = KerasClassifier(build_fn = build_classifier, batch_size = 10, nb_epoch = 100)

In [None]:
accuracies = cross_val_score(estimator = classifier2, X = X_train, y = y_train, cv = 10, n_jobs = -1)

In [None]:
mean = accuracies.mean()

In [None]:
variance = accuracies.std()

#### <span class="girk">Parameters tunning</span>

In [48]:
classifier3 = KerasClassifier(build_fn = build_classifier)

In [49]:
parameters = {
    'batch_size': [25, 32],
    'nb_epoch': [100, 500],
    'optimizer': ['adam', 'rmsprop']
}

In [50]:
grid_search = GridSearchCV(estimator=classifier3, param_grid=parameters, scoring='accuracy', cv=10)

In [51]:
grid_search.fit(X_train, y_train)

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1

In [53]:
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_

In [54]:
print(best_parameters,best_accuracy)

{'batch_size': 25, 'nb_epoch': 100, 'optimizer': 'adam'} 0.796
