# Preparing model

#### Import modules

In [1]:
 import keras

Using TensorFlow backend.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
from keras.models import Sequential
from keras.layers import Dense # Randomy initialize the width to small numbers

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

#### y - inspected value, x - data model

In [5]:
dataset = pd.read_csv('/home/jovyan/work/note/ML/NN/Churn_Modelling.csv')

In [6]:
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [7]:
print(dataset)

      RowNumber  CustomerId         Surname  CreditScore Geography  Gender  \
0             1    15634602        Hargrave          619    France  Female   
1             2    15647311            Hill          608     Spain  Female   
2             3    15619304            Onio          502    France  Female   
3             4    15701354            Boni          699    France  Female   
4             5    15737888        Mitchell          850     Spain  Female   
5             6    15574012             Chu          645     Spain    Male   
6             7    15592531        Bartlett          822    France    Male   
7             8    15656148          Obinna          376   Germany  Female   
8             9    15792365              He          501    France    Male   
9            10    15592389              H?          684    France    Male   
10           11    15767821          Bearce          528    France    Male   
11           12    15737173         Andrews          497     Spa

#### Encoding categorical data

In [8]:
labelEncoder_X_1 = LabelEncoder()
X[:, 1] = labelEncoder_X_1.fit_transform(X[:, 1])
labelEncoder_X_2 = LabelEncoder()
X[:, 2] = labelEncoder_X_2.fit_transform(X[:, 2])

oneHotEncoder = OneHotEncoder(categorical_features=[1])
X = oneHotEncoder.fit_transform(X).toarray()
X = X[:, 1:]

In [9]:
print(X)

[[  0.00000000e+00   0.00000000e+00   6.19000000e+02 ...,   1.00000000e+00
    1.00000000e+00   1.01348880e+05]
 [  0.00000000e+00   1.00000000e+00   6.08000000e+02 ...,   0.00000000e+00
    1.00000000e+00   1.12542580e+05]
 [  0.00000000e+00   0.00000000e+00   5.02000000e+02 ...,   1.00000000e+00
    0.00000000e+00   1.13931570e+05]
 ..., 
 [  0.00000000e+00   0.00000000e+00   7.09000000e+02 ...,   0.00000000e+00
    1.00000000e+00   4.20855800e+04]
 [  1.00000000e+00   0.00000000e+00   7.72000000e+02 ...,   1.00000000e+00
    0.00000000e+00   9.28885200e+04]
 [  0.00000000e+00   0.00000000e+00   7.92000000e+02 ...,   1.00000000e+00
    0.00000000e+00   3.81907800e+04]]


#### Splitting dataset Training set and Test set

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

#### Feature scaling

In [11]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:
 print(X_test)

[[ 1.75486502 -0.57369368 -0.55204276 ...,  0.64259497  0.9687384
   1.61085707]
 [-0.5698444  -0.57369368 -1.31490297 ...,  0.64259497 -1.03227043
   0.49587037]
 [-0.5698444   1.74309049  0.57162971 ...,  0.64259497  0.9687384
  -0.42478674]
 ..., 
 [-0.5698444   1.74309049 -0.74791227 ...,  0.64259497 -1.03227043
   0.71888467]
 [ 1.75486502 -0.57369368 -0.00566991 ...,  0.64259497  0.9687384
  -1.54507805]
 [ 1.75486502 -0.57369368 -0.79945688 ...,  0.64259497 -1.03227043
   1.61255917]]


# _Create Classifier_

#### Predict test result

In [19]:
classifier = Sequential()

In [20]:
# adding layer to NN
classifier.add(Dense(6, kernel_initializer="uniform", activation="relu", input_shape=(11,)))

In [21]:
classifier.add(Dense(6, kernel_initializer="uniform", activation="relu"))

In [22]:
classifier.add(Dense(1, kernel_initializer="uniform", activation="sigmoid")) # softmax func for more than 2 categories

In [23]:
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [24]:
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100

In [25]:
Y_pred = classifier.predict(X_test) # vector of prediction

In [26]:
Y_pred = (Y_pred > 0.5)

In [27]:
print(Y_pred)

[[False]
 [False]
 [False]
 ..., 
 [False]
 [False]
 [False]]


#### Evaluate correletion

In [28]:
from sklearn.metrics import confusion_matrix

In [30]:
cm = confusion_matrix(y_test, Y_pred)

In [31]:
print(cm)

[[1515   80]
 [ 195  210]]


#### Visualisation: Make a graph

In [32]:
# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Classifier (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

ValueError: Error when checking : expected dense_4_input to have shape (11,) but got array with shape (2,)

In [None]:
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Classifier (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()