In [0]:
# Artificial Neural Network

# Installing Theano
# pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git

# Installing Tensorflow
# pip install tensorflow

# Installing Keras
# pip install --upgrade keras

# Part 1 - Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()
X = X[:, 1:]

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


# Keras ANN

In [0]:
# Part 2 - Now let's make the ANN!

# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [0]:
# Initializing the ANN
classifier = Sequential()

# Train ANN
# Stochastic Gradient Descent
# 1. Randomly initialize weights to small numbers close to 0 (but not 0)

# 2. Input first observation in input layer, each feature in one input node

# 3. Forward propagation. Neurons are activated according to their weights.

# 4. Compare predicted and actual. Measure error.

# 5. Back propagation. 
# Propagate error, update weights according to how much they are 
# responsible for the error

# 6. Repeat 1-5 and:
#     1. update weights after each observation (reinforced learning)
#     2. update weights after batch of observations (batch learning)

# 7. When whole training set is done, redo more epochs

##########################################################################

# Add input layer and first hidden layer

# Tip for nodes in hidden layer: 
# use average between nodes in input layer and ouput layer

# Otherwise use parameter tuning (ex. k-fold cross-validation)

# Here: input = 11, output = 1 because it's binary
# init = weights initialization (here = uniform)
# activation function = rectifier function for hidden layer ("relu")

classifier.add(Dense(units=6, init='uniform', activation='relu', input_dim=11))
classifier.add(Dropout(p=0.1))

# Second hidden layer
classifier.add(Dense(units=6, init='uniform', activation='relu'))
classifier.add(Dropout(p=0.1))

# Output layer
# one output, dependent variable is boolean
# Activation = sigmoid
classifier.add(Dense(units=1, init='uniform', activation='sigmoid'))

# Compile:
# Need optimizer algorithm. Here: Stochastic Gradient Descent
# (specifically: we can use Adam)
# Loss function (sum of square errors in linear regression, but logarithmic loss here)
# that we need to optimize through stochastic gradient descent
# If binary outcome: binary_crossentropy (otherwise categorical_crossentropy)
# Metrics: list of metrics to be evaluated
classifier.compile(optimizer='adam', loss='binary_crossentropy', 
                   metrics = ['accuracy'])

In [0]:
# Fit ANN to training
# batch size: number of observations after which we update weights
# epochs: number of times the whole dataset passes through the ANN
# both chosen arbitrarily

classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100

<keras.callbacks.History at 0x7fe68550f0b8>

In [0]:
y_pred = classifier.predict(X_test)

In [0]:
y_pred = (y_pred > 0.5)

In [0]:
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))

[[1548   47]
 [ 268  137]]


In [0]:
# Predict a single new observation
# Don't forget to scale!
new_prediction = classifier.predict(sc.transform(np.array([[0,0,600,1,40,3,60000,2,1,1,50000]])))



In [0]:
new_prediction = (new_prediction > 0.5)

In [0]:
new_prediction

array([[False]])

# Evaluating the ANN

# K-fold cross validation

In [0]:
# we need to combine keras and scikit-learn
# use keras wrapper that includes scikit-learn k-fold CF

# import keras wrapper
from keras.wrappers.scikit_learn import KerasClassifier
# import cross-validation function
from sklearn.model_selection import cross_val_score

In [0]:
# model definition function
def build_classifier(optimizer='adam'):
  # Initializing the ANN
  classifier = Sequential()

  classifier.add(Dense(units=6, init='uniform', activation='relu', input_dim=11))
  classifier.add(Dense(units=6, init='uniform', activation='relu'))
  classifier.add(Dense(units=1, init='uniform', activation='sigmoid'))

  classifier.compile(optimizer=optimizer, loss='binary_crossentropy', 
                   metrics = ['accuracy'])

  return classifier

In [0]:
classifier = KerasClassifier(build_fn = build_classifier, batch_size = 10, epochs = 100)

In [0]:
# we're checking 2 things: 
# 1. relevance factor and 
# 2. where we are in bias / variance tradeoff 
# (we need high accuracy and small difference between accuracies)

# k-fold CV will return an accuracy for k iteractions
# we want good accuracy: low bias and not too much variance

 # number of folds = 10 arbitrary but often used
NB_CV = 10

# n_jobs: run multiple jobs at the same time (-1 = all CPUs)
accuracies = cross_val_score(estimator=classifier, X=X_train,y=y_train, 
                             cv = NB_CV, n_jobs=-1)

In [0]:
mean = accuracies.mean()
variance = accuracies.std()

# Parameter tuning with GridSearch

In [0]:
# we need to combine keras and scikit-learn
# use keras wrapper that includes scikit-learn k-fold CF

# import keras wrapper
from keras.wrappers.scikit_learn import KerasClassifier
# import cross-validation function
from sklearn.model_selection import GridSearchCV

In [0]:
# parameters to tune
parameters = {
    'batch_size': [25, 32]
    ,'nb_epoch': [100, 500]
    ,'optimizer': ['adam', 'rmsprop']
}

In [0]:
grid_search = GridSearchCV(
    estimator = classifier, 
    param_grid=parameters,
    scoring='accuracy',
    cv=10)

In [0]:
# fit to training set
# THIS WILL BE LONG
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


  """
  
  import sys


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


KeyboardInterrupt: ignored