# Artificial Neural Network

## Installing Theano

In [2]:
#!pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git
# Theano is an open source numeric computation library, very efficient and based on numpy syntax.  Theano can not only run on 
# your CPU (Central Processing Unit) but also on your GPU (Graphical Processing Unit)
!pip install theano



## Installing Tensorflow and Keras

In [4]:
# Installing Tensorflow
# Install Tensorflow from the website: https://www.tensorflow.org/versions/r0.12/get_started/os_setup.html
# Tensorflow is another open source numeric computation library, runs very fast computation and can run either on your CPU or GPU
# If you are using Theano and Tensorflow together it means that you are building a deep neural network FROM SCRATCH
#!pip install tensorflow
# However if you want to wrap Theano and Tensorflow together then you can directly use Keras.  This library allows us to build a
# deep neural network using a few lines of code.  Keras is extremely powerful and runs on Theano and Tensorflow libraries. 
# Installing Keras
!pip install --upgrade keras

Requirement already up-to-date: keras in c:\programdata\anaconda3\lib\site-packages (2.2.4)


# Part 1 - Data Preprocessing

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [6]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
# Features: column positions 3 through 12 (positions are zero-based and the upper bound 13 of
# the slice is excluded)
feature_columns = dataset.iloc[:, 3:13]
X = feature_columns.values
# Label / dependent variable: the column at zero-based position 13, i.e. the 14th column
label_column = dataset.iloc[:, 13]
y = label_column.values

In [7]:
# Inspect the raw feature matrix; at this point it still holds the country and gender strings
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [8]:
# Encoding categorical data
# ColumnTransformer replaces OneHotEncoder(categorical_features=...), which was deprecated in
# scikit-learn 0.20 and removed in 0.22.
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Label-encode the country (column 1) and gender (column 2) features as integers
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
# The country variable is then expanded from its label encoding into one-hot dummy variables.
# [1] selects the country column (the second from the left: the first column is 0, the second is 1).
# As with the old categorical_features option, the dummy columns are placed first and the
# remaining columns follow in their original order (remainder='passthrough').
# sparse_threshold=0 forces a dense result; astype(float) is needed because stacking the dummy
# columns with the object-typed passthrough columns would otherwise yield an object array.
onehotencoder = ColumnTransformer(
    [('country', OneHotEncoder(categories = 'auto'), [1])],
    remainder = 'passthrough',
    sparse_threshold = 0,
)
X = onehotencoder.fit_transform(X).astype(float)
# Avoid THE DUMMY VARIABLE TRAP by dropping the first dummy column: when the two remaining
# country columns are both 0, the row belongs to the dropped (third) country.
X = X[:, 1:]

In [9]:
# Inspect the encoded feature matrix; the two remaining country dummy columns come first
X

array([[0.0000000e+00, 0.0000000e+00, 6.1900000e+02, ..., 1.0000000e+00,
        1.0000000e+00, 1.0134888e+05],
       [0.0000000e+00, 1.0000000e+00, 6.0800000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 1.1254258e+05],
       [0.0000000e+00, 0.0000000e+00, 5.0200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 1.1393157e+05],
       ...,
       [0.0000000e+00, 0.0000000e+00, 7.0900000e+02, ..., 0.0000000e+00,
        1.0000000e+00, 4.2085580e+04],
       [1.0000000e+00, 0.0000000e+00, 7.7200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 9.2888520e+04],
       [0.0000000e+00, 0.0000000e+00, 7.9200000e+02, ..., 1.0000000e+00,
        0.0000000e+00, 3.8190780e+04]])

In [10]:
# Splitting the dataset into the Training set and Test set
# 20% of the rows are held out for testing; the fixed random_state keeps the split repeatable
from sklearn.model_selection import train_test_split
split_parts = train_test_split(X, y, random_state = 0, test_size = 0.2)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# Feature Scaling
# Feature scaling is essential in deep learning: it prevents one independent variable from
# dominating the others purely because of its magnitude.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# The scaling statistics are learned from the training set only and then applied to both sets
sc.fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [12]:
# Inspect the standardized training features
X_train

array([[-0.5698444 ,  1.74309049,  0.16958176, ...,  0.64259497,
        -1.03227043,  1.10643166],
       [ 1.75486502, -0.57369368, -2.30455945, ...,  0.64259497,
         0.9687384 , -0.74866447],
       [-0.5698444 , -0.57369368, -1.19119591, ...,  0.64259497,
        -1.03227043,  1.48533467],
       ...,
       [-0.5698444 , -0.57369368,  0.9015152 , ...,  0.64259497,
        -1.03227043,  1.41231994],
       [-0.5698444 ,  1.74309049, -0.62420521, ...,  0.64259497,
         0.9687384 ,  0.84432121],
       [ 1.75486502, -0.57369368, -0.28401079, ...,  0.64259497,
        -1.03227043,  0.32472465]])

# Part 2 - Now let's make the ANN!

In [5]:
# Importing the Keras libraries and packages
import keras
# The Sequential class is used to initialize our neural network as a linear stack of layers
from keras.models import Sequential
# The Dense class is used to build the fully connected layers of our ANN
from keras.layers import Dense

  '{0}.{1}.{2}'.format(*version.hdf5_built_version_tuple)
Using TensorFlow backend.


In [6]:
# Initialising the ANN
# Start from an empty Sequential model; the layers are added to it one by one in the cells below
classifier = Sequential()

In [7]:
# Adding the input layer and the first hidden layer.  The rectifier activation function is used for the hidden layer.
# input_dim is the number of features fed in as input.  It is only specified for the first hidden layer.
# units is the number of nodes in the hidden layer.  A common rule of thumb is the average of the number of
# nodes in the input layer (number of features) and the number of nodes in the output layer: (i + o) / 2.
# Here that is (11 + 1) / 2 = 6.
# kernel_initializer controls how the weights are randomly initialized before the first forward propagation
# pass of stochastic gradient descent.  Two options are glorot_uniform and uniform.
# The activation function relu is the rectifier activation function.
# units / kernel_initializer are the Keras 2 names of the legacy output_dim / init arguments.
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))

Instructions for updating:
Colocations handled automatically by placer.


  


In [8]:
# Adding the second hidden layer.  Again the rectifier activation function is used for the hidden layer.
# input_dim is omitted here: only the first layer of the model needs to be told the input size.
# units / kernel_initializer are the Keras 2 names of the legacy output_dim / init arguments.
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))

  


In [9]:
# Adding the output layer.  The sigmoid activation function is used for the output layer because we want probabilities.
# units is 1 because the layer produces a single value for the predicted label.
# units / kernel_initializer are the Keras 2 names of the legacy output_dim / init arguments.
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

  


In [10]:
# Compiling the ANN
# optimizer: the algorithm used to search for the optimal set of weights.  There are several variants of
#   stochastic gradient descent; adam is one of the most efficient.
# loss: the loss function minimised by the optimizer.  With two possible outcomes for the label the
#   loss is binary_crossentropy; with more than two it is categorical_crossentropy.
# metrics: accuracy is reported to show how well the model is performing.
classifier.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)

In [17]:
# Fitting the ANN to the Training set
# batch_size is the number of rows/observations processed before each update of the weights
# epochs is the number of full passes over the training set (Keras 2 name of the legacy nb_epoch argument)
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)

Instructions for updating:
Use tf.cast instead.


  


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x248cff6cb70>

# Part 3 - Making the predictions and evaluating the model

In [18]:
# Predicting the Test set results
# The network outputs probabilities; thresholding at 0.5 turns them into True/False predictions
y_pred = classifier.predict(X_test) > 0.5

In [19]:
# Making the Confusion Matrix
# Tabulate the true test labels against the thresholded predictions
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_true = y_test, y_pred = y_pred)

In [20]:
# Display the confusion matrix (scikit-learn puts true classes on rows, predicted classes on columns).
# In the recorded run the diagonal holds 1544 + 144 correct predictions out of 2000, i.e. 84.4% accuracy.
cm

array([[1544,   51],
       [ 261,  144]], dtype=int64)