## Deep Learning - Artificial Neural Networks

In [1]:
#importing libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the bank-customer churn dataset and preview its first rows
csv_path = './../0. DataSets/Churn_Modelling.csv'
dataset = pd.read_csv(csv_path)
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Make both pandas and NumPy display floats in plain decimal notation.

# pandas: thousands separator and no decimal places
pd.options.display.float_format = '{:,.0f}'.format


def float_formatter(x):
    """Return ``x`` formatted as a string with exactly two decimal places."""
    return "%.2f" % x


# NumPy: apply the two-decimal formatter to every floating-point array element
np.set_printoptions(formatter={'float_kind': float_formatter})

## Business Problem explanation:

In [4]:
#We have a list of bank customers described by many different independent variables.
#Based on these variables we want to predict the last column, i.e. whether the customer left (exited) the bank or not.

In [5]:
# X: matrix of independent variables, taken from column positions 3 through 12.
#    The leading columns (e.g. customer id, surname) are skipped.
# y: vector holding the single dependent variable, taken from column position 13.
feature_columns = slice(3, 13)
target_column = 13

X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values

In [6]:
# Inspect the raw feature matrix before encoding (it still mixes numbers and strings)
X

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [7]:
# Encoding categorical data
# Only two columns are categorical: Geography (column 1) and Gender (column 2),
# so two separate label encoders are created.

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Encode Geography: turn each country name into an integer code.
# In this case France becomes 0 and Spain becomes 2.
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])

# Encode Gender
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

# One-hot encode column 1 (Geography).
# The `categorical_features` argument of OneHotEncoder no longer exists in
# current scikit-learn, so the column is selected through a ColumnTransformer.
# The one-hot columns are placed at the start of the matrix and the remaining
# columns follow in their original order (credit score moves to position 3).
onehotencoder = OneHotEncoder()
column_transformer = ColumnTransformer(
    transformers=[('geography', onehotencoder, [1])],
    remainder='passthrough',  # keep every other column unchanged
    sparse_threshold=0,       # always return a dense array
)
# X is an object array at this point, so the stacked result is cast to float.
X = column_transformer.fit_transform(X).astype(float)


In [8]:
# One-hot encoding left us with three Geography columns (one per country).
# Drop one of them to avoid the dummy variable trap, keeping only 2 dummy variables.
# NOTE: this cell reassigns X from itself, so re-running it drops one more column each time.
X=X[:,1:]
X

array([[0.00, 0.00, 619.00, ..., 1.00, 1.00, 101348.88],
       [0.00, 1.00, 608.00, ..., 0.00, 1.00, 112542.58],
       [0.00, 0.00, 502.00, ..., 1.00, 0.00, 113931.57],
       ...,
       [0.00, 0.00, 709.00, ..., 0.00, 1.00, 42085.58],
       [1.00, 0.00, 772.00, ..., 1.00, 0.00, 92888.52],
       [0.00, 0.00, 792.00, ..., 1.00, 0.00, 38190.78]])

In [9]:
# Split the dataset into a training set (80%) and a test set (20%).
# A fixed random_state keeps the split the same from run to run.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [10]:
# Feature scaling
# The scaler is fitted on the training data only and then applied to both sets.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [11]:
#Make the Artificial Neural Network

#Importing Keras libraries and required packages
import keras
from keras.models import Sequential
from keras.layers import Dense


Using TensorFlow backend.


In [17]:
# Initialise the ANN as a sequential stack of layers
classifier = Sequential()

# Input layer + first hidden layer.
# The input size (input_dim) only has to be given for the first layer;
# after that the ANN works it out automatically.
# Rule of thumb used for the number of hidden units: the average of the number
# of input parameters (11) and output parameters (1), i.e. (11 + 1) / 2 = 6.
# Keras 2 argument names are used here: `units` (formerly `output_dim`) and
# `kernel_initializer` (formerly `init`).
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu', input_dim=11))

# The initialisers, activations, optimizer and loss would be different if,
# instead of a binary classification, we had more categories.

# Second hidden layer
classifier.add(Dense(units=6, kernel_initializer='uniform', activation='relu'))

# Output layer: a single sigmoid unit
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

# Compile the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [18]:
# Fit the ANN to the training set.
# `epochs` is the Keras 2 name of the argument formerly called `nb_epoch`.
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

  from ipykernel import kernelapp as app


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f63c9dd5a90>

In [19]:
# Now that the model is trained, use the X_test matrix to test it.
y_pred=classifier.predict(X_test)
y_pred

In [22]:
# To build the confusion matrix, choose a threshold: values above it become True
# (in this case, True means the customer left the bank).
y_pred=(y_pred>0.5)
y_pred

array([[False],
       [False],
       [False],
       ...,
       [False],
       [False],
       [False]])

In [26]:
from sklearn.metrics import confusion_matrix

# Compare the model's predictions with the real outcomes by building a confusion matrix
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [24]:
# Display the confusion matrix
cm

array([[1482,  113],
       [ 173,  232]])

retornou isto:

array([[1482,  113],
       [ 173,  232]])

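ou seja, o meu modelo previu corretamente que 1482 clientes não iam embora, e não foram (verdadeiros negativos)
e que 232 clientes iam embora, e foram (verdadeiros positivos)

falsos:
173 clientes que o modelo previu que não iam embora, mas foram (falsos negativos)
113 clientes que o modelo previu que iam embora, mas não foram (falsos positivos)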

In [25]:
# Compute the model's accuracy: correct predictions (the diagonal of the
# confusion matrix) divided by the total number of predictions.
# Derived from `cm` rather than typed-in numbers so it stays correct after retraining.
cm.trace() / cm.sum()

0.857