In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import tflearn  # Higher level helper for tensorflow

In [3]:
'''
Load and inspect the dataset
'''

dataset = pd.read_csv("Churn_Modelling.csv")
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
'''
Extract useful data
First 3 columns are useless, remove them.
Last column is whether the customer left the bank or not.
'''

X = dataset.iloc[:,3:13].values
y = dataset.iloc[:,13].values

In [5]:
'''
Geography & gender is text, so they need to be converted
Method: one-hot representation
'''

from sklearn.preprocessing import LabelEncoder, OneHotEncoder

labelencoder_X_1 = LabelEncoder()
X[:,1] = labelencoder_X_1.fit_transform(X[:,1])
labelencoder_X_2 = LabelEncoder()
X[:,2] = labelencoder_X_2.fit_transform(X[:,2])

onehotencoder = OneHotEncoder(categorical_features=[1])
X = onehotencoder.fit_transform(X).toarray()
X = X[:,1:]

In [7]:
print(X)

[[0.0000000e+00 0.0000000e+00 6.1900000e+02 ... 1.0000000e+00
  1.0000000e+00 1.0134888e+05]
 [0.0000000e+00 1.0000000e+00 6.0800000e+02 ... 0.0000000e+00
  1.0000000e+00 1.1254258e+05]
 [0.0000000e+00 0.0000000e+00 5.0200000e+02 ... 1.0000000e+00
  0.0000000e+00 1.1393157e+05]
 ...
 [0.0000000e+00 0.0000000e+00 7.0900000e+02 ... 0.0000000e+00
  1.0000000e+00 4.2085580e+04]
 [1.0000000e+00 0.0000000e+00 7.7200000e+02 ... 1.0000000e+00
  0.0000000e+00 9.2888520e+04]
 [0.0000000e+00 0.0000000e+00 7.9200000e+02 ... 1.0000000e+00
  0.0000000e+00 3.8190780e+04]]


In [10]:
'''
France became 1, Female became 1, Spain became 0, etc
Now we can split to training and test set
'''

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
y_train = np.reshape(y_train, (-1, 1))
y_test = np.reshape(y_test, (-1, 1))

In [11]:
'''
Normalize X so all features have a mean of 0 and standard deviation of 1
'''

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [12]:
'''
Create neural network
We have 11 features, and define 3 fully connected layers
'''

net = tflearn.input_data(shape=[None, 11])
net = tflearn.fully_connected(net, 6, activation='relu')
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 6, activation='relu')
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 1, activation='tanh')
net = tflearn.regression(net)

Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [13]:
'''
Define model and train
'''

model = tflearn.DNN(net)
model.fit(X_train, y_train, n_epoch=10, batch_size=16, validation_set=(X_test, y_test), show_metric=True, run_id="dense_model")

Training Step: 4999  | total loss: [1m[32mnan[0m[0m | time: 1.603s
| Adam | epoch: 010 | loss: nan - binary_acc: 0.7824 -- iter: 7984/8000
Training Step: 5000  | total loss: [1m[32mnan[0m[0m | time: 2.667s
| Adam | epoch: 010 | loss: nan - binary_acc: 0.7729 | val_loss: nan - val_acc: 0.7975 -- iter: 8000/8000
--
