In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df=pd.read_csv('Churn_Modelling.csv')

In [3]:
df.shape

(10000, 14)

In [4]:
df.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [11]:
df.head()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,619,France,1,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,608,Spain,1,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,502,France,1,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,699,France,1,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,850,Spain,1,43,2,125510.82,1,1,1,79084.1,0


In [7]:
df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [3]:
df=df.drop('Surname',axis=1)

In [4]:
df['Gender']=np.where(df['Gender']=='Male',0,1)

In [5]:
country=pd.get_dummies(df.Geography)

In [17]:
X=df[['CreditScore','Gender','Age','Tenure','Balance','NumOfProducts','HasCrCard','IsActiveMember','EstimatedSalary']]

In [18]:
X=pd.concat([X,country],axis=1)

In [19]:
X.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,France,Germany,Spain
0,619,1,42,2,0.0,1,1,1,101348.88,1,0,0
1,608,1,41,1,83807.86,1,0,1,112542.58,0,0,1
2,502,1,42,8,159660.8,3,1,0,113931.57,1,0,0
3,699,1,39,1,0.0,2,0,0,93826.63,1,0,0
4,850,1,43,2,125510.82,1,1,1,79084.1,0,0,1


In [20]:
Y=df['Exited']

In [25]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split (X, Y, test_size=.4, random_state=10)

In [26]:
from sklearn.neural_network import MLPClassifier

# Establish and fit the model, with a single, 1000 perceptron layer.
mlp = MLPClassifier(hidden_layer_sizes=(500,))
mlp.fit(X_train, y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(500,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [27]:
mlp.score(X, Y)

0.7957

In [28]:
import time

# 1
start1 = time.time()

mlp = MLPClassifier(hidden_layer_sizes=(500,))
mlp.fit(X_train, y_train)
print("1. 500, one layer:")
print(mlp.score(X_test, y_test))

end1 = time.time()
print(end1 - start1)

# 2
start2 = time.time()

mlp = MLPClassifier(hidden_layer_sizes=(500,20))
mlp.fit(X_train, y_train)

print("\n2. 500x20, 1000, two layers:")
print(mlp.score(X_test, y_test))

end2 = time.time()
print(end2 - start2)

# 3

start3 = time.time()

mlp = MLPClassifier(hidden_layer_sizes=(500,20,5))
mlp.fit(X_train, y_train)

print("\n3. 500x20x5, 50,000, three layers:")
print(mlp.score(X_test, y_test))

end3 = time.time()
print(end3 - start3)

# 4

start5 = time.time()

mlp = MLPClassifier(hidden_layer_sizes=(100,50,20))
mlp.fit(X_train, y_train)

print("\n4. 100x50x20, 100,000, three layers:")
print(mlp.score(X_test, y_test))

end5 = time.time()
print(end5 - start5)

# 5

start4 = time.time()

mlp = MLPClassifier(hidden_layer_sizes=(1000,20,10))
mlp.fit(X_train, y_train)

print("\n5. 1000x20x10, 200,000, three layers:")
print(mlp.score(X_test, y_test))

end4 = time.time()
print(end4 - start4)

1. 500, one layer:
0.4935
2.3763978481292725

2. 500x20, 1000, two layers:
0.794
1.7386150360107422

3. 500x20x5, 50,000, three layers:
0.794
5.648292064666748

4. 100x50x20, 100,000, three layers:
0.6145
1.5453102588653564

5. 1000x20x10, 200,000, three layers:
0.5515
12.671662092208862


Having multi-layers with less neurons improves both results and runtime. 
It's interesting to see that #3 and #5 have the same score while #5 have much longer runtime. 

In [29]:
from sklearn.model_selection import cross_val_score

cvx_start=time.time()

mlp = MLPClassifier(hidden_layer_sizes=(500,20,5))
print(cross_val_score(mlp, X, Y, cv=4))

cvx_end=time.time()
print(cvx_end-cvx_start)



[0.79608157 0.7964     0.7964     0.71668667]
24.76563572883606


It's amazing how consistent the scores are!

In [30]:
from sklearn import ensemble

rfc_start=time.time()

rfc = ensemble.RandomForestClassifier(n_estimators=200)
print(cross_val_score(rfc, X, Y, cv=4))

rfc_end=time.time()
print(rfc_end-rfc_start)

[0.86645342 0.8664     0.8644     0.85994398]
7.889668941497803


RFC shows higher score with much faster runtime. 

In [35]:
rfc.fit(X,Y)
rfc.feature_importances_

array([0.14493594, 0.01808344, 0.23951188, 0.08266167, 0.14215271,
       0.12780264, 0.01873835, 0.03986622, 0.14605964, 0.01046572,
       0.02058998, 0.0091318 ])

In [38]:
X.columns

Index(['CreditScore', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts',
       'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'France', 'Germany',
       'Spain'],
      dtype='object')