In [2]:
# Artificial Neural Network

# Installing Theano
# pip install --upgrade --no-deps git+git://github.com/Theano/Theano.git

# Installing Tensorflow
# pip install tensorflow

# Installing Keras
# pip install --upgrade keras

# Part 1 - Data Preprocessing

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')


In [4]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [14]:
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values

In [15]:
X[0:5]

array([[619, 'France', 'Female', 42, 2, 0.0, 1, 1, 1, 101348.88],
       [608, 'Spain', 'Female', 41, 1, 83807.86, 1, 0, 1, 112542.58],
       [502, 'France', 'Female', 42, 8, 159660.8, 3, 1, 0, 113931.57],
       [699, 'France', 'Female', 39, 1, 0.0, 2, 0, 0, 93826.63],
       [850, 'Spain', 'Female', 43, 2, 125510.82, 1, 1, 1, 79084.1]],
      dtype=object)

In [16]:
X.shape

(10000, 10)

In [19]:
# Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])

In [20]:
X.shape

(10000, 10)

In [21]:
X_train[0:5]

array([[667, 'Spain', 'Female', 34, 5, 0.0, 2, 1, 0, 163830.64],
       [427, 'Germany', 'Male', 42, 1, 75681.52, 1, 1, 1, 57098.0],
       [535, 'France', 'Female', 29, 2, 112367.34, 1, 1, 0, 185630.76],
       [654, 'Spain', 'Male', 40, 5, 105683.63, 1, 1, 0, 173617.09],
       [850, 'Spain', 'Female', 57, 8, 126776.3, 2, 1, 1, 132298.49]],
      dtype=object)

In [22]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [23]:
print(X_train.shape,y_train.shape)

(8000, 10) (8000,)


In [24]:
X_train[0:5]

array([[667, 2, 0, 34, 5, 0.0, 2, 1, 0, 163830.64],
       [427, 1, 1, 42, 1, 75681.52, 1, 1, 1, 57098.0],
       [535, 0, 0, 29, 2, 112367.34, 1, 1, 0, 185630.76],
       [654, 2, 1, 40, 5, 105683.63, 1, 1, 0, 173617.09],
       [850, 2, 0, 57, 8, 126776.3, 2, 1, 1, 132298.49]], dtype=object)

In [25]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [26]:
print(X_train.shape,X_test.shape)

(8000, 10) (2000, 10)


#Using Dropout Regularization to reduce overfitting if needed and Tunning the ANN

In [27]:
# Improving the ANN
# Dropout Regularization to reduce overfitting if needed

# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping

def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 75, kernel_initializer = 'uniform', activation = 'relu', input_dim = 10))
    classifier.add(Dense(units = 50, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier

In [29]:
# simple early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)

In [30]:
classifier = KerasClassifier(build_fn = build_classifier)
parameters = {'batch_size': [25, 32],
              'epochs': [100, 500],
              'optimizer': ['adam', 'rmsprop']}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = 'accuracy',
                           cv = 10)
grid_search = grid_search.fit(X_train, y_train,validation_data=(X_test, y_test),callbacks=[es])

  """Entry point for launching an IPython kernel.


Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100
Epoch 2/100
Epoch 00002: early stopping
Epoch 1/100


In [31]:
best_parameters = grid_search.best_params_
print("best_parameters: ",best_parameters)
best_accuracy = grid_search.best_score_
print("best_accuracy: ",best_accuracy)

best_parameters:  {'batch_size': 25, 'epochs': 500, 'optimizer': 'adam'}
best_accuracy:  0.8305
