In [11]:
## ANN

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# importing the liberary which is responsible for creating ANN

import keras
from keras.models import Sequential # It is responsible for creating ANN, CNN 
from keras.layers import Dense, Activation, Embedding, Flatten, BatchNormalization # Hidden layer
from keras.layers import LeakyReLU, PReLU, ELU # activation function
from keras.layers import Dropout # It is regularization paramenter


In [2]:
# Reading the data
df = pd.read_csv('Churn_Modelling.csv')
print(df)


      RowNumber  CustomerId    Surname  ...  IsActiveMember EstimatedSalary Exited
0             1    15634602   Hargrave  ...               1       101348.88      1
1             2    15647311       Hill  ...               1       112542.58      0
2             3    15619304       Onio  ...               0       113931.57      1
3             4    15701354       Boni  ...               0        93826.63      0
4             5    15737888   Mitchell  ...               1        79084.10      0
...         ...         ...        ...  ...             ...             ...    ...
9995       9996    15606229   Obijiaku  ...               0        96270.64      0
9996       9997    15569892  Johnstone  ...               1       101699.77      0
9997       9998    15584532        Liu  ...               1        42085.58      1
9998       9999    15682355  Sabbatini  ...               0        92888.52      1
9999      10000    15628319     Walker  ...               0        38190.78      0

[10

In [3]:
# Data seperation 
x = df.iloc[:, 3:13]
print('The x data is \n', x.head())

y = df.iloc[:, 13]
print('The y data is \n', y.head())


The x data is 
    CreditScore Geography  Gender  ...  HasCrCard  IsActiveMember  EstimatedSalary
0          619    France  Female  ...          1               1        101348.88
1          608     Spain  Female  ...          0               1        112542.58
2          502    France  Female  ...          1               0        113931.57
3          699    France  Female  ...          0               0         93826.63
4          850     Spain  Female  ...          1               1         79084.10

[5 rows x 10 columns]
The y data is 
 0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64


In [4]:
# checking the null values 

print('The null value for x column is \n', x.isnull().sum())
print('The null value for y column is \n', y.isnull().sum())

# checking the columns name 
print(x.columns)


The null value for x column is 
 CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
dtype: int64
The null value for y column is 
 0
Index(['CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance',
       'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary'],
      dtype='object')


In [5]:
# creating a dummy variables for categorical variable 
geography = pd.get_dummies(x['Geography'], drop_first = True)
print(geography)
gender = pd.get_dummies(x['Gender'], drop_first = True)
print(gender)


      Germany  Spain
0           0      0
1           0      1
2           0      0
3           0      0
4           0      1
...       ...    ...
9995        0      0
9996        0      0
9997        0      0
9998        1      0
9999        0      0

[10000 rows x 2 columns]
      Male
0        0
1        0
2        0
3        0
4        0
...    ...
9995     1
9996     1
9997     0
9998     1
9999     0

[10000 rows x 1 columns]


In [6]:
# Concatenate the data frame
x = pd.concat([x, geography, gender], axis = 1)
print("The x data is \n",x)


The x data is 
       CreditScore Geography  Gender  Age  ...  EstimatedSalary  Germany  Spain  Male
0             619    France  Female   42  ...        101348.88        0      0     0
1             608     Spain  Female   41  ...        112542.58        0      1     0
2             502    France  Female   42  ...        113931.57        0      0     0
3             699    France  Female   39  ...         93826.63        0      0     0
4             850     Spain  Female   43  ...         79084.10        0      1     0
...           ...       ...     ...  ...  ...              ...      ...    ...   ...
9995          771    France    Male   39  ...         96270.64        0      0     1
9996          516    France    Male   35  ...        101699.77        0      0     1
9997          709    France  Female   36  ...         42085.58        0      0     0
9998          772   Germany    Male   42  ...         92888.52        1      0     1
9999          792    France  Female   28  ...    

In [7]:
# Droping the column 
x = x.drop(['Geography', 'Gender'], axis = 1)
print("The x final data is \n",x)


The x final data is 
       CreditScore  Age  Tenure  ...  Germany  Spain  Male
0             619   42       2  ...        0      0     0
1             608   41       1  ...        0      1     0
2             502   42       8  ...        0      0     0
3             699   39       1  ...        0      0     0
4             850   43       2  ...        0      1     0
...           ...  ...     ...  ...      ...    ...   ...
9995          771   39       5  ...        0      0     1
9996          516   35      10  ...        0      0     1
9997          709   36       7  ...        0      0     0
9998          772   42       3  ...        1      0     1
9999          792   28       4  ...        0      0     0

[10000 rows x 11 columns]


In [8]:
# splitting the data set
x_train, x_test , y_train, y_test = train_test_split(x,y , test_size=0.2, random_state=0)


In [9]:
# scaling the x_train, x_test
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)


In [12]:
## Preforming hyperparameter optimization
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.activations import relu, sigmoid

In [13]:
def create_model(layers, activation):
  model = Sequential()
  for i, nodes in enumerate(layers):
    if i == 0:
      model.add(Dense(nodes, input_dim = x_train.shape[1]))
      model.add(Activation(activation))
      model.add(Dropout(0.3))
    else:
      model.add(Dense(nodes))
      model.add(Activation(activation))
      model.add(Dropout(0.3))
  model.add(Dense(units = 1, kernel_initializer='glorot_uniform', activation = 'sigmoid'))
  model.compile(optimizer='adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
  return model
  

In [14]:
model = KerasClassifier(build_fn=create_model, verbose = 0)


  """Entry point for launching an IPython kernel.


In [16]:
layers = [[20], [40,20], [45, 30, 15]]
activations = ['sigmoid', 'relu']
param_grid = dict(layers = layers, activation = activations, batch_size = [128, 256], epochs = [30])
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv =5)

grid_result = grid.fit(x_train,y_train)

print(grid_result.best_score_, grid_result.best_params_)

0.8566250205039978 {'activation': 'relu', 'batch_size': 128, 'epochs': 30, 'layers': [40, 20]}


In [26]:
pred_y = grid.predict(x_test)
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, pred_y)
print(cm)
from sklearn.metrics import accuracy_score
score = accuracy_score(y_test, pred_y)
print(score)

[[1554   41]
 [ 224  181]]
0.8675
