# Hyperparameter Tuning - Churn Modelling

In [1]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
#np.set_printoptions(threshold=np.inf)
np.set_printoptions(threshold=1000)

# Importing the Dataset

In [2]:
df = pd.read_csv('Churn_Modelling.csv')
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Checking null Values
df.isnull().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [4]:
x= df.iloc[:,3:-1]
print(type(x))
x

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.00,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.80,3,1,0,113931.57
3,699,France,Female,39,1,0.00,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10
...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77
9997,709,France,Female,36,7,0.00,1,0,1,42085.58
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52


In [5]:
y= df.iloc[:,-1].values
print(type(y))
y

<class 'numpy.ndarray'>


array([1, 0, 1, ..., 1, 1, 0], dtype=int64)

In [6]:
df.iloc[:,-1].value_counts()

Exited
0    7963
1    2037
Name: count, dtype: int64

# Encoding Categorical Data

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder

In [8]:
ct = ColumnTransformer(transformers=[('Country',OneHotEncoder(),[1]),('Gender',OrdinalEncoder(),[2])],remainder='passthrough')
x= ct.fit_transform(x)
# Avoiding Dummy variable Trap
x=x[:,1:]

# Spliting the Dataset

In [9]:
from sklearn.model_selection import train_test_split
X, x_hold_out ,Y, y_hold_out = train_test_split(x,y,test_size=0.2,random_state=42,stratify=y)
print(X.shape,x_hold_out.shape,Y.shape,y_hold_out.shape)

(8000, 11) (2000, 11) (8000,) (2000,)


In [10]:
print(f"Ratio in hold_out_set -> {Counter(y_hold_out)[1]/len(y_hold_out)}")
print(f"Ratio in for cross validate set -> {round(Counter(Y)[1]/len(Y),3)}")

Ratio in hold_out_set -> 0.2035
Ratio in for cross validate set -> 0.204


# Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler
sc= StandardScaler()
X=sc.fit_transform(X)
x_hold_out=sc.transform(x_hold_out)

# Building Neural Network

In [12]:
import tensorflow as tf
tf.__version__

'2.13.0'

## Hyperparameter

In [13]:
params={
    'batch_size':[20,25],
    'epochs':[50,70],
    'model__neurons_1':[6,7],
    'model__neurons_2':[4,3],
    'model__activation':['relu','softmax'],
    'model__optimizer':['adam','rmsprop'],
    'model__dropout':[0.1,0.2]
    }

In [14]:
def create_model(neurons_1,neurons_2,activation,optimizer,dropout):
    nn = tf.keras.Sequential()
    nn.add(tf.keras.layers.Input(shape=11))
    nn.add(tf.keras.layers.Dense(units=neurons_1,activation=activation,kernel_initializer='glorot_uniform'))
    nn.add(tf.keras.layers.Dropout(rate=dropout))
    nn.add(tf.keras.layers.Dense(units=neurons_2,activation=activation))    
    nn.add(tf.keras.layers.Dropout(rate=dropout))
    nn.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))
    nn.compile(optimizer=optimizer,loss='binary_crossentropy',metrics=['accuracy'])
    return nn

In [15]:
from scikeras.wrappers import KerasClassifier
model = KerasClassifier(model=create_model)

# GridSearchCV

In [16]:
from sklearn.model_selection import GridSearchCV
gs= GridSearchCV(estimator=model, param_grid=params,scoring='accuracy',cv=10,n_jobs=-1,return_train_score=True,verbose=0)

In [17]:
gs=gs.fit(X,Y)



Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


In [18]:
gs.best_score_

0.86175

In [19]:
gs.best_params_

{'batch_size': 25,
 'epochs': 70,
 'model__activation': 'softmax',
 'model__dropout': 0.1,
 'model__neurons_1': 6,
 'model__neurons_2': 4,
 'model__optimizer': 'adam'}