In [227]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import pickle

In [228]:
## Read the raw churn dataset from the CSV file next to this notebook
df = pd.read_csv(filepath_or_buffer='Churn_Modelling.csv')

In [229]:
# Preview the first rows to check which columns the CSV provides.
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [230]:
## Preprocess the data
## RowNumber, CustomerId and Surname are treated as irrelevant for modelling and removed
columns_to_discard = ['RowNumber', 'CustomerId', 'Surname']
df = df.drop(columns=columns_to_discard)
df.head()


Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [231]:
## Encode the categorical Gender column as integer labels.
## The fitted encoder is kept in `label_encoder_gender` so it can be reused later.
label_encoder_gender = LabelEncoder()
label_encoder_gender.fit(df['Gender'])
df['Gender'] = label_encoder_gender.transform(df['Gender'])
df.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [232]:
## Geography has more than two categories, so it is one-hot encoded
## rather than label encoded.

from sklearn.preprocessing import OneHotEncoder

onehot_encoder_geo = OneHotEncoder()
onehot_encoder_geo.fit(df[['Geography']])
encoded_geo = onehot_encoder_geo.transform(df[['Geography']])
encoded_geo

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 10000 stored elements and shape (10000, 3)>

In [233]:
# Show the column names the fitted encoder generates for the Geography feature.
onehot_encoder_geo.get_feature_names_out(['Geography'])

array(['Geography_France', 'Geography_Germany', 'Geography_Spain'],
      dtype=object)

In [234]:
# Densify the sparse one-hot matrix and label its columns with the encoder's feature names.
geo_feature_names = onehot_encoder_geo.get_feature_names_out(['Geography'])
geo_encoded_df = pd.DataFrame(encoded_geo.toarray(), columns=geo_feature_names)
geo_encoded_df

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,1.0,0.0,0.0
3,1.0,0.0,0.0
4,0.0,0.0,1.0
...,...,...,...
9995,1.0,0.0,0.0
9996,1.0,0.0,0.0
9997,1.0,0.0,0.0
9998,0.0,1.0,0.0


In [235]:
## The fitted encoders will be needed again later, so persist them as pickle files.
## (Only the two encoders are written here; the scaler is saved in a later cell.)
for pkl_path, fitted_encoder in (
    ('label_encoder_gender.pkl', label_encoder_gender),
    ('onehot_encoder_geo.pkl', onehot_encoder_geo),
):
    with open(pkl_path, 'wb') as file:
        pickle.dump(fitted_encoder, file)


In [236]:
# Swap the raw Geography column for its one-hot columns (joined column-wise on the row index).
df_without_geo = df.drop(columns='Geography')
df = pd.concat([df_without_geo, geo_encoded_df], axis=1)
df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [237]:
## Divide the data into independent features (X) and the dependent target (y)
y = df['Exited']
X = df.drop(columns='Exited')

## Split into training and testing data: 20% held out, fixed random_state
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Standardization: the scaler is fitted on the training split only,
## then the same fitted transform is applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [238]:
# Persist the fitted scaler alongside the encoders.
with open('scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)

## ANN Implementation


In [239]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
import datetime

In [240]:
# Number of input features after preprocessing; used as the network's input width.
X_train.shape[1]

12

In [241]:
## Feed-forward binary classifier: input -> 64 (relu) -> 32 (relu) -> 1 (sigmoid).
## The input width is declared with an explicit Input object as the first entry;
## passing `input_shape=` to the first Dense layer of a Sequential model makes
## Keras emit a UserWarning.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  ## one value per preprocessed feature
    Dense(64, activation='relu'),  ## HL-1
    Dense(32, activation='relu'),  ## HL-2
    Dense(1, activation='sigmoid')  ## Output layer
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [242]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [243]:
# Loss for the single sigmoid output: binary cross-entropy.
loss = tf.keras.losses.BinaryCrossentropy()
# Adam optimizer with an explicitly chosen learning rate of 0.01.
opt = tf.keras.optimizers.Adam(learning_rate=0.01)

In [244]:
## Compile the model
## The explicit `opt` / `loss` objects are used here; the string shortcuts
## (optimizer="adam", loss='binary_crossentropy') were the earlier alternative.
model.compile(
    optimizer=opt,
    loss=loss,
    metrics=['accuracy'],
)

In [245]:
## Set up TensorBoard

## Build a timestamped log directory name for this run.
## The suffix is <year><month><day>-<hour><minute><second>; %M is minutes
## (the previous pattern repeated %H and therefore omitted the minutes).
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# TensorBoard callback writing to this run's log directory;
# histogram_freq=1 requests histogram computation every epoch.
tensorflow_callback = TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [246]:
## Set up early stopping
## Training is configured for up to 100 epochs, but if the validation loss stops
## improving well before that there is no point in running them all.
## Stop once val_loss has not improved for 5 epochs and restore the best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [247]:
## Train the model
## NOTE(review): the test split is passed as validation data, so early stopping
## is driven by the same rows that would be used for final evaluation.
training_callbacks = [tensorflow_callback, early_stopping_callback]
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    callbacks=training_callbacks,
)


Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8055 - loss: 0.4434 - val_accuracy: 0.8575 - val_loss: 0.3568
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 824us/step - accuracy: 0.8536 - loss: 0.3581 - val_accuracy: 0.8455 - val_loss: 0.3550
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 790us/step - accuracy: 0.8619 - loss: 0.3364 - val_accuracy: 0.8635 - val_loss: 0.3359
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 795us/step - accuracy: 0.8578 - loss: 0.3418 - val_accuracy: 0.8540 - val_loss: 0.3446
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 749us/step - accuracy: 0.8736 - loss: 0.3176 - val_accuracy: 0.8620 - val_loss: 0.3494
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 751us/step - accuracy: 0.8629 - loss: 0.3331 - val_accuracy: 0.8575 - val_loss: 0.3477
Epoch 7/100


In [248]:
# Persist the trained model to an HDF5 file.
# NOTE(review): other code may load 'model.h5' by this exact name - confirm before renaming
# or switching formats.
model.save('model.h5')



In [249]:
## LOAD Tensorboard Extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [250]:
# Open TensorBoard inline, pointed at the parent folder that holds every timestamped run.
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 35935), started 3:28:13 ago. (Use '!kill 35935' to kill it.)