In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense,Dropout,BatchNormalization
from keras_tuner.tuners import RandomSearch
from keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, accuracy_score


import warnings
# Suppress every warning for the rest of the session and switch seaborn to
# its "darkgrid" theme so all later figures share the same look.
warnings.simplefilter("ignore")
sns.set_style(style="darkgrid")

In [7]:
# Read the bank-churn CSV (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer="../data/churn_bank.csv")

In [8]:
# Show (rows, columns) of the freshly loaded frame as a quick sanity check.
df.shape

(10000, 14)

In [9]:
# Peek at five randomly chosen rows (a different draw on every run).
df.sample(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9448,9449,15628274,Ferri,583,Germany,Male,35,8,149995.72,2,1,0,42143.55,0
5958,5959,15741719,DeRose,540,France,Female,40,3,165298.12,1,0,1,199862.75,0
7093,7094,15644453,Loggia,606,Germany,Female,41,4,132670.53,1,1,0,156476.36,1
3039,3040,15666141,Baldwin,829,Spain,Female,26,8,101440.36,2,1,1,19324.5,0
7453,7454,15702571,Wright,778,Germany,Female,35,1,151958.19,3,1,1,131238.37,1


In [10]:
# Predictors: every column from CreditScore through EstimatedSalary (label
# slices are inclusive on both ends). Selecting by label instead of by
# position keeps the feature set correct even if an extra leading column
# (e.g. a saved index) ever appears in the CSV. The identifier columns
# (RowNumber, CustomerId, Surname) and the target (Exited) stay out.
X = df.loc[:, "CreditScore":"EstimatedSalary"]

In [11]:
# Show one random feature row to confirm which columns were selected.
X.sample(n=1)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
9475,616,France,Male,46,2,0.0,2,1,0,137136.46


In [None]:
# Target: the binary churn flag. Referencing the column by name rather than
# by position 13 makes the intent explicit and fails loudly (KeyError) if the
# column is missing, instead of silently picking a different column.
y = df["Exited"]
y.sample(1)

In [None]:
# Count how many customers fall in each Geography category.
df.loc[:, "Geography"].value_counts()

In [None]:
# One-hot encode the two categorical predictors, dropping the first level of
# each so the dummy columns are not perfectly collinear.
geo, gender = (
    pd.get_dummies(X[column], drop_first=True)
    for column in ("Geography", "Gender")
)

# Swap the raw text columns for their dummy-encoded counterparts; the dummy
# columns are appended after the remaining original columns.
X = pd.concat(
    [X.drop(columns=["Geography", "Gender"]), geo, gender],
    axis=1,
)


Scaling 📏

In [None]:
# Fit the scaler on the TRAINING rows only. Fitting on the full matrix lets
# the test rows influence the mean/std used for scaling (data leakage), which
# makes the held-out evaluation optimistic.
#
# train_test_split picks rows purely from the sample count, test_size and
# random_state, so splitting a plain index array here with the same settings
# as the split cell below yields exactly the rows that will end up in X_train.
# NOTE: keep test_size / random_state in sync with that cell.
train_rows, _ = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)

scaler = StandardScaler().fit(X.iloc[train_rows])

# Apply the train-fitted statistics to every row; X becomes a NumPy array.
X = scaler.transform(X)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the partition
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Show (rows, features) of the training matrix; the second value is the
# number of inputs the network's first layer receives.
X_train.shape

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable between runs (as far as the backend allows).
keras.utils.set_random_seed(42)

# Derive the input width from the data instead of hard-coding 11, so the
# network keeps working if the encoded feature set changes.
n_features = X_train.shape[1]

# Small fully connected binary classifier: two 10-unit ReLU hidden layers
# (He initialisation) followed by a single sigmoid output unit.
clf = Sequential()
clf.add(Dense(units=10, kernel_initializer="he_normal", activation="relu", input_dim=n_features))
clf.add(Dense(units=10, kernel_initializer="he_normal", activation="relu"))
clf.add(Dense(units=1, kernel_initializer="glorot_uniform", activation="sigmoid"))

clf.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# fit() returns a History object (per-epoch metrics), not a model. The last
# 33% of the training rows are set aside by Keras for validation.
history = clf.fit(X_train, y_train, validation_split=0.33, batch_size=10, epochs=50)

# Backward-compatible alias: other cells read the curves via `model.history`.
# The trained network itself is `clf`.
model = history

In [None]:
# Predict churn probabilities for the test rows and convert them to boolean
# labels: True when the predicted probability is strictly above 0.5.
y_pred = clf.predict(X_test) > 0.5

In [None]:
# Score the thresholded predictions against the held-out labels.
acc = accuracy_score(y_test, y_pred)
cv = confusion_matrix(y_test, y_pred)  # confusion matrix (rows: true, cols: predicted)

print("Accuracy: ", acc)

# Last expression of the cell: display the confusion matrix.
cv

In [None]:
# Training vs. validation loss per epoch. A widening gap between the two
# curves indicates overfitting. `history` is the History object returned by
# clf.fit (the same object the `model` alias points to).
fig, ax = plt.subplots(figsize=(8, 6))
for metric_key, curve_label in (("loss", "Loss"), ("val_loss", "Val_Loss")):
    ax.plot(history.history[metric_key], label=curve_label)
ax.legend()
ax.set_title("Overfitting: Loss VS Val_Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, read from the History object
# returned by clf.fit (the same object the `model` alias points to).
fig, ax = plt.subplots(figsize=(8, 6))
for metric_key, curve_label in (("accuracy", "Accuracy"), ("val_accuracy", "Val_Accuracy")):
    ax.plot(history.history[metric_key], label=curve_label)
ax.legend()
ax.set_title("Overfitting: Accuracy VS Val_Accuracy")
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
plt.show()