In [5]:
# dataset: bank costumers are leaving the bank. your job is to build a model to figure out,
# why, and who will potentially leave the bank (exited column). (the bank can target them with special offer)

In [26]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

# 1. preprocessing

In [7]:
df = pd.read_csv("./Churn_Modelling.csv")
print(df.shape)
df.head()

(10000, 14)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [10]:
# remove unnecessary columns
# no missing data
X_independent = df.iloc[:, 3:-1]
y_dependent = df.iloc[:, -1]
print(X_independent.head())
print("")
print(y_dependent.head())

   CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0          619    France  Female   42       2       0.00              1   
1          608     Spain  Female   41       1   83807.86              1   
2          502    France  Female   42       8  159660.80              3   
3          699    France  Female   39       1       0.00              2   
4          850     Spain  Female   43       2  125510.82              1   

   HasCrCard  IsActiveMember  EstimatedSalary  
0          1               1        101348.88  
1          0               1        112542.58  
2          1               0        113931.57  
3          0               0         93826.63  
4          1               1         79084.10  

0    1
1    0
2    1
3    0
4    0
Name: Exited, dtype: int64


In [11]:
y_dependent = np.array(y_dependent).reshape(-1, 1)
ct_geography = ColumnTransformer(transformers=[("encoder", OneHotEncoder(), [1])], remainder="passthrough")
X_independent = np.array(ct_geography.fit_transform(X_independent))

# this can be used to see, the transformed dataset as a dataframe with the names.
# why? -> transforming more columns -> chaotic to see what is what.
#X_new_df = pd.DataFrame(X_independent, columns=ct_geography.get_feature_names_out())
#print("new_df with labels: ", X_new_df)

ct_gender = ColumnTransformer(transformers=[("transformer", OneHotEncoder(), [4])], remainder="passthrough")
X_independent = np.array(ct_gender.fit_transform(X_independent))
print("first line: ", X_independent[0])
print("")
print(type(X_independent))
print(X_independent)
print("")
print("type of y_dependent: ", type(y_dependent))
print("y_dependent: ", y_dependent)



first line:  [1.0 0.0 1.0 0.0 0.0 619 42 2 0.0 1 1 1 101348.88]

<class 'numpy.ndarray'>
[[1.0 0.0 1.0 ... 1 1 101348.88]
 [1.0 0.0 0.0 ... 0 1 112542.58]
 [1.0 0.0 1.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 1.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 1.0 ... 1 0 38190.78]]

type of y_dependent:  <class 'numpy.ndarray'>
y_dependent:  [[1]
 [0]
 [1]
 ...
 [1]
 [1]
 [0]]


In [12]:
X_train, X_test, y_train, y_test = train_test_split(X_independent, y_dependent, test_size=0.3, random_state=101)
print("X_train shape: ", X_train.shape)
print("X_train: \n", X_train)
print("")
print("y_test shape: ", y_test.shape)
print("y_test: \n", y_test)

X_train shape:  (7000, 13)
X_train: 
 [[1.0 0.0 0.0 ... 0 1 140676.98]
 [0.0 1.0 1.0 ... 1 1 141476.56]
 [0.0 1.0 0.0 ... 1 0 22447.85]
 ...
 [0.0 1.0 0.0 ... 1 1 138051.19]
 [0.0 1.0 1.0 ... 1 1 141822.8]
 [1.0 0.0 0.0 ... 1 1 96658.26]]

y_test shape:  (3000, 1)
y_test: 
 [[0]
 [0]
 [1]
 ...
 [0]
 [0]
 [1]]


In [13]:
std_scaler = StandardScaler()
X_train_std_scaled = std_scaler.fit_transform(X_train)
X_test_std_scaled = std_scaler.transform(X_test)

print("first line: ", X_train_std_scaled[0])
print("X_train_std_scaled: ", X_train_std_scaled)
print("")
print("X_test_std_scaled: ", X_test_std_scaled)

first line:  [ 1.09759938 -1.09759938 -1.01496917 -0.57339125  1.75478035 -1.45057405
 -0.93156572  1.37687702 -1.23805017  0.80996205 -1.56090554  0.9793559
  0.70308176]
X_train_std_scaled:  [[ 1.09759938 -1.09759938 -1.01496917 ... -1.56090554  0.9793559
   0.70308176]
 [-0.91107923  0.91107923  0.9852516  ...  0.64065376  0.9793559
   0.71692473]
 [-0.91107923  0.91107923 -1.01496917 ...  0.64065376 -1.02107926
  -1.34379489]
 ...
 [-0.91107923  0.91107923 -1.01496917 ...  0.64065376  0.9793559
   0.657622  ]
 [-0.91107923  0.91107923  0.9852516  ...  0.64065376  0.9793559
   0.72291911]
 [ 1.09759938 -1.09759938 -1.01496917 ...  0.64065376  0.9793559
  -0.0590053 ]]

X_test_std_scaled:  [[-0.91107923  0.91107923 -1.01496917 ...  0.64065376  0.9793559
   0.40123544]
 [-0.91107923  0.91107923  0.9852516  ...  0.64065376  0.9793559
  -0.64634549]
 [ 1.09759938 -1.09759938  0.9852516  ... -1.56090554  0.9793559
   1.23986288]
 ...
 [-0.91107923  0.91107923  0.9852516  ...  0.64065376 

In [14]:
minmax_scaler = MinMaxScaler()
X_train_minmax_scaled = minmax_scaler.fit_transform(X_train)
X_test_minmax_scaled = minmax_scaler.transform(X_test)

print("X_train_minmax_scaled: ", X_train_minmax_scaled)
print("")
print("X_test_minmax_scaled: ", X_test_minmax_scaled)

X_train_minmax_scaled:  [[1.         0.         0.         ... 0.         1.         0.70347065]
 [0.         1.         1.         ... 1.         1.         0.70746937]
 [0.         1.         0.         ... 1.         0.         0.11220426]
 ...
 [0.         1.         0.         ... 1.         1.         0.69033902]
 [0.         1.         1.         ... 1.         1.         0.70920092]
 [1.         0.         0.         ... 1.         1.         0.4833321 ]]

X_test_minmax_scaled:  [[0.         1.         0.         ... 1.         1.         0.61627849]
 [0.         1.         1.         ... 1.         1.         0.3136714 ]
 [1.         0.         1.         ... 0.         1.         0.85852671]
 ...
 [0.         1.         1.         ... 1.         1.         0.49850715]
 [0.         1.         0.         ... 1.         0.         0.24493352]
 [1.         0.         0.         ... 1.         0.         0.22511922]]


# 2. building the network

In [15]:
X_train_std_scaled = torch.from_numpy(X_train_std_scaled).float()
X_test_std_scaled = torch.from_numpy(X_test_std_scaled).float()
y_train = torch.from_numpy(y_train).float()
y_test = torch.from_numpy(y_test)

X_train_minmax_scaled = torch.from_numpy(X_train_minmax_scaled).float()
X_test_minmax_scaled = torch.from_numpy(X_test_minmax_scaled).float()

In [16]:
models = []

In [17]:
# hyperparameters

scaler = "std"
if scaler == "std":
    scaled_set = X_train_std_scaled
elif scaler == "minmax":
    scaled_set == X_train_minmax_scaled

number_of_neurons = 8
learning_batch_size = 32
n_epochs = 200
alpha = 0.001


In [19]:
class Network(nn.Module):


    def __init__(self, input_size, seed=101):
        super().__init__()
        self.seed = torch.manual_seed(seed)
        self.fcl1 = nn.Linear(input_size, number_of_neurons)
        self.fcl2 = nn.Linear(number_of_neurons, number_of_neurons)
        self.fcl3 = nn.Linear(number_of_neurons, 1)


    def forward(self, data):
        signal = self.fcl1(data)
        signal = F.relu(signal)
        signal = self.fcl2(signal)
        signal = F.relu(signal)
        signal = self.fcl3(signal)
        return F.sigmoid(signal)


In [20]:
# create a torch Dataloader -> A DataLoader takes a dataset and provides an iterator over batches of data.
# It handles batching, shuffling, and other data-loading functionalities, making it easier to work with large datasets.
# the primary input dataset of the Dataloader is torch.utils.data.TensorDataset, BUT
# any iterable can be used (e.g: torch.tensor, tuple), but X and y (independent and dependent variables) must be separeted
# e.g: Dataloader(list(zip(X_train, y_train)))

from torch.utils.data import DataLoader


dataloader = DataLoader(list(zip(scaled_set, y_train)), batch_size=learning_batch_size, shuffle=False)

In [21]:
# rather "bridge" than "brain"
brain = Network(scaled_set.shape[1])
optimizer = Adam(brain.parameters(), lr=alpha)

# 3. train the model

In [22]:
for epoch in range(1, n_epochs + 1):
    for features, labels in dataloader:
        prediction = brain(features)
        #print("feature: ", features)
        #print("label: ", labels)
        #print("prediction: ", prediction)
        loss = F.binary_cross_entropy(prediction, labels)

        #backpropagate
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # visualization:
    # keep every X episode on the screen.
    if epoch % 10 == 0: # if the remnant is 0, that means we are in a X step.
        print("\r Epoch: {} \t Loss: {:.4f}".format(epoch, loss.data))


"""
2 way to save:
    1. save with state_dict() -> only the weights and biases of the layers. -> smaller, flexible (build different architecture on it) -> common way.
    2. save the entire model -> complete, all the aspects incl. architechture -> 
"""
save_name = "bank_business_checkpoint.pth"
torch.save(brain.state_dict(), save_name)
print("saved as: ", save_name)

 Epoch: 10 	 Loss: 0.2133
 Epoch: 20 	 Loss: 0.1224
 Epoch: 30 	 Loss: 0.1163
 Epoch: 40 	 Loss: 0.1132
 Epoch: 50 	 Loss: 0.1160
 Epoch: 60 	 Loss: 0.1138
 Epoch: 70 	 Loss: 0.1163
 Epoch: 80 	 Loss: 0.1183
 Epoch: 90 	 Loss: 0.1210
 Epoch: 100 	 Loss: 0.1231
 Epoch: 110 	 Loss: 0.1247
 Epoch: 120 	 Loss: 0.1274
 Epoch: 130 	 Loss: 0.1267
 Epoch: 140 	 Loss: 0.1250
 Epoch: 150 	 Loss: 0.1269
 Epoch: 160 	 Loss: 0.1287
 Epoch: 170 	 Loss: 0.1287
 Epoch: 180 	 Loss: 0.1295
 Epoch: 190 	 Loss: 0.1284
 Epoch: 200 	 Loss: 0.1294
saved as:  bank_business_checkpoint.pth


# 4. check model accuracy

In [31]:
if scaler == "std":
    test_X = X_test_std_scaled
elif scaler == "minmax":
    test_X = X_test_minmax_scaled

brain.eval()
with torch.no_grad():
    y_pred = brain(test_X)

# convert probabilities to 0 or 1
threshold = 0.5
y_pred_int = (y_pred >= threshold)

# boolean masking
compared = (y_pred_int == y_test)
print("compared shape: ", compared.shape, "\n")
print("compared: ", compared, "\n\n")

# accuracy
accuracy = torch.sum(compared) / len(y_test)
print(f"accuracy: {accuracy :.4f}")

# confusion matrix
matrix_acc = confusion_matrix(y_test, y_pred_int)
print("confusin matrix: \n", matrix_acc)

# save model test infos
models.append({
    "last_loss" : loss.data,
    "accuracy" : accuracy,
    "scaler" : scaler,
    "number_of_neurons" : number_of_neurons,
    "learning_batch_size" : learning_batch_size,
    "n_epochs" : n_epochs,
    "alpha" : alpha
})

compared shape:  torch.Size([3000, 1]) 

compared:  tensor([[ True],
        [ True],
        [ True],
        ...,
        [ True],
        [False],
        [False]]) 


accuracy: 0.8663
confusin matrix: 
 [[2260  118]
 [ 283  339]]


In [32]:
for elem in models:
    print(elem)

{'last_loss': tensor(0.1294), 'accuracy': tensor(0.8663), 'scaler': 'std', 'number_of_neurons': 8, 'learning_batch_size': 32, 'n_epochs': 200, 'alpha': 0.001}
{'last_loss': tensor(0.1294), 'accuracy': tensor(0.8663), 'scaler': 'std', 'number_of_neurons': 8, 'learning_batch_size': 32, 'n_epochs': 200, 'alpha': 0.001}
{'last_loss': tensor(0.1294), 'accuracy': tensor(0.8663), 'scaler': 'std', 'number_of_neurons': 8, 'learning_batch_size': 32, 'n_epochs': 200, 'alpha': 0.001}
{'last_loss': tensor(0.1294), 'accuracy': tensor(0.8663), 'scaler': 'std', 'number_of_neurons': 8, 'learning_batch_size': 32, 'n_epochs': 200, 'alpha': 0.001}


In [34]:
# predict from random data
random_data = [0, 1, 1, 0, 0, 600, 40, 3, 60000, 2, 1, 1, 50000]
brain.eval()
with torch.no_grad():
    prediction = brain(torch.tensor(std_scaler.transform([random_data])).float())
print("prediction: ", str(prediction.data.numpy() * 100) + "%")

prediction:  [[1.942867]]%
