In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.auto import tqdm

In [2]:
dataset = pd.read_csv('data/churn_modelling_data.csv')
dataset

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [3]:
X = dataset.iloc[:,3:13].values # Independent values
y = dataset.iloc[:,13].values   # Dependent values
X, y

(array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
        [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
        [502, 'France', 'Female', ..., 1, 0, 113931.57],
        ...,
        [709, 'France', 'Female', ..., 0, 1, 42085.58],
        [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
        [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object),
 array([1, 0, 1, ..., 1, 1, 0]))

The function `LabelEncoder()` converts string values to numerical values. I this case we will use it to replace string values of attribute gender: female and male.

In [4]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
#label_encoder = 
X[:,2] = LabelEncoder().fit_transform(X[:,2]) 

In [5]:
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]
[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


The option `reaminder = passthrough` left the other columns without any change.

In [6]:
from sklearn.compose import ColumnTransformer
column_transformer = ColumnTransformer(transformers=[('ohe', OneHotEncoder(),[1])],
                                       remainder='passthrough')

After we transformed the first columns, we have to fit it, transform the dataset to label enconde and one-hot encode the column.

In [7]:
X = np.array(column_transformer.fit_transform(X), dtype = str)
print(X)

[['1.0' '0.0' '0.0' ... '1' '1' '101348.88']
 ['0.0' '0.0' '1.0' ... '0' '1' '112542.58']
 ['1.0' '0.0' '0.0' ... '1' '0' '113931.57']
 ...
 ['1.0' '0.0' '0.0' ... '0' '1' '42085.58']
 ['0.0' '1.0' '0.0' ... '1' '0' '92888.52']
 ['1.0' '0.0' '0.0' ... '1' '0' '38190.78']]
[['1.0' '0.0' '0.0' ... '1' '1' '101348.88']
 ['0.0' '0.0' '1.0' ... '0' '1' '112542.58']
 ['1.0' '0.0' '0.0' ... '1' '0' '113931.57']
 ...
 ['1.0' '0.0' '0.0' ... '0' '1' '42085.58']
 ['0.0' '1.0' '0.0' ... '1' '0' '92888.52']
 ['1.0' '0.0' '0.0' ... '1' '0' '38190.78']]


In [8]:
X = X[:,1:]
print(X)

[['0.0' '0.0' '619' ... '1' '1' '101348.88']
 ['0.0' '1.0' '608' ... '0' '1' '112542.58']
 ['0.0' '0.0' '502' ... '1' '0' '113931.57']
 ...
 ['0.0' '0.0' '709' ... '0' '1' '42085.58']
 ['1.0' '0.0' '772' ... '1' '0' '92888.52']
 ['0.0' '0.0' '792' ... '1' '0' '38190.78']]
[['0.0' '0.0' '619' ... '1' '1' '101348.88']
 ['0.0' '1.0' '608' ... '0' '1' '112542.58']
 ['0.0' '0.0' '502' ... '1' '0' '113931.57']
 ...
 ['0.0' '0.0' '709' ... '0' '1' '42085.58']
 ['1.0' '0.0' '772' ... '1' '0' '92888.52']
 ['0.0' '0.0' '792' ... '1' '0' '38190.78']]


The first two values are:

- `0.0` `0.0`: France
- `0.0` `1.0`: Spain
- `1.0` `0.0`: Germany

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=0)

In [10]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train = scaler.fit_transform(X=X_train) 
X_test = scaler.transform(X=X_test)

Standard Scale: 
$$z = \frac{(x - \mu)}{\sigma}$$

In [11]:
import torch
from torch import nn

In [12]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [13]:
class Classifier(nn.Module):
    
    def __init__(self):
        super().__init__()
        # We have 11 independent variables, including two columns of geography
        # We have 6 neurons
        self.linear_1 = nn.Linear(in_features=11, out_features=6)
        self.linear_2 = nn.Linear(in_features=6, out_features=6)
        self.linear_3 = nn.Linear(in_features=6, out_features=1)
        
    def forward(self, x):
        x = torch.relu(self.linear_1(x))
        x = torch.relu(self.linear_2(x))
        x = torch.sigmoid(self.linear_3(x))
        return x       

In [14]:
model = Classifier()
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-4)

In [15]:
num_epochs = 100
batch_size = 10
for epoch in tqdm(range(num_epochs)):
    for i in range(0, len(X_train), batch_size):
        model.train()
        
        X_inputs = X_train[i:i+batch_size]
        y_labels = y_train[i:i+batch_size].unsqueeze(1)
        
        # Forward pass
        y_pred = model(X_inputs)
        loss = loss_function(y_pred, y_labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  0%|          | 0/100 [00:00<?, ?it/s]

  1%|          | 1/100 [00:01<02:03,  1.25s/it]

  2%|▏         | 2/100 [00:02<02:14,  1.37s/it]

  3%|▎         | 3/100 [00:03<01:59,  1.23s/it]

  4%|▍         | 4/100 [00:05<02:04,  1.30s/it]

  5%|▌         | 5/100 [00:06<01:57,  1.24s/it]

  6%|▌         | 6/100 [00:07<01:52,  1.20s/it]

  7%|▋         | 7/100 [00:08<01:58,  1.28s/it]

  8%|▊         | 8/100 [00:10<02:06,  1.38s/it]

  9%|▉         | 9/100 [00:11<02:03,  1.36s/it]

 10%|█         | 10/100 [00:12<01:52,  1.26s/it]

Epoch [10/100], Loss: 0.3255
Epoch [10/100], Loss: 0.3255


 11%|█         | 11/100 [00:14<01:51,  1.25s/it]

 12%|█▏        | 12/100 [00:14<01:38,  1.12s/it]

 13%|█▎        | 13/100 [00:15<01:28,  1.02s/it]

 14%|█▍        | 14/100 [00:16<01:21,  1.05it/s]

 15%|█▌        | 15/100 [00:17<01:15,  1.13it/s]

 16%|█▌        | 16/100 [00:18<01:24,  1.00s/it]

 17%|█▋        | 17/100 [00:19<01:18,  1.06it/s]

 18%|█▊        | 18/100 [00:20<01:22,  1.01s/it]

 19%|█▉        | 19/100 [00:21<01:18,  1.03it/s]

 20%|██        | 20/100 [00:22<01:14,  1.08it/s]

Epoch [20/100], Loss: 0.2325
Epoch [20/100], Loss: 0.2325


 21%|██        | 21/100 [00:22<01:10,  1.11it/s]

 22%|██▏       | 22/100 [00:23<01:06,  1.17it/s]

 23%|██▎       | 23/100 [00:24<01:05,  1.18it/s]

 24%|██▍       | 24/100 [00:25<01:03,  1.20it/s]

 25%|██▌       | 25/100 [00:26<01:02,  1.20it/s]

 26%|██▌       | 26/100 [00:27<01:02,  1.18it/s]

 27%|██▋       | 27/100 [00:27<01:01,  1.20it/s]

 28%|██▊       | 28/100 [00:28<01:00,  1.19it/s]

 29%|██▉       | 29/100 [00:29<00:59,  1.19it/s]

 30%|███       | 30/100 [00:30<00:59,  1.17it/s]

Epoch [30/100], Loss: 0.1780
Epoch [30/100], Loss: 0.1780


 31%|███       | 31/100 [00:31<00:58,  1.18it/s]

 32%|███▏      | 32/100 [00:32<00:56,  1.21it/s]

 33%|███▎      | 33/100 [00:32<00:53,  1.24it/s]

 34%|███▍      | 34/100 [00:33<00:55,  1.19it/s]

 35%|███▌      | 35/100 [00:34<00:54,  1.18it/s]

 36%|███▌      | 36/100 [00:35<00:52,  1.22it/s]

 37%|███▋      | 37/100 [00:36<00:50,  1.24it/s]

 38%|███▊      | 38/100 [00:36<00:49,  1.24it/s]

 39%|███▉      | 39/100 [00:37<00:50,  1.21it/s]

 40%|████      | 40/100 [00:38<00:48,  1.23it/s]

Epoch [40/100], Loss: 0.1416
Epoch [40/100], Loss: 0.1416


 41%|████      | 41/100 [00:39<00:49,  1.20it/s]

 42%|████▏     | 42/100 [00:40<00:48,  1.20it/s]

 43%|████▎     | 43/100 [00:41<00:48,  1.18it/s]

 44%|████▍     | 44/100 [00:41<00:47,  1.19it/s]

 45%|████▌     | 45/100 [00:42<00:45,  1.20it/s]

 46%|████▌     | 46/100 [00:43<00:45,  1.18it/s]

 47%|████▋     | 47/100 [00:44<00:44,  1.20it/s]

 48%|████▊     | 48/100 [00:45<00:44,  1.18it/s]

 49%|████▉     | 49/100 [00:46<00:43,  1.19it/s]

 50%|█████     | 50/100 [00:47<00:42,  1.17it/s]

Epoch [50/100], Loss: 0.1196
Epoch [50/100], Loss: 0.1196


 51%|█████     | 51/100 [00:47<00:42,  1.16it/s]

 52%|█████▏    | 52/100 [00:48<00:41,  1.16it/s]

 53%|█████▎    | 53/100 [00:49<00:39,  1.18it/s]

 54%|█████▍    | 54/100 [00:50<00:40,  1.14it/s]

 55%|█████▌    | 55/100 [00:51<00:39,  1.14it/s]

 56%|█████▌    | 56/100 [00:52<00:38,  1.15it/s]

 57%|█████▋    | 57/100 [00:53<00:36,  1.17it/s]

 58%|█████▊    | 58/100 [00:54<00:36,  1.16it/s]

 59%|█████▉    | 59/100 [00:54<00:35,  1.14it/s]

 60%|██████    | 60/100 [00:55<00:35,  1.13it/s]

Epoch [60/100], Loss: 0.1101
Epoch [60/100], Loss: 0.1101


 61%|██████    | 61/100 [00:56<00:33,  1.16it/s]

 62%|██████▏   | 62/100 [00:57<00:31,  1.20it/s]

 63%|██████▎   | 63/100 [00:58<00:30,  1.22it/s]

 64%|██████▍   | 64/100 [00:59<00:29,  1.22it/s]

 65%|██████▌   | 65/100 [00:59<00:28,  1.22it/s]

 66%|██████▌   | 66/100 [01:00<00:28,  1.20it/s]

 67%|██████▋   | 67/100 [01:01<00:27,  1.20it/s]

 68%|██████▊   | 68/100 [01:02<00:26,  1.20it/s]

 69%|██████▉   | 69/100 [01:03<00:25,  1.21it/s]

 70%|███████   | 70/100 [01:03<00:24,  1.23it/s]

Epoch [70/100], Loss: 0.1056
Epoch [70/100], Loss: 0.1056


 71%|███████   | 71/100 [01:04<00:23,  1.21it/s]

 72%|███████▏  | 72/100 [01:05<00:23,  1.19it/s]

 73%|███████▎  | 73/100 [01:06<00:23,  1.17it/s]

 74%|███████▍  | 74/100 [01:07<00:22,  1.18it/s]

 75%|███████▌  | 75/100 [01:08<00:20,  1.19it/s]

 76%|███████▌  | 76/100 [01:08<00:19,  1.22it/s]

 77%|███████▋  | 77/100 [01:09<00:18,  1.21it/s]

 78%|███████▊  | 78/100 [01:10<00:18,  1.21it/s]

 79%|███████▉  | 79/100 [01:11<00:17,  1.20it/s]

 80%|████████  | 80/100 [01:12<00:16,  1.21it/s]

Epoch [80/100], Loss: 0.1024
Epoch [80/100], Loss: 0.1024


 81%|████████  | 81/100 [01:13<00:16,  1.14it/s]

 82%|████████▏ | 82/100 [01:14<00:15,  1.15it/s]

 83%|████████▎ | 83/100 [01:14<00:14,  1.17it/s]

 84%|████████▍ | 84/100 [01:15<00:13,  1.17it/s]

 85%|████████▌ | 85/100 [01:16<00:12,  1.19it/s]

 86%|████████▌ | 86/100 [01:17<00:11,  1.19it/s]

 87%|████████▋ | 87/100 [01:18<00:11,  1.17it/s]

 88%|████████▊ | 88/100 [01:19<00:09,  1.21it/s]

 89%|████████▉ | 89/100 [01:20<00:09,  1.16it/s]

 90%|█████████ | 90/100 [01:20<00:08,  1.17it/s]

Epoch [90/100], Loss: 0.0992
Epoch [90/100], Loss: 0.0992


 91%|█████████ | 91/100 [01:21<00:08,  1.12it/s]

 92%|█████████▏| 92/100 [01:22<00:07,  1.12it/s]

 93%|█████████▎| 93/100 [01:23<00:05,  1.17it/s]

 94%|█████████▍| 94/100 [01:24<00:05,  1.13it/s]

 95%|█████████▌| 95/100 [01:25<00:04,  1.15it/s]

 96%|█████████▌| 96/100 [01:26<00:03,  1.16it/s]

 97%|█████████▋| 97/100 [01:27<00:02,  1.15it/s]

 98%|█████████▊| 98/100 [01:27<00:01,  1.19it/s]

 99%|█████████▉| 99/100 [01:28<00:00,  1.19it/s]

100%|██████████| 100/100 [01:29<00:00,  1.19it/s]

100%|██████████| 100/100 [01:29<00:00,  1.12it/s]

Epoch [100/100], Loss: 0.0971
Epoch [100/100], Loss: 0.0971





In [16]:
model.eval()
with torch.inference_mode():
    y_pred = model(X_test)
    y_pred = (y_pred > 0.5)
print(y_pred)

tensor([[False],
        [False],
        [False],
        ...,
        [False],
        [False],
        [False]])
tensor([[False],
        [False],
        [False],
        ...,
        [False],
        [False],
        [False]])


In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score
confusion_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(confusion_matrix)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

[[1510   85]
 [ 197  208]]
0.859
[[1510   85]
 [ 197  208]]
0.859
