In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

In [2]:
datas = pd.read_csv("Churn_Modelling.csv")
print(datas)

      RowNumber  CustomerId    Surname  CreditScore Geography  Gender  Age  \
0             1    15634602   Hargrave          619    France  Female   42   
1             2    15647311       Hill          608     Spain  Female   41   
2             3    15619304       Onio          502    France  Female   42   
3             4    15701354       Boni          699    France  Female   39   
4             5    15737888   Mitchell          850     Spain  Female   43   
...         ...         ...        ...          ...       ...     ...  ...   
9995       9996    15606229   Obijiaku          771    France    Male   39   
9996       9997    15569892  Johnstone          516    France    Male   35   
9997       9998    15584532        Liu          709    France  Female   36   
9998       9999    15682355  Sabbatini          772   Germany    Male   42   
9999      10000    15628319     Walker          792    France  Female   28   

      Tenure    Balance  NumOfProducts  HasCrCard  IsActiveMemb

In [4]:
datas = datas.drop(columns = ["RowNumber", "CustomerId", "Surname"])
print(datas)

      CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0             619    France  Female   42       2       0.00              1   
1             608     Spain  Female   41       1   83807.86              1   
2             502    France  Female   42       8  159660.80              3   
3             699    France  Female   39       1       0.00              2   
4             850     Spain  Female   43       2  125510.82              1   
...           ...       ...     ...  ...     ...        ...            ...   
9995          771    France    Male   39       5       0.00              2   
9996          516    France    Male   35      10   57369.61              1   
9997          709    France  Female   36       7       0.00              1   
9998          772   Germany    Male   42       3   75075.31              2   
9999          792    France  Female   28       4  130142.79              1   

      HasCrCard  IsActiveMember  EstimatedSalary  Exited  
0   

In [9]:
X = datas.drop(columns = ["Exited"])
Y= datas["Exited"].to_numpy()
encoder = OneHotEncoder(drop='first')
encoded = encoder.fit_transform(X[["Gender", "Geography"]]).toarray()
X = X.drop(columns=["Gender", "Geography"])
X = np.concatenate([X.values, encoded], axis=1)
print(X)
print(Y)

[[619.  42.   2. ...   0.   0.   0.]
 [608.  41.   1. ...   0.   0.   1.]
 [502.  42.   8. ...   0.   0.   0.]
 ...
 [709.  36.   7. ...   0.   0.   0.]
 [772.  42.   3. ...   1.   1.   0.]
 [792.  28.   4. ...   0.   0.   0.]]
[1 0 1 ... 1 1 0]


In [21]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
print(X)

[[-0.32622142  0.29351742 -1.04175968 ... -1.09598752 -0.57873591
  -0.57380915]
 [-0.44003595  0.19816383 -1.38753759 ... -1.09598752 -0.57873591
   1.74273971]
 [-1.53679418  0.29351742  1.03290776 ... -1.09598752 -0.57873591
  -0.57380915]
 ...
 [ 0.60498839 -0.27860412  0.68712986 ... -1.09598752 -0.57873591
  -0.57380915]
 [ 1.25683526  0.29351742 -0.69598177 ...  0.91241915  1.72790383
  -0.57380915]
 [ 1.46377078 -1.04143285 -0.35020386 ... -1.09598752 -0.57873591
  -0.57380915]]


In [22]:
x_s, x_t, y_s, y_t = train_test_split(X, Y, test_size=0.25)


In [23]:
X_s = torch.tensor(x_s, dtype=torch.float32)
X_t = torch.tensor(x_t, dtype=torch.float32)
Y_s = torch.tensor(y_s, dtype=torch.float32).view(-1,1)
Y_t = torch.tensor(y_t, dtype=torch.float32).view(-1,1)

# Dataset and loader
train_data = TensorDataset(X_s, Y_s)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)


In [24]:
class ANNModel(nn.Module):
    def __init__(self, input_dim):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))  # binary classification
        return x

model = ANNModel(input_dim=X_s.shape[1])

In [25]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 100
for epoch in range(epochs):
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

    if (epoch+1) % 10 == 0:
        with torch.no_grad():
            preds = model(X_t)
            acc = ((preds.round() == Y_t).sum().item()) / len(Y_t)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Test Acc: {acc:.4f}")

Epoch 10/100, Loss: 0.4065, Test Acc: 0.8568
Epoch 20/100, Loss: 0.5182, Test Acc: 0.8560
Epoch 30/100, Loss: 0.5764, Test Acc: 0.8536
Epoch 40/100, Loss: 0.2678, Test Acc: 0.8552
Epoch 50/100, Loss: 0.4099, Test Acc: 0.8516
Epoch 60/100, Loss: 0.0901, Test Acc: 0.8504
Epoch 70/100, Loss: 0.2745, Test Acc: 0.8516
Epoch 80/100, Loss: 0.5013, Test Acc: 0.8464
Epoch 90/100, Loss: 0.2063, Test Acc: 0.8504
Epoch 100/100, Loss: 0.1343, Test Acc: 0.8492


In [27]:
with torch.no_grad():
    preds = model(X_t)
    acc = ((preds.round() == Y_t).sum().item()) / len(Y_t)
print(f"Final Test Accuracy: {acc:.4f}")


Final Test Accuracy: 0.8492
