In [124]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder #for data preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt


In [125]:
df = pd.read_csv("Churn_Modelling.csv")
print(df.shape)
print(df.columns.tolist())
df.head(10)

(10000, 14)
['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography', 'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard', 'IsActiveMember', 'EstimatedSalary', 'Exited']


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0
5,6,15574012,Chu,645,Spain,Male,44,8,113755.78,2,1,0,149756.71,1
6,7,15592531,Bartlett,822,France,Male,50,7,0.0,2,1,1,10062.8,0
7,8,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,1,0,119346.88,1
8,9,15792365,He,501,France,Male,44,4,142051.07,2,0,1,74940.5,0
9,10,15592389,H?,684,France,Male,27,2,134603.88,1,1,1,71725.73,0


In [126]:
print(df.dtypes)

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object


In [127]:
#data preprocessing

df = df.drop(columns=["RowNumber", "CustomerId", "Surname"]) #do not affect model predictions
gender_label = LabelEncoder()
df["Gender"] = gender_label.fit_transform(df["Gender"]) #encode gender
df = pd.get_dummies(df, columns=["Geography"], drop_first=True) # One-hot encoding geography

#splitting data into features and target
X = df.drop("Exited", axis=1).values
y = df["Exited"].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) #split to train and test

#scaling features so that ANN works better(on similar scales)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# convert to pytorch tensors and implementation

X_train = torch.tensor(X_train, dtype=torch.float32)
X_test  = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test  = torch.tensor(y_test, dtype=torch.float32)

class ann(nn.Module):
    def __init__(self):
        super(ann, self).__init__()
        self.fc1 = nn.Linear(11, 64)  # 11 input features & 64 neurons considered as a balance between over-fitting and under-fitting
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1) # single output neauron for binary classification
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid() #output a value between 0 and 1

        # more neurons chosen since the model initially did not learn class 1

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

In [129]:
# initializing and optimizing the model

model = ann()
criterion = nn.BCELoss() # Binary Cross-entropy is ideal for binary classification
optimizer= optim.Adam(model.parameters(),lr=0.001) #standard and stable choice for learning rate

# training the model

epochs=300 # sufficient learning without heavy over-fitting
# more epochs can cause over-fitting although reduces loss
# chosen by analysing the confusion matrix
losses = []
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs.squeeze(), y_train)
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
    if (epoch + 1) % 10 == 0:
       print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [10/300], Loss: 0.6720
Epoch [20/300], Loss: 0.6116
Epoch [30/300], Loss: 0.5524
Epoch [40/300], Loss: 0.5114
Epoch [50/300], Loss: 0.4888
Epoch [60/300], Loss: 0.4704
Epoch [70/300], Loss: 0.4559
Epoch [80/300], Loss: 0.4445
Epoch [90/300], Loss: 0.4353
Epoch [100/300], Loss: 0.4280
Epoch [110/300], Loss: 0.4221
Epoch [120/300], Loss: 0.4170
Epoch [130/300], Loss: 0.4121
Epoch [140/300], Loss: 0.4072
Epoch [150/300], Loss: 0.4019
Epoch [160/300], Loss: 0.3962
Epoch [170/300], Loss: 0.3902
Epoch [180/300], Loss: 0.3840
Epoch [190/300], Loss: 0.3778
Epoch [200/300], Loss: 0.3715
Epoch [210/300], Loss: 0.3652
Epoch [220/300], Loss: 0.3592
Epoch [230/300], Loss: 0.3535
Epoch [240/300], Loss: 0.3483
Epoch [250/300], Loss: 0.3438
Epoch [260/300], Loss: 0.3399
Epoch [270/300], Loss: 0.3365
Epoch [280/300], Loss: 0.3336
Epoch [290/300], Loss: 0.3310
Epoch [300/300], Loss: 0.3287


In [130]:
model.eval()

with torch.no_grad():
    predictions = model(X_test)
    predictions = (predictions >= 0.5).float()

accuracy = accuracy_score(y_test, predictions)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.8585


In [131]:
cm = confusion_matrix(y_test, predictions)
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[1537   70]
 [ 213  180]]
