In [None]:
import torch
from torch import nn
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, ConfusionMatrixDisplay, classification_report, precision_score, recall_score, f1_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, LabelEncoder

In [None]:
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch (plus every CUDA device when one is available)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed everything once, before any stochastic step below (model initialisation, dropout).
set_seed(0)

In [None]:
# Load the raw customer table; the path is relative to the notebook's working directory.
DATA_PATH = "WA_Fn-UseC_-Telco-Customer-Churn.csv"
df = pd.read_csv(DATA_PATH)

# Summary statistics of the columns pandas parsed as numeric.
df.describe()

In [None]:
# Missing values per column (isna is the same check as isnull).
df.isna().sum()

In [None]:
# Preview only the first rows instead of dumping the whole frame into the cell output.
df.head()

In [None]:
# Absolute class counts of the 'Churn' column (normalize defaults to False).
df['Churn'].value_counts()

In [None]:
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN
# (errors='coerce') and the rows holding them are dropped.
df = (
    df
    .assign(TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce'))
    .dropna(subset=['TotalCharges'])
)

In [None]:
# Category counts for 'PaperlessBilling'.
df['PaperlessBilling'].value_counts()

In [None]:
# Category counts for 'gender'.
# Original note: "0 x" — presumably the column's position in X; TODO confirm.
df['gender'].value_counts()

In [None]:
# Category counts for 'InternetService'.
# Original note: "7" — presumably the column's position in X; TODO confirm.
df['InternetService'].value_counts()

In [None]:
# Category counts for 'MultipleLines'.
# Original note: "6" — presumably the column's position in X; TODO confirm.
df['MultipleLines'].value_counts()

In [None]:
# Category counts for 'Contract'.
# Original note: "14" — presumably the column's position in X; TODO confirm.
df['Contract'].value_counts()

In [None]:
# Category counts for 'PaymentMethod'.
# Original note: "16" — presumably the column's position in X; TODO confirm.
df['PaymentMethod'].value_counts()

In [None]:
# Sanity check: number of TotalCharges values that are still missing.
df['TotalCharges'].isna().sum()

In [None]:
# Features: every column except the first and the last.
# (The first column is presumably a row identifier — TODO confirm.)
last_col = df.shape[1] - 1
X = df.iloc[:, 1:last_col]

# Target: the last column, as a NumPy array.
y = df.iloc[:, last_col].to_numpy()

In [None]:
# Map the target labels to integer codes; `le` keeps the fitted mapping.
le = LabelEncoder().fit(y)
y = le.transform(y)

# Splitting and Encoding the Data

In [None]:
# 80/20 hold-out split with a fixed random_state so it is reproducible.
# NOTE(review): the split is not stratified on y — confirm that is intended
# given the class imbalance handled later via pos_weight.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Encode categorical columns and scale numeric ones.
# One-hot encoded columns.
categorical_data = ['MultipleLines', 'InternetService', 'Contract', 'PaymentMethod']
# Columns mapped to integer codes by OrdinalEncoder.
label_encoding_cols = ['gender', 'Partner', 'Dependents', 'PhoneService',
                'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaperlessBilling']
# Columns standardised to zero mean / unit variance.
numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']

ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_data),
                  ('ordinal', OrdinalEncoder(), label_encoding_cols),
                  ('num', StandardScaler(), numeric_cols)],
    # Any column of X not listed above is passed through unchanged.
    remainder="passthrough",
    # Always return a dense ndarray. With the default threshold the result may be
    # a SciPy sparse matrix (OneHotEncoder emits sparse output), which the later
    # torch.tensor(...) conversion cannot consume.
    sparse_threshold=0,
)

# Fit on the training split only, then apply the same fitted transform to the test split.
X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

In [None]:
# Feature matrices as float32 tensors.
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

# Targets as float32 column vectors (one row per sample, one column) so they
# line up with the single-output models defined below.
y_train = torch.tensor(y_train, dtype=torch.float32)[:, None]
y_test = torch.tensor(y_test, dtype=torch.float32)[:, None]

In [None]:
# (n_samples, n_features) of the encoded training matrix.
X_train.size()

# Models


In [None]:
# Number of input features; the first Linear layer of every model must match it.
X_train.size(1)

In [None]:
class ChurnModel(nn.Module):
  """Plain MLP: three 128-unit hidden layers with ReLU, then a 1-unit logit head.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.relu1 = nn.ReLU()

    self.layer_2 = nn.Linear(128, 128)
    self.relu2 = nn.ReLU()

    self.layer_3 = nn.Linear(128, 128)
    self.relu3 = nn.ReLU()

    self.layer_4 = nn.Linear(128, 1)

  def forward(self, x):
    """Return one raw logit per input row (no sigmoid is applied here)."""
    x = self.relu1(self.layer_1(x))
    x = self.relu2(self.layer_2(x))
    # relu3 was defined but never applied, which let layer_3 and layer_4
    # collapse into a single linear map.
    x = self.relu3(self.layer_3(x))
    return self.layer_4(x)

In [None]:
class ChurnModelV2(nn.Module):
  """Two blocks of Linear -> BatchNorm1d -> ReLU -> Dropout(0.2), then a 1-unit logit head.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.batchnorm1 = nn.BatchNorm1d(num_features=128)
    self.relu1 = nn.ReLU()
    self.dropout1 = nn.Dropout(p=0.2)

    self.layer_2 = nn.Linear(128, 128)
    self.batchnorm2 = nn.BatchNorm1d(num_features=128)
    self.relu2 = nn.ReLU()
    self.dropout2 = nn.Dropout(p=0.2)

    self.layer_3 = nn.Linear(128, 1)

  def forward(self, x):
    """Return one raw logit per input row (no sigmoid is applied here)."""
    x = self.dropout1(self.relu1(self.batchnorm1(self.layer_1(x))))
    x = self.dropout2(self.relu2(self.batchnorm2(self.layer_2(x))))
    return self.layer_3(x)


In [None]:
class ChurnModelV3(nn.Module):
  """MLP with 128-unit hidden layers that ends in a Sigmoid, so it outputs probabilities.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.relu1 = nn.ReLU()

    self.layer_2 = nn.Linear(128, 128)
    self.relu2 = nn.ReLU()

    self.layer_3 = nn.Linear(128, 128)

    self.layer_4 = nn.Linear(128, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return one value in [0, 1] per input row (sigmoid already applied)."""
    x = self.relu1(self.layer_1(x))
    x = self.relu2(self.layer_2(x))
    # NOTE(review): there is no activation between layer_3 and layer_4, so the
    # two act as one linear map — confirm whether a ReLU was intended here.
    x = self.layer_3(x)
    x = self.layer_4(x)
    return self.sigmoid(x)

In [None]:
class ChurnModelV4(nn.Module):
  """Two blocks of Linear -> BatchNorm1d -> ReLU -> Dropout(0.2), then Linear -> Sigmoid.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.batchnorm1 = nn.BatchNorm1d(num_features=128)
    self.relu1 = nn.ReLU()
    self.dropout1 = nn.Dropout(p = 0.2)

    self.layer_2 = nn.Linear(128, 128)
    self.batchnorm2 = nn.BatchNorm1d(num_features=128)
    self.relu2 = nn.ReLU()
    self.dropout2 = nn.Dropout(p = 0.2)

    self.layer_3 = nn.Linear(128, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return one value in [0, 1] per input row (sigmoid already applied)."""
    x = self.dropout1(self.relu1(self.batchnorm1(self.layer_1(x))))
    x = self.dropout2(self.relu2(self.batchnorm2(self.layer_2(x))))
    return self.sigmoid(self.layer_3(x))

In [None]:
class ChurnModelV5(nn.Module):
  """MLP with two 128-unit LeakyReLU hidden layers and a 1-unit logit head.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.LeakyRelu1 = nn.LeakyReLU()

    self.layer_2 = nn.Linear(128, 128)
    self.LeakyRelu2 = nn.LeakyReLU()

    self.layer_3 = nn.Linear(128, 1)

  def forward(self, x):
    """Return one raw logit per input row (no sigmoid is applied here)."""
    hidden = self.LeakyRelu1(self.layer_1(x))
    hidden = self.LeakyRelu2(self.layer_2(hidden))
    return self.layer_3(hidden)

In [None]:
class ChurnModelV6(nn.Module):
  """Two blocks of Linear -> BatchNorm1d -> LeakyReLU -> Dropout(0.2), then Linear -> Sigmoid.

  The activation attributes keep the names ``relu1`` / ``relu2`` although they
  hold ``nn.LeakyReLU`` modules.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.batchnorm1 = nn.BatchNorm1d(num_features=128)
    self.relu1 = nn.LeakyReLU()
    self.dropout1 = nn.Dropout(p = 0.2)

    self.layer_2 = nn.Linear(128, 128)
    self.batchnorm2 = nn.BatchNorm1d(num_features=128)
    self.relu2 = nn.LeakyReLU()
    self.dropout2 = nn.Dropout(p = 0.2)

    self.layer_3 = nn.Linear(128, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return one value in [0, 1] per input row (sigmoid already applied)."""
    x = self.dropout1(self.relu1(self.batchnorm1(self.layer_1(x))))
    x = self.dropout2(self.relu2(self.batchnorm2(self.layer_2(x))))
    return self.sigmoid(self.layer_3(x))

In [None]:
class ChurnModelV7(nn.Module):
  """Three blocks of Linear -> BatchNorm1d -> LeakyReLU -> Dropout, then a 1-unit logit head.

  Block widths are 128, 128 and 64; dropout rates are 0.2, 0.3 and 0.2.
  The activation attributes keep the names ``relu1`` .. ``relu3`` although they
  hold ``nn.LeakyReLU`` modules.

  Args:
    in_features: width of the input feature vector. Defaults to 28, the value
      that used to be hard-coded.
  """
  def __init__(self, in_features=28):
    super().__init__()
    self.layer_1 = nn.Linear(in_features, 128)
    self.batchnorm1 = nn.BatchNorm1d(num_features=128)
    self.relu1 = nn.LeakyReLU()
    self.dropout1 = nn.Dropout(p = 0.2)

    self.layer_2 = nn.Linear(128, 128)
    self.batchnorm2 = nn.BatchNorm1d(num_features=128)
    self.relu2 = nn.LeakyReLU()
    self.dropout2 = nn.Dropout(p = 0.3)

    self.layer_3 = nn.Linear(128, 64)
    self.batchnorm3 = nn.BatchNorm1d(num_features=64)
    self.relu3 = nn.LeakyReLU()
    self.dropout3 = nn.Dropout(p = 0.2)

    self.layer_4 = nn.Linear(64, 1)

  def forward(self, x):
    """Return one raw logit per input row (no sigmoid is applied here)."""
    blocks = (
        (self.layer_1, self.batchnorm1, self.relu1, self.dropout1),
        (self.layer_2, self.batchnorm2, self.relu2, self.dropout2),
        (self.layer_3, self.batchnorm3, self.relu3, self.dropout3),
    )
    for linear, norm, activation, dropout in blocks:
      x = dropout(activation(norm(linear(x))))
    return self.layer_4(x)

In [None]:
# Instantiate one network per architecture, in this exact order so the seeded
# weight initialisation stays the same.
# Models 3, 4 and 6 end in a Sigmoid (probabilities); 1, 2, 5 and 7 emit logits.
model_1, model_2, model_3, model_4, model_5, model_6, model_7 = (
    ChurnModel(),
    ChurnModelV2(),
    ChurnModelV3(),
    ChurnModelV4(),
    ChurnModelV5(),
    ChurnModelV6(),
    ChurnModelV7(),
)

# Initial Training set Accuracies


In [None]:
# MODEL 1 — accuracy of the untrained network on the training split
with torch.inference_mode():
  y_pred = model_1(X_train)        # raw logits
  y_pred_probs = y_pred.sigmoid()
  y_labels = y_pred_probs.round()  # nearest class label

cm_1 = confusion_matrix(y_train, y_labels)
print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")

fig, ax = plt.subplots(figsize=(4, 4))
disp = ConfusionMatrixDisplay(cm_1, display_labels=["Stayed", "Exited"])
disp.plot(ax=ax, cmap="Blues")
ax.set_title("Model-1 Confusion Matrix")
ax.grid(False)
plt.show()

In [None]:
# MODEL 2 — accuracy of the untrained network on the training split
# Switch to eval mode first: in train mode Dropout randomly zeroes activations
# and BatchNorm updates its running statistics during this forward pass.
model_2.eval()
with torch.inference_mode():
  y_pred = model_2(X_train)              # raw logits
  y_pred_probs = torch.sigmoid(y_pred)
  y_labels = torch.round(y_pred_probs)   # nearest class label

print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")
cm_2 = confusion_matrix(y_train, y_labels)

fig, ax = plt.subplots(figsize=(4, 4))

disp = ConfusionMatrixDisplay(confusion_matrix=cm_2, display_labels=["Stayed", "Exited"])
disp.plot(cmap="Blues", ax=ax)
ax.set_title("Model-2 Confusion Matrix")
ax.grid(False)
plt.show()

In [None]:
# MODEL 3 — accuracy of the untrained network on the training split
with torch.inference_mode():
  y_pred = model_3(X_train)   # already probabilities (the model ends in a Sigmoid)
  y_labels = y_pred.round()   # nearest class label

cm_3 = confusion_matrix(y_train, y_labels)
print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")

fig, ax = plt.subplots(figsize=(4, 4))
disp = ConfusionMatrixDisplay(cm_3, display_labels=["Stayed", "Exited"])
disp.plot(ax=ax, cmap="Blues")
ax.set_title("Model-3 Confusion Matrix")
ax.grid(False)
plt.show()

In [None]:
# MODEL 4 — accuracy of the untrained network on the training split
# Switch to eval mode first: in train mode Dropout randomly zeroes activations
# and BatchNorm updates its running statistics during this forward pass.
model_4.eval()
with torch.inference_mode():
  y_pred = model_4(X_train)       # already probabilities (the model ends in a Sigmoid)
  y_labels = torch.round(y_pred)  # nearest class label

print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")
cm_4 = confusion_matrix(y_train, y_labels)

fig, ax = plt.subplots(figsize=(4, 4))

disp = ConfusionMatrixDisplay(confusion_matrix=cm_4, display_labels=["Stayed", "Exited"])
disp.plot(cmap="Blues", ax=ax)
ax.set_title("Model-4 Confusion Matrix")
ax.grid(False)
plt.show()

In [None]:
# MODEL 5 — accuracy of the untrained network on the training split
with torch.inference_mode():
  y_pred = model_5(X_train)        # raw logits
  y_pred_probs = y_pred.sigmoid()
  y_labels = y_pred_probs.round()  # nearest class label

cm_5 = confusion_matrix(y_train, y_labels)
print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")

fig, ax = plt.subplots(figsize=(4, 4))
disp = ConfusionMatrixDisplay(cm_5, display_labels=["Stayed", "Exited"])
disp.plot(ax=ax, cmap="Blues")
ax.set_title("Model-5 Confusion Matrix")
ax.grid(False)
plt.show()

In [None]:
# MODEL 6 — accuracy of the untrained network on the training split
# Switch to eval mode first: in train mode Dropout randomly zeroes activations
# and BatchNorm updates its running statistics during this forward pass.
model_6.eval()
with torch.inference_mode():
  y_pred = model_6(X_train)       # already probabilities (the model ends in a Sigmoid)
  y_labels = torch.round(y_pred)  # nearest class label

print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")

cm_6 = confusion_matrix(y_train, y_labels)

fig, ax = plt.subplots(figsize=(4, 4))

disp = ConfusionMatrixDisplay(confusion_matrix=cm_6, display_labels=["Stayed", "Exited"])
disp.plot(cmap="Blues", ax=ax)
ax.set_title("Model-6 Confusion Matrix")
ax.grid(False)
plt.show()

In [None]:
# MODEL 7 — accuracy of the untrained network on the training split
# This cell used to evaluate model_1 again, overwrite cm_1 and title the plot
# "Model-1"; it now evaluates model_7 as its header says.
# eval mode keeps Dropout inactive and BatchNorm statistics untouched here.
model_7.eval()
with torch.inference_mode():
  y_pred = model_7(X_train)              # raw logits
  y_pred_probs = torch.sigmoid(y_pred)
  y_labels = torch.round(y_pred_probs)   # nearest class label

print(f"Initial model accuracy : {accuracy_score(y_train, y_labels) * 100:.4f} %\n")
cm_7 = confusion_matrix(y_train, y_labels)

fig, ax = plt.subplots(figsize=(4, 4))

disp = ConfusionMatrixDisplay(confusion_matrix=cm_7, display_labels=["Stayed", "Exited"])
disp.plot(cmap="Blues", ax=ax)
ax.set_title("Model-7 Confusion Matrix")
ax.grid(False)
plt.show()

# Optimizer and Loss Functions

In [None]:
# Positive-class weight for BCEWithLogitsLoss: (#negatives / #positives).
# It is measured on the training labels only, so it follows the data and does
# not use test labels. The previous hard-coded value was 5163 / 1869 ≈ 2.76
# (the old "2.27" comment was wrong).
_n_pos = y_train.sum()
_n_neg = y_train.numel() - _n_pos
pos_weight = (_n_neg / _n_pos).reshape(1)  # float32 tensor of shape (1,)

In [None]:
# Loss functions
loss_fn_1 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)  # for models that emit raw logits
loss_fn_2 = nn.BCELoss()                                 # for models that end in a Sigmoid

# One optimizer per (algorithm, model) pair for models 1-6.
# NOTE(review): every optimizer for a given model is bound to the same model
# object, so running them one after another continues training from the
# previous optimizer's weights — confirm whether a fresh model per run was intended.
_models_1_to_6 = (model_1, model_2, model_3, model_4, model_5, model_6)

# SGD — good with high lr
(SGD_optim_1, SGD_optim_2, SGD_optim_3,
 SGD_optim_4, SGD_optim_5, SGD_optim_6) = (
    torch.optim.SGD(params=m.parameters(), lr=0.1, weight_decay=1e-4)
    for m in _models_1_to_6
)

# Adam — best general optimizer
(Adam_optim_1, Adam_optim_2, Adam_optim_3,
 Adam_optim_4, Adam_optim_5, Adam_optim_6) = (
    torch.optim.Adam(params=m.parameters(), lr=0.0007, weight_decay=1e-5)
    for m in _models_1_to_6
)

# RMSprop — better for noisy data
(RMSprop_optim_1, RMSprop_optim_2, RMSprop_optim_3,
 RMSprop_optim_4, RMSprop_optim_5, RMSprop_optim_6) = (
    torch.optim.RMSprop(params=m.parameters(), lr=0.0009, weight_decay=1e-5)
    for m in _models_1_to_6
)

# SGD with momentum (named "SGDW" in this notebook) — best when tuned well
(SGDW_optim_1, SGDW_optim_2, SGDW_optim_3,
 SGDW_optim_4, SGDW_optim_5, SGDW_optim_6) = (
    torch.optim.SGD(params=m.parameters(), lr=0.02, momentum=0.9, weight_decay=1e-4)
    for m in _models_1_to_6
)

# Adagrad — not great for DL, but okay for comparison
(Adagrad_optim_1, Adagrad_optim_2, Adagrad_optim_3,
 Adagrad_optim_4, Adagrad_optim_5, Adagrad_optim_6) = (
    torch.optim.Adagrad(m.parameters(), lr=0.01, weight_decay=1e-4)
    for m in _models_1_to_6
)

# (Optional) AdamW — best for modern deep learning; only defined for model_7
AdamW_optim_1 = torch.optim.AdamW(model_7.parameters(), lr=0.001, weight_decay=1e-4)


# Training and Testing loops

In [None]:
# Per-optimizer result lists. Each training run appends one percentage to each
# of its optimizer's four lists, so index i holds the i-th model trained.
SGD_precision_scores, SGD_f1_scores, SGD_recall_scores, SGD_accuracy_scores = [], [], [], []

adam_precision_scores, adam_f1_scores, adam_recall_scores, adam_accuracy_scores = [], [], [], []

rms_precision_scores, rms_f1_scores, rms_recall_scores, rms_accuracy_scores = [], [], [], []

SGDW_precision_scores, SGDW_f1_scores, SGDW_recall_scores, SGDW_accuracy_scores = [], [], [], []

adagrad_precision_scores, adagrad_f1_scores, adagrad_recall_scores, adagrad_accuracy_scores = [], [], [], []

In [None]:
def model_loopV1(optimizer, loss_fn, model, precision, recall, f1, accuracy, model_name="", optimizer_name="", epochs=150, limit=100, plot = False, matrix = False, score_board = False, cf = False):
    """Full-batch train a logit-output model and record its test-set metrics.

    Each epoch takes one optimizer step on the notebook-level ``X_train`` /
    ``y_train`` tensors, then scores the model on ``X_test`` / ``y_test`` by
    thresholding ``sigmoid(logits)`` at 0.45. Training stops early once test
    accuracy has failed to improve for ``limit`` consecutive epochs.

    Precision, recall, F1, the confusion matrix and the classification report
    are computed from the predictions of the epoch with the best test accuracy,
    so they describe the same model state as the reported accuracy. (They used
    to come from whichever epoch ran last.)

    Args:
        optimizer: torch optimizer already bound to ``model``'s parameters.
        loss_fn: loss called as ``loss_fn(logits, targets)``.
        model: module that returns raw logits for ``X_train`` / ``X_test``.
        precision, recall, f1, accuracy: lists that each receive one percentage
            (rounded to 2 decimals) as a side effect.
        model_name, optimizer_name: labels used only in the confusion-matrix
            title. ``model_name`` comes first in the signature, so pass both by
            keyword to avoid swapping them.
        epochs: maximum number of epochs; must be at least 1.
        limit: early-stopping patience, in epochs without improvement.
        plot: draw the test-accuracy curve.
        matrix: draw the confusion matrix.
        score_board: print losses and accuracy every 10th epoch.
        cf: print the sklearn classification report.

    Returns:
        None. Results are printed and appended to the four lists.

    Raises:
        ValueError: if ``epochs`` is smaller than 1.

    NOTE(review): both early stopping and the "best accuracy" are selected on
    the test split, which makes the reported numbers optimistic — consider a
    separate validation split.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    threshold = 0.45       # decision threshold applied to sigmoid(logits)
    best_accuracy = 0
    best_labels = None     # test predictions from the best-accuracy epoch
    patience_counter = 0
    test_accuracies = []

    for epoch in range(epochs):
        # One full-batch optimisation step.
        model.train()
        loss = loss_fn(model(X_train), y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate on the test split with Dropout/BatchNorm in inference behaviour.
        model.eval()
        with torch.inference_mode():
            test_logits = model(X_test)
            test_labels = (torch.sigmoid(test_logits) > threshold).int()
            test_loss = loss_fn(test_logits, y_test)
            test_accuracy = accuracy_score(y_test, test_labels)

        test_accuracies.append(test_accuracy)

        # Early stopping bookkeeping; remember the predictions of the best epoch.
        if best_labels is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_labels = test_labels
            patience_counter = 0
        else:
            patience_counter += 1

        if score_board and epoch % 10 == 0:
            print(f"Epoch {epoch} | Train Loss: {loss:.4f} | Test Loss: {test_loss:.4f} | Accuracy: {test_accuracy:.4f}")

        if patience_counter >= limit:
            print(f"\n Early stopping at epoch {epoch} | no improvement in last {limit} epochs.")
            break

    print(f"\n Best accuracy: {best_accuracy * 100:.4f}")

    if plot:
      plt.figure(figsize=(10, 5))
      plt.plot(test_accuracies, label="Test Accuracy", color="green")
      plt.title(f"Test Accuracy over Epochs (Best: {best_accuracy * 100:.2f}) %")
      plt.xlabel("Epochs")
      plt.ylabel("Accuracy")
      plt.legend()
      plt.grid(True, alpha = 0.6)
      plt.show()

    if matrix:
      cm = confusion_matrix(y_test, best_labels)

      fig, ax = plt.subplots(figsize=(5, 5))

      disp = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ["Stayed", "Exited"])
      disp.plot(cmap = "Blues", ax = ax)
      ax.set_title(f"Confusion Matrix of {optimizer_name} with {model_name}")
      plt.grid(False)
      plt.show()

    if cf:
      print(classification_report(y_test, best_labels, target_names=["Stayed", "Exited"]))

    # Convert to percent before rounding; round(x, 2) * 100 leaves float
    # artefacts such as 56.99999999999999.
    # zero_division=0 covers the case where no positive is predicted.
    p_score = round(precision_score(y_test, best_labels, zero_division=0) * 100, 2)
    r_score = round(recall_score(y_test, best_labels, zero_division=0) * 100, 2)
    f_score = round(f1_score(y_test, best_labels, zero_division=0) * 100, 2)

    precision.append(p_score)
    recall.append(r_score)
    f1.append(f_score)
    accuracy.append(round(best_accuracy * 100, 2))

    print(f"Precision Score: {p_score:.2f} %")
    print(f"Recall Score:    {r_score:.2f} %")
    print(f"F1 Score:        {f_score:.2f} %")


In [None]:
def model_loopV2(optimizer, loss_fn, model, precision, recall, f1, accuracy, model_name="", optimizer_name="", epochs=150, limit=100, plot = False, matrix = False, score_board = False, cf = False):
    """Full-batch train a probability-output model and record its test-set metrics.

    Same procedure as the logit loop, but ``model`` must already return
    probabilities (its last layer is a Sigmoid): the loss is called directly on
    the model output and test labels are ``torch.round(probabilities)``. The
    data come from the notebook-level ``X_train`` / ``y_train`` / ``X_test`` /
    ``y_test`` tensors. Training stops early once test accuracy has failed to
    improve for ``limit`` consecutive epochs.

    Precision, recall, F1, the confusion matrix and the classification report
    are computed from the predictions of the epoch with the best test accuracy,
    so they describe the same model state as the reported accuracy. (They used
    to come from whichever epoch ran last.)

    Args:
        optimizer: torch optimizer already bound to ``model``'s parameters.
        loss_fn: loss called as ``loss_fn(probabilities, targets)``.
        model: module that returns values in [0, 1].
        precision, recall, f1, accuracy: lists that each receive one percentage
            (rounded to 2 decimals) as a side effect.
        model_name, optimizer_name: labels used only in the confusion-matrix
            title. ``model_name`` comes first in the signature, so pass both by
            keyword to avoid swapping them.
        epochs: maximum number of epochs; must be at least 1.
        limit: early-stopping patience, in epochs without improvement.
        plot: draw the test-accuracy curve.
        matrix: draw the confusion matrix.
        score_board: print losses and accuracy every 10th epoch.
        cf: print the sklearn classification report.

    Returns:
        None. Results are printed and appended to the four lists.

    Raises:
        ValueError: if ``epochs`` is smaller than 1.

    NOTE(review): both early stopping and the "best accuracy" are selected on
    the test split, which makes the reported numbers optimistic — consider a
    separate validation split.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    best_accuracy = 0
    best_labels = None     # test predictions from the best-accuracy epoch
    patience_counter = 0
    test_accuracies = []

    for epoch in range(epochs):
        # One full-batch optimisation step.
        model.train()
        loss = loss_fn(model(X_train), y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate on the test split with Dropout/BatchNorm in inference behaviour.
        model.eval()
        with torch.inference_mode():
            test_pred_probs = model(X_test)
            test_labels = torch.round(test_pred_probs)
            test_loss = loss_fn(test_pred_probs, y_test)
            test_accuracy = accuracy_score(y_test, test_labels)

        test_accuracies.append(test_accuracy)

        # Early stopping bookkeeping; remember the predictions of the best epoch.
        if best_labels is None or test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_labels = test_labels
            patience_counter = 0
        else:
            patience_counter += 1

        if score_board and epoch % 10 == 0:
            print(f"Epoch {epoch} | Train Loss: {loss:.4f} | Test Loss: {test_loss:.4f} | Accuracy: {test_accuracy:.4f}")

        if patience_counter >= limit:
            print(f"\n Early stopping at epoch {epoch} | no improvement in last {limit} epochs.")
            break

    print(f"\n Best accuracy: {best_accuracy * 100:.4f}")

    if plot:
      plt.figure(figsize=(10, 5))
      plt.plot(test_accuracies, label="Test Accuracy", color="green")
      plt.title(f"Test Accuracy over Epochs (Best: {best_accuracy * 100:.2f}) %")
      plt.xlabel("Epochs")
      plt.ylabel("Accuracy")
      plt.legend()
      plt.grid(True, alpha = 0.6)
      plt.show()

    if matrix:
      cm = confusion_matrix(y_test, best_labels)

      fig, ax = plt.subplots(figsize=(5, 5))

      disp = ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ["Stayed", "Exited"])
      disp.plot(cmap = "Blues", ax = ax)
      ax.set_title(f"Confusion Matrix of {optimizer_name} with {model_name}")
      plt.grid(False)
      plt.show()

    if cf:
      print(classification_report(y_test, best_labels, target_names=["Stayed", "Exited"]))

    # Convert to percent before rounding; round(x, 2) * 100 leaves float
    # artefacts such as 56.99999999999999.
    # zero_division=0 covers the case where no positive is predicted.
    p_score = round(precision_score(y_test, best_labels, zero_division=0) * 100, 2)
    r_score = round(recall_score(y_test, best_labels, zero_division=0) * 100, 2)
    f_score = round(f1_score(y_test, best_labels, zero_division=0) * 100, 2)

    precision.append(p_score)
    recall.append(r_score)
    f1.append(f_score)
    accuracy.append(round(best_accuracy * 100, 2))

    print(f"Precision Score: {p_score:.2f} %")
    print(f"Recall Score:    {r_score:.2f} %")
    print(f"F1 Score:        {f_score:.2f} %")

In [None]:
# @title
sns.set_style("whitegrid")

def accuracy_plot(models, accuracies, optimizer=''):
    """Bar chart of test accuracy (in percent) per model, with the best bar highlighted.

    Args:
        models: bar labels, one per model.
        accuracies: list of accuracies in percent, aligned with ``models``.
        optimizer: optimizer name shown in the title.

    Raises:
        ValueError: if ``accuracies`` is empty (raised by ``max``).
    """
    best_idx = accuracies.index(max(accuracies))
    bar_colors = ['#4682B4' if i == best_idx else '#B0C4DE' for i in range(len(models))]

    plt.figure(figsize=(10, 6))
    bars = plt.bar(models, accuracies, color=bar_colors, edgecolor='black', width=0.55)

    # Annotate bars with accuracy values
    for bar, acc in zip(bars, accuracies):
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            acc + 0.3,
            f"{acc:.2f}%",
            ha='center',
            va='bottom',
            fontsize=11,
            fontweight='semibold',
            color='#333333'
        )

    # Baseline reference line at 80%
    plt.axhline(y=80, color='grey', linestyle='--', linewidth=1, alpha=0.5)

    plt.title(
        f'Performance of {optimizer} Optimizer Across ANN Architectures\n'
        # Use the real label of the best bar rather than assuming it is
        # "Model-<position + 1>".
        f'(Best: {models[best_idx]} with {accuracies[best_idx]:.2f}%)',
        fontsize=14,
        fontweight='bold',
        pad=15
    )
    plt.xlabel('Model Architectures', fontsize=12)
    plt.ylabel('Test Accuracy (%)', fontsize=12)
    # Keep the usual 70-85 window, but widen it when a bar or its label would
    # fall outside and be clipped.
    y_low = min(70, np.floor(min(accuracies)) - 1)
    y_high = max(85, np.ceil(max(accuracies)) + 1)
    plt.ylim(y_low, y_high)
    plt.xticks(fontsize=11)
    plt.yticks(fontsize=11)
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
    plt.show()

In [None]:
# @title
def plot_metrics_grouped(models, precisions, recalls, f1s):
    """Grouped bar chart of precision, recall and F1 (in percent) per model.

    Args:
        models: tick labels, one per model.
        precisions, recalls, f1s: lists of percentages aligned with ``models``.
    """
    n_models = len(models)

    bar_width = 0.25
    x = np.arange(n_models)

    plt.figure(figsize=(12, 6))

    colors = ['#3B82F6', '#EF4444', '#10B981']

    plt.bar(x - bar_width, precisions, width=bar_width, color=colors[0], label='Precision', edgecolor='black')
    plt.bar(x, recalls, width=bar_width, color=colors[1], label='Recall', edgecolor='black')
    plt.bar(x + bar_width, f1s, width=bar_width, color=colors[2], label='F1 Score', edgecolor='black')

    # Add values on top of bars
    for i in range(n_models):
        plt.text(x[i] - bar_width, precisions[i] + 1, f"{precisions[i]:.1f}%", ha='center', fontsize=9)
        plt.text(x[i], recalls[i] + 1, f"{recalls[i]:.1f}%", ha='center', fontsize=9)
        plt.text(x[i] + bar_width, f1s[i] + 1, f"{f1s[i]:.1f}%", ha='center', fontsize=9)

    plt.xticks(x, models, fontsize=11)
    plt.yticks(fontsize=11)

    # Keep the usual 40-90 window, but widen it when a score (or the label
    # drawn above it) would fall outside and be clipped.
    all_scores = [*precisions, *recalls, *f1s]
    y_low, y_high = 40, 90
    if all_scores:
        y_low = min(y_low, np.floor(min(all_scores)) - 5)
        y_high = max(y_high, np.ceil(max(all_scores)) + 5)
    plt.ylim(y_low, y_high)

    plt.axhline(y=65, color='grey', linestyle='--', linewidth=1, alpha=0.5)

    plt.ylabel('Score (%)', fontsize=12)
    plt.title('Model Performance Metrics Comparison', fontsize=14, fontweight='bold', pad=15)
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
    plt.show()


### MODEL PERFORMANCES

# Stochastic Gradient Descent

In [None]:
# Model 1 with plain SGD. Labels are passed by keyword: model_loopV1 declares
# model_name before optimizer_name, so the old positional call swapped them.
# model_loopV1 returns None, so SGD_1 is None and there is nothing to display.
SGD_1 = model_loopV1(
    SGD_optim_1, loss_fn_1, model_1,
    SGD_precision_scores, SGD_recall_scores, SGD_f1_scores, SGD_accuracy_scores,
    model_name='Model 1', optimizer_name='SGD',
)

In [None]:
# Model 2 with plain SGD; labels by keyword (the positional call swapped them).
# model_loopV1 returns None, so SGD_2 is None and there is nothing to display.
SGD_2 = model_loopV1(
    SGD_optim_2, loss_fn_1, model_2,
    SGD_precision_scores, SGD_recall_scores, SGD_f1_scores, SGD_accuracy_scores,
    model_name='Model 2', optimizer_name='SGD',
)

In [None]:
# Model 3 (sigmoid output) with plain SGD; labels by keyword (the positional call swapped them).
# model_loopV2 returns None, so SGD_3 is None and there is nothing to display.
SGD_3 = model_loopV2(
    SGD_optim_3, loss_fn_2, model_3,
    SGD_precision_scores, SGD_recall_scores, SGD_f1_scores, SGD_accuracy_scores,
    model_name='Model 3', optimizer_name='SGD', limit=100,
)

In [None]:
# Model 4 (sigmoid output) with plain SGD; labels by keyword (the positional call swapped them).
# model_loopV2 returns None, so SGD_4 is None and there is nothing to display.
SGD_4 = model_loopV2(
    SGD_optim_4, loss_fn_2, model_4,
    SGD_precision_scores, SGD_recall_scores, SGD_f1_scores, SGD_accuracy_scores,
    model_name='Model 4', optimizer_name='SGD',
)

In [None]:
# Model 5 with plain SGD; labels by keyword (the positional call swapped them).
# model_loopV1 returns None, so SGD_5 is None and there is nothing to display.
SGD_5 = model_loopV1(
    SGD_optim_5, loss_fn_1, model_5,
    SGD_precision_scores, SGD_recall_scores, SGD_f1_scores, SGD_accuracy_scores,
    model_name='Model 5', optimizer_name='SGD',
)

In [None]:
# Model 6 (sigmoid output) with plain SGD; labels by keyword (the positional call swapped them).
# model_loopV2 returns None, so SGD_6 is None and there is nothing to display.
SGD_6 = model_loopV2(
    SGD_optim_6, loss_fn_2, model_6,
    SGD_precision_scores, SGD_recall_scores, SGD_f1_scores, SGD_accuracy_scores,
    model_name='Model 6', optimizer_name='SGD',
)

In [None]:
# Bar labels for the six compared architectures, reused by every plot below.
models = [f'Model {number}' for number in range(1, 7)]
accuracy_plot(models, accuracies=SGD_accuracy_scores, optimizer='SGD')

In [None]:
# Precision / recall / F1 per model for the plain-SGD runs.
plot_metrics_grouped(
    models,
    precisions=SGD_precision_scores,
    recalls=SGD_recall_scores,
    f1s=SGD_f1_scores,
)

# Adaptive Moment Estimation (Adam)

In [None]:
# Model 1 with Adam; labels by keyword (the positional call swapped them).
# NOTE(review): model_1 has already been trained by SGD_optim_1 in the SGD
# section, so this run continues from those weights, not from a fresh
# initialisation — confirm that is intended for the optimizer comparison.
model_loopV1(
    Adam_optim_1, loss_fn_1, model_1,
    adam_precision_scores, adam_recall_scores, adam_f1_scores, adam_accuracy_scores,
    model_name='Model 1', optimizer_name='Adam',
)

In [None]:
# Model 2 with Adam; labels by keyword (the positional call swapped them).
model_loopV1(
    Adam_optim_2, loss_fn_1, model_2,
    adam_precision_scores, adam_recall_scores, adam_f1_scores, adam_accuracy_scores,
    model_name='Model 2', optimizer_name='Adam',
)

In [None]:
# Model 3 (sigmoid output) with Adam; labels by keyword (the positional call swapped them).
model_loopV2(
    Adam_optim_3, loss_fn_2, model_3,
    adam_precision_scores, adam_recall_scores, adam_f1_scores, adam_accuracy_scores,
    model_name='Model 3', optimizer_name='Adam',
)

In [None]:
# Model 4 (sigmoid output) with Adam; labels by keyword (the positional call swapped them).
model_loopV2(
    Adam_optim_4, loss_fn_2, model_4,
    adam_precision_scores, adam_recall_scores, adam_f1_scores, adam_accuracy_scores,
    model_name='Model 4', optimizer_name='Adam',
)

In [None]:
# Model 5 with Adam; labels by keyword (the positional call swapped them).
model_loopV1(
    Adam_optim_5, loss_fn_1, model_5,
    adam_precision_scores, adam_recall_scores, adam_f1_scores, adam_accuracy_scores,
    model_name='Model 5', optimizer_name='Adam',
)

In [None]:
# Model 6 (sigmoid output) with Adam; labels by keyword (the positional call swapped them).
model_loopV2(
    Adam_optim_6, loss_fn_2, model_6,
    adam_precision_scores, adam_recall_scores, adam_f1_scores, adam_accuracy_scores,
    model_name='Model 6', optimizer_name='Adam',
)

In [None]:
# Test accuracy per model for the Adam runs.
accuracy_plot(models, accuracies=adam_accuracy_scores, optimizer='Adam')

In [None]:
# Precision / recall / F1 per model for the Adam runs.
plot_metrics_grouped(
    models,
    precisions=adam_precision_scores,
    recalls=adam_recall_scores,
    f1s=adam_f1_scores,
)

# RMSprop

In [None]:
# Model 1 with RMSprop; labels by keyword (the positional call swapped them).
model_loopV1(
    RMSprop_optim_1, loss_fn_1, model_1,
    rms_precision_scores, rms_recall_scores, rms_f1_scores, rms_accuracy_scores,
    model_name='Model 1', optimizer_name='RMSprop',
)

In [None]:
# Model 2 with RMSprop; labels by keyword (the positional call swapped them).
model_loopV1(
    RMSprop_optim_2, loss_fn_1, model_2,
    rms_precision_scores, rms_recall_scores, rms_f1_scores, rms_accuracy_scores,
    model_name='Model 2', optimizer_name='RMSprop',
)

In [None]:
# Model 3 (sigmoid output) with RMSprop; labels by keyword (the positional call swapped them).
model_loopV2(
    RMSprop_optim_3, loss_fn_2, model_3,
    rms_precision_scores, rms_recall_scores, rms_f1_scores, rms_accuracy_scores,
    model_name='Model 3', optimizer_name='RMSprop',
)

In [None]:
# Model 4 (sigmoid output) with RMSprop; labels by keyword (the positional call swapped them).
model_loopV2(
    RMSprop_optim_4, loss_fn_2, model_4,
    rms_precision_scores, rms_recall_scores, rms_f1_scores, rms_accuracy_scores,
    model_name='Model 4', optimizer_name='RMSprop',
)

In [None]:
# Model 5 with RMSprop; labels by keyword (the positional call swapped them).
model_loopV1(
    RMSprop_optim_5, loss_fn_1, model_5,
    rms_precision_scores, rms_recall_scores, rms_f1_scores, rms_accuracy_scores,
    model_name='Model 5', optimizer_name='RMSprop',
)

In [None]:
# Model 6 (sigmoid output) with RMSprop; labels by keyword (the positional call swapped them).
model_loopV2(
    RMSprop_optim_6, loss_fn_2, model_6,
    rms_precision_scores, rms_recall_scores, rms_f1_scores, rms_accuracy_scores,
    model_name='Model 6', optimizer_name='RMSprop',
)

In [None]:
# Test accuracy per model for the RMSprop runs.
accuracy_plot(models, accuracies=rms_accuracy_scores, optimizer='RMSprop')

In [None]:
# Precision / recall / F1 per model for the RMSprop runs.
plot_metrics_grouped(
    models,
    precisions=rms_precision_scores,
    recalls=rms_recall_scores,
    f1s=rms_f1_scores,
)

# SGD with Momentum


In [None]:
# Model 1 with momentum SGD; labels by keyword (the positional call swapped them).
model_loopV1(
    SGDW_optim_1, loss_fn_1, model_1,
    SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores, SGDW_accuracy_scores,
    model_name='Model 1', optimizer_name='SGDW',
)

In [None]:
# Model 2 with momentum SGD; labels by keyword (the positional call swapped them).
model_loopV1(
    SGDW_optim_2, loss_fn_1, model_2,
    SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores, SGDW_accuracy_scores,
    model_name='Model 2', optimizer_name='SGDW',
)

In [None]:
# Model 3 uses its own momentum-SGD settings (lr=0.01, no weight decay).
# Bind them to SGDW_optim_3 instead of overwriting SGD_optim_3, so re-running
# the plain-SGD section does not silently pick up a momentum optimizer.
# NOTE(review): the other models use lr=0.02 with weight_decay=1e-4 for this
# section — confirm the different settings for model 3 are intentional.
SGDW_optim_3 = torch.optim.SGD(params=model_3.parameters(), lr=0.01, momentum=0.9)
loss_fn_2 = nn.BCELoss()
# Labels by keyword: the positional call swapped model_name and optimizer_name.
model_loopV2(
    SGDW_optim_3, loss_fn_2, model_3,
    SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores, SGDW_accuracy_scores,
    model_name='Model 3', optimizer_name='SGDW',
)

In [None]:
# Model 4 (sigmoid output) with momentum SGD; labels by keyword (the positional call swapped them).
model_loopV2(
    SGDW_optim_4, loss_fn_2, model_4,
    SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores, SGDW_accuracy_scores,
    model_name='Model 4', optimizer_name='SGDW',
)

In [None]:
# Model 5 with momentum SGD; labels by keyword (the positional call swapped them).
model_loopV1(
    SGDW_optim_5, loss_fn_1, model_5,
    SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores, SGDW_accuracy_scores,
    model_name='Model 5', optimizer_name='SGDW',
)

In [None]:
# Model 6 (sigmoid output) with momentum SGD; labels by keyword (the positional call swapped them).
model_loopV2(
    SGDW_optim_6, loss_fn_2, model_6,
    SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores, SGDW_accuracy_scores,
    model_name='Model 6', optimizer_name='SGDW',
)

In [None]:
# Test accuracy per model for the momentum-SGD runs.
accuracy_plot(models, accuracies=SGDW_accuracy_scores, optimizer='SGD (with momentum)')
# NOTE(review): the message below is a fixed observation, not derived from the
# current scores — verify it still holds after re-running.
print("Model 1 and Model 5 performed similarly")

In [None]:
# Precision / recall / F1 per model for the momentum-SGD runs.
plot_metrics_grouped(
    models,
    precisions=SGDW_precision_scores,
    recalls=SGDW_recall_scores,
    f1s=SGDW_f1_scores,
)

# AdaGrad

In [None]:
# Model 1 with Adagrad. Fixes the misspelt optimizer name (Adagrad_opitm_1 was
# never defined) and passes the labels by keyword (they were swapped positionally).
model_loopV1(
    Adagrad_optim_1, loss_fn_1, model_1,
    adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores, adagrad_accuracy_scores,
    model_name='Model 1', optimizer_name='AdaGrad',
)

In [None]:
# Model 2 with Adagrad. Fixes the misspelt optimizer name (Adagrad_opitm_2 was
# never defined) and passes the labels by keyword (they were swapped positionally).
model_loopV1(
    Adagrad_optim_2, loss_fn_1, model_2,
    adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores, adagrad_accuracy_scores,
    model_name='Model 2', optimizer_name='AdaGrad',
)

In [None]:
# Model 3 (sigmoid output) with Adagrad. Fixes the misspelt optimizer name
# (Adagrad_opitm_3 was never defined) and passes the labels by keyword.
model_loopV2(
    Adagrad_optim_3, loss_fn_2, model_3,
    adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores, adagrad_accuracy_scores,
    model_name='Model 3', optimizer_name='AdaGrad',
)

In [None]:
# Model 4 (sigmoid output) with Adagrad. Fixes the misspelt optimizer name
# (Adagrad_opitm_4 was never defined) and passes the labels by keyword.
model_loopV2(
    Adagrad_optim_4, loss_fn_2, model_4,
    adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores, adagrad_accuracy_scores,
    model_name='Model 4', optimizer_name='AdaGrad',
)

In [None]:
# Model 5 with Adagrad. Fixes the misspelt optimizer name (Adagrad_opitm_5 was
# never defined) and passes the labels by keyword (they were swapped positionally).
model_loopV1(
    Adagrad_optim_5, loss_fn_1, model_5,
    adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores, adagrad_accuracy_scores,
    model_name='Model 5', optimizer_name='AdaGrad',
)

In [None]:
# Model 6 (sigmoid output) with Adagrad. Fixes the misspelt optimizer name
# (Adagrad_opitm_6 was never defined) and passes the labels by keyword.
model_loopV2(
    Adagrad_optim_6, loss_fn_2, model_6,
    adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores, adagrad_accuracy_scores,
    model_name='Model 6', optimizer_name='AdaGrad',
)

In [None]:
# Test accuracy per model for the Adagrad runs.
accuracy_plot(models, accuracies=adagrad_accuracy_scores, optimizer='Adagrad')

In [None]:
# Precision / recall / F1 per model for the Adagrad runs.
plot_metrics_grouped(
    models,
    precisions=adagrad_precision_scores,
    recalls=adagrad_recall_scores,
    f1s=adagrad_f1_scores,
)

In [None]:
# @title
# Model 7 with AdamW. Fixes in this cell:
#  * AdamW_opitm_1 was a misspelling of AdamW_optim_1 (NameError).
#  * model_7 emits raw logits and is paired with BCEWithLogitsLoss, so it needs
#    the logit loop (model_loopV1); model_loopV2 would round the logits directly.
#  * Results go to dedicated lists: appending to the adagrad_* lists would give
#    them a seventh entry and break the 6-models-per-optimizer summary table.
#  * Labels now say what was actually run (they read 'AdaGrad' / 'Model 4').
adamw_precision_scores = []
adamw_recall_scores = []
adamw_f1_scores = []
adamw_accuracy_scores = []
model_loopV1(
    AdamW_optim_1, loss_fn_1, model_7,
    adamw_precision_scores, adamw_recall_scores, adamw_f1_scores, adamw_accuracy_scores,
    model_name='Model 7', optimizer_name='AdamW',
)

# Summary Table

In [None]:

# One (label, accuracy, precision, recall, f1) entry per optimizer, in table order.
_runs = [
    ("SGD", SGD_accuracy_scores, SGD_precision_scores, SGD_recall_scores, SGD_f1_scores),
    ("Adam", adam_accuracy_scores, adam_precision_scores, adam_recall_scores, adam_f1_scores),
    ("RMSProp", rms_accuracy_scores, rms_precision_scores, rms_recall_scores, rms_f1_scores),
    ("SGD+W", SGDW_accuracy_scores, SGDW_precision_scores, SGDW_recall_scores, SGDW_f1_scores),
    ("Adagrad", adagrad_accuracy_scores, adagrad_precision_scores, adagrad_recall_scores, adagrad_f1_scores),
]

# Long-format table: 6 models x 5 optimizers, so every score list must hold
# exactly six entries.
summary = {
    "Model": [f"Model {i+1}" for i in range(6)] * 5,
    "Optimizer": [run[0] for run in _runs for _slot in range(6)],
    "Accuracy": [score for run in _runs for score in run[1]],
    "Precision": [score for run in _runs for score in run[2]],
    "Recall": [score for run in _runs for score in run[3]],
    "F1 Score": [score for run in _runs for score in run[4]],
}

# NOTE(review): this rebinds `df`, which held the raw dataset earlier in the
# notebook; earlier cells cannot be re-run after this one without reloading.
df = pd.DataFrame(summary).round(2)
df

In [None]:
# Weighted blend of the four metrics (weights sum to 1.0); dict order fixes the
# order in which the terms are added.
_weights = {"Accuracy": 0.3, "F1 Score": 0.3, "Precision": 0.2, "Recall": 0.2}
df["Composite Score"] = sum(weight * df[column] for column, weight in _weights.items())

# Best (highest composite score) runs first.
df = df.sort_values("Composite Score", ascending=False)
df.head()