In [1144]:
import torch
from torch import nn
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder, LabelEncoder

In [1145]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

set_seed(0)

In [1146]:
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")
df.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75


In [1147]:
print(df.info)

<bound method DataFrame.info of       customerID  gender  SeniorCitizen Partner Dependents  tenure  \
0     7590-VHVEG  Female              0     Yes         No       1   
1     5575-GNVDE    Male              0      No         No      34   
2     3668-QPYBK    Male              0      No         No       2   
3     7795-CFOCW    Male              0      No         No      45   
4     9237-HQITU  Female              0      No         No       2   
...          ...     ...            ...     ...        ...     ...   
7038  6840-RESVB    Male              0     Yes        Yes      24   
7039  2234-XADUH  Female              0     Yes        Yes      72   
7040  4801-JZAZL  Female              0     Yes        Yes      11   
7041  8361-LTMKD    Male              1     Yes         No       4   
7042  3186-AJIEK    Male              0      No         No      66   

     PhoneService     MultipleLines InternetService OnlineSecurity  ...  \
0              No  No phone service             DSL 

In [1148]:
df.isnull().sum()

Unnamed: 0,0
customerID,0
gender,0
SeniorCitizen,0
Partner,0
Dependents,0
tenure,0
PhoneService,0
MultipleLines,0
InternetService,0
OnlineSecurity,0


In [1149]:
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


In [1150]:
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df = df.dropna(subset=['TotalCharges'])

In [1151]:
df['PaperlessBilling'].value_counts(normalize = False)

Unnamed: 0_level_0,count
PaperlessBilling,Unnamed: 1_level_1
Yes,4168
No,2864


In [1152]:
df['gender'].value_counts(normalize = False) # 0 x

Unnamed: 0_level_0,count
gender,Unnamed: 1_level_1
Male,3549
Female,3483


In [1153]:
df['InternetService'].value_counts(normalize=False) # 7

Unnamed: 0_level_0,count
InternetService,Unnamed: 1_level_1
Fiber optic,3096
DSL,2416
No,1520


In [1154]:
df['MultipleLines'].value_counts(normalize=False) # 6

Unnamed: 0_level_0,count
MultipleLines,Unnamed: 1_level_1
No,3385
Yes,2967
No phone service,680


In [1155]:
df['Contract'].value_counts(normalize = False)# 14

Unnamed: 0_level_0,count
Contract,Unnamed: 1_level_1
Month-to-month,3875
Two year,1685
One year,1472


In [1156]:
df['PaymentMethod'].value_counts(normalize = False) # 16

Unnamed: 0_level_0,count
PaymentMethod,Unnamed: 1_level_1
Electronic check,2365
Mailed check,1604
Bank transfer (automatic),1542
Credit card (automatic),1521


In [1157]:
df['TotalCharges'].isnull().sum()

np.int64(0)

In [1158]:
X = df.iloc[:, 1:-1]
y = df.iloc[:, -1].values

In [1159]:
le = LabelEncoder()
y = le.fit_transform(y)

# Splitting and Encoding the Data

In [1160]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [1161]:
# Encoding Categorical and Labeled data
categorical_data = ['MultipleLines', 'InternetService', 'Contract', 'PaymentMethod']
label_encoding_cols = ['gender', 'Partner', 'Dependents', 'PhoneService',
                'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
                'TechSupport', 'StreamingTV', 'StreamingMovies', 'PaperlessBilling']
numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']

ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), categorical_data),
                                     ('ordinal', OrdinalEncoder(), label_encoding_cols),
                                     ('num', StandardScaler(), numeric_cols)], remainder = "passthrough")

X_train = ct.fit_transform(X_train)
X_test = ct.transform(X_test)

In [1162]:
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
X_train = torch.tensor(X_train, dtype = torch.float32)
X_test = torch.tensor(X_test, dtype = torch.float32)

In [1163]:
X_train.shape

torch.Size([5625, 28])

# Models


In [1164]:
class ChurnModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(28, 128)
    self.relu1 = nn.ReLU()

    self.layer_2 = nn.Linear(128, 128)
    self.relu2 = nn.ReLU()

    self.layer_3 = nn.Linear(128, 1)

  def forward(self, x):
    x = self.relu1(self.layer_1(x))
    x = self.relu2(self.layer_2(x))
    x = self.layer_3(x)
    return x

In [1165]:
class ChurnModelV2(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(28, 128)
    self.batchnorm1 = nn.BatchNorm1d(num_features=128)
    self.relu1 = nn.ReLU()
    self.dropout1 = nn.Dropout(p = 0.05)

    self.layer_2 = nn.Linear(128, 128)
    self.batchnorm2 = nn.BatchNorm1d(num_features=128)
    self.relu2 = nn.ReLU()
    self.dropout2 = nn.Dropout(p = 0.05)

    self.layer_3 = nn.Linear(128, 1)

  def forward(self, x):
    x = self.batchnorm1(self.layer_1(x))
    x = self.relu1(x)
    x = self.dropout1(x)

    x = self.batchnorm2(self.layer_2(x))
    x = self.relu2(x)
    x = self.dropout2(x)

    x = self.layer_3(x)
    return x

In [1166]:
class ChurnModelV3(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer_1 = nn.Linear(28, 128)
    self.relu1 = nn.ReLU()

    self.layer_2 = nn.Linear(128, 128)
    self.relu2 = nn.ReLU()

    self.layer_3 = nn.Linear(128, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    x = self.relu1(self.layer_1(x))
    x = self.relu2(self.layer_2(x))
    x = self.layer_3(x)
    x = self.sigmoid(x)
    return x

In [1167]:
model_1 = ChurnModel()
model_2 = ChurnModelV2()
model_3 = ChurnModelV3()

In [1168]:
with torch.inference_mode():
  y_pred = model_1(X_train)
  y_pred_probs = torch.sigmoid(y_pred)
  y_lables = torch.round(y_pred_probs)

print(f"Initial model accuracy : {accuracy_score(y_lables, y_train) * 100:.4f} %\n")
print(f"Initial Confusion matrix :\n {confusion_matrix(y_lables, y_train)}")

Initial model accuracy : 27.2178 %

Initial Confusion matrix :
 [[ 853  822]
 [3272  678]]


In [1169]:
with torch.inference_mode():
  y_pred = model_2(X_train)
  y_pred_probs = torch.sigmoid(y_pred)
  y_lables = torch.round(y_pred_probs)

print(f"Initial model accuracy : {accuracy_score(y_lables, y_train) * 100:.4f} %\n")
print(f"Initial Confusion matrix :\n {confusion_matrix(y_lables, y_train)}")

Initial model accuracy : 49.1022 %

Initial Confusion matrix :
 [[1558  296]
 [2567 1204]]


In [1170]:
with torch.inference_mode():
  y_pred = model_3(X_train)
  y_preds = torch.round(y_pred)

print(f"Initial model accuracy : {accuracy_score(y_preds, y_train) * 100:.4f} %\n")
print(f"Initial Confusion matrix :\n {confusion_matrix(y_preds, y_train)}")

Initial model accuracy : 35.9289 %

Initial Confusion matrix :
 [[1041  520]
 [3084  980]]


In [1171]:
loss_fn_1 = nn.BCEWithLogitsLoss()
loss_fn_2 = nn.BCELoss()
SGD_optim_1 = torch.optim.SGD(params = model_1.parameters(), lr = 0.001)
Adam_optim_1 = torch.optim.Adam(params = model_1.parameters(), lr = 0.001)
RMSprop_optim_1 = torch.optim.RMSprop(params = model_1.parameters(), lr = 0.001)
SGD_optim_2 = torch.optim.SGD(params = model_2.parameters(), lr = 0.001)
Adam_optim_2 = torch.optim.Adam(params = model_2.parameters(), lr = 0.001)
RMSprop_optim_2 = torch.optim.RMSprop(params = model_2.parameters(), lr = 0.001)
SGD_optim_3 = torch.optim.SGD(params = model_3.parameters(), lr = 0.001)
Adam_optim_3 = torch.optim.Adam(params = model_3.parameters(), lr = 0.001)
RMSprop_optim_3 = torch.optim.RMSprop(params = model_3.parameters(), lr = 0.001)

In [1172]:
def model_loopV1(optimizer, loss_fn, model, epochs=150, limit=20):
    best_accuracy = 0
    patience_counter = 0

    epoch_counts = []
    test_loss_values = []
    train_loss_values = []
    test_accuracies = []

    for epoch in range(epochs):
        model.train()

        y_logits = model(X_train)
        y_pred_probs = torch.sigmoid(y_logits)
        y_labels = torch.round(y_pred_probs)

        loss = loss_fn(y_logits, y_train)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        model.eval()
        with torch.inference_mode():
            test_logits = model(X_test)
            test_pred_probs = torch.sigmoid(test_logits)
            test_labels = torch.round(test_pred_probs)

            test_loss = loss_fn(test_logits, y_test)
            test_accuracy = accuracy_score(test_labels, y_test)

            epoch_counts.append(epoch)
            test_loss_values.append(test_loss.item())
            train_loss_values.append(loss.item())
            test_accuracies.append(test_accuracy)

            # Early stopping check
            if test_accuracy > best_accuracy:
                best_accuracy = test_accuracy
                patience_counter = 0
                torch.save(model.state_dict(), "best_model.pth")  # Save best model
            else:
                patience_counter += 1

            if epoch % 10 == 0:
                print(f"Epoch {epoch} | Train Loss: {loss:.4f} | Test Loss: {test_loss:.4f} | Accuracy: {test_accuracy:.4f}")

            if patience_counter >= limit:
                print(f"\n Early stopping at epoch {epoch} — no improvement in last {limit} epochs.")
                break

    print(f"\n Best accuracy: {best_accuracy:.4f}")


In [1173]:
def model_loopV2(optimizer, loss_fn, model, epochs=150, limit=20):
    best_accuracy = 0
    patience_counter = 0

    epoch_counts = []
    test_loss_values = []
    train_loss_values = []
    test_accuracies = []

    for epoch in range(epochs):
        model.train()

        y_preds = model(X_train)
        y_pred = torch.round(y_preds)

        loss = loss_fn(y_pred, y_train)

        optimizer.zero_grad()

        loss.backward()

        optimizer.step()

        model.eval()
        with torch.inference_mode():
            test_preds = model(X_test)
            test_pred = torch.round(test_preds)

            test_loss = loss_fn(test_pred, y_test)
            test_accuracy = accuracy_score(test_pred, y_test)

            epoch_counts.append(epoch)
            test_loss_values.append(test_loss.item())
            train_loss_values.append(loss.item())
            test_accuracies.append(test_accuracy)

            # Early stopping check
            if test_accuracy > best_accuracy:
                best_accuracy = test_accuracy
                patience_counter = 0
                torch.save(model.state_dict(), "best_model.pth")  # Save best model
            else:
                patience_counter += 1

            if epoch % 10 == 0:
                print(f"Epoch {epoch} | Train Loss: {loss:.4f} | Test Loss: {test_loss:.4f} | Accuracy: {test_accuracy:.4f}")

            if patience_counter >= limit:
                print(f"\n Early stopping at epoch {epoch} — no improvement in last {limit} epochs.")
                break

    print(f"\n Best accuracy: {best_accuracy:.4f}")


# Stochastic Gradient Descent

In [1174]:
model_loopV1(SGD_optim_1, loss_fn_1, model_1, limit=50) # Stochastic Gradient Descent

Epoch 0 | Train Loss: 0.7141 | Test Loss: 0.7136 | Accuracy: 0.2800
Epoch 10 | Train Loss: 0.7112 | Test Loss: 0.7106 | Accuracy: 0.2999
Epoch 20 | Train Loss: 0.7084 | Test Loss: 0.7077 | Accuracy: 0.3276
Epoch 30 | Train Loss: 0.7056 | Test Loss: 0.7049 | Accuracy: 0.3447
Epoch 40 | Train Loss: 0.7029 | Test Loss: 0.7021 | Accuracy: 0.3817
Epoch 50 | Train Loss: 0.7003 | Test Loss: 0.6994 | Accuracy: 0.4193
Epoch 60 | Train Loss: 0.6977 | Test Loss: 0.6968 | Accuracy: 0.4534
Epoch 70 | Train Loss: 0.6951 | Test Loss: 0.6942 | Accuracy: 0.4996
Epoch 80 | Train Loss: 0.6927 | Test Loss: 0.6916 | Accuracy: 0.5380
Epoch 90 | Train Loss: 0.6902 | Test Loss: 0.6891 | Accuracy: 0.5849
Epoch 100 | Train Loss: 0.6878 | Test Loss: 0.6867 | Accuracy: 0.6247
Epoch 110 | Train Loss: 0.6855 | Test Loss: 0.6843 | Accuracy: 0.6681
Epoch 120 | Train Loss: 0.6832 | Test Loss: 0.6819 | Accuracy: 0.6937
Epoch 130 | Train Loss: 0.6809 | Test Loss: 0.6796 | Accuracy: 0.7122
Epoch 140 | Train Loss: 0.6787 

In [1175]:
model_loopV1(SGD_optim_2, loss_fn_1, model_2, limit=50)

Epoch 0 | Train Loss: 0.7007 | Test Loss: 0.6966 | Accuracy: 0.4407
Epoch 10 | Train Loss: 0.6846 | Test Loss: 0.6780 | Accuracy: 0.5650
Epoch 20 | Train Loss: 0.6690 | Test Loss: 0.6642 | Accuracy: 0.6212
Epoch 30 | Train Loss: 0.6555 | Test Loss: 0.6513 | Accuracy: 0.6503
Epoch 40 | Train Loss: 0.6433 | Test Loss: 0.6388 | Accuracy: 0.6859
Epoch 50 | Train Loss: 0.6311 | Test Loss: 0.6273 | Accuracy: 0.7114
Epoch 60 | Train Loss: 0.6222 | Test Loss: 0.6168 | Accuracy: 0.7363
Epoch 70 | Train Loss: 0.6140 | Test Loss: 0.6074 | Accuracy: 0.7427
Epoch 80 | Train Loss: 0.6035 | Test Loss: 0.5988 | Accuracy: 0.7541
Epoch 90 | Train Loss: 0.5950 | Test Loss: 0.5909 | Accuracy: 0.7576
Epoch 100 | Train Loss: 0.5864 | Test Loss: 0.5837 | Accuracy: 0.7633
Epoch 110 | Train Loss: 0.5801 | Test Loss: 0.5770 | Accuracy: 0.7704
Epoch 120 | Train Loss: 0.5729 | Test Loss: 0.5709 | Accuracy: 0.7747
Epoch 130 | Train Loss: 0.5695 | Test Loss: 0.5651 | Accuracy: 0.7761
Epoch 140 | Train Loss: 0.5619 

In [1184]:
model_loopV2(SGD_optim_3, loss_fn_2, model_3, limit = 50)

Epoch 0 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 10 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 20 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 30 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 40 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 50 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674

 Early stopping at epoch 50 — no improvement in last 50 epochs.

 Best accuracy: 0.3674


# Adaptive moment estimator (Adam)

In [1177]:
model_loopV1(Adam_optim_1, loss_fn_1, model_1)

Epoch 0 | Train Loss: 0.6766 | Test Loss: 0.6526 | Accuracy: 0.7377
Epoch 10 | Train Loss: 0.5285 | Test Loss: 0.5168 | Accuracy: 0.7377
Epoch 20 | Train Loss: 0.4729 | Test Loss: 0.4663 | Accuracy: 0.7534
Epoch 30 | Train Loss: 0.4343 | Test Loss: 0.4370 | Accuracy: 0.7903
Epoch 40 | Train Loss: 0.4256 | Test Loss: 0.4341 | Accuracy: 0.7953
Epoch 50 | Train Loss: 0.4197 | Test Loss: 0.4312 | Accuracy: 0.7996
Epoch 60 | Train Loss: 0.4153 | Test Loss: 0.4286 | Accuracy: 0.8060
Epoch 70 | Train Loss: 0.4117 | Test Loss: 0.4271 | Accuracy: 0.8038
Epoch 80 | Train Loss: 0.4085 | Test Loss: 0.4261 | Accuracy: 0.8024

 Early stopping at epoch 82 — no improvement in last 20 epochs.

 Best accuracy: 0.8067


In [1178]:
model_loopV1(Adam_optim_2, loss_fn_1, model_2)

Epoch 0 | Train Loss: 0.5572 | Test Loss: 0.5225 | Accuracy: 0.7918
Epoch 10 | Train Loss: 0.4320 | Test Loss: 0.4659 | Accuracy: 0.7711
Epoch 20 | Train Loss: 0.4068 | Test Loss: 0.4348 | Accuracy: 0.7946
Epoch 30 | Train Loss: 0.3979 | Test Loss: 0.4282 | Accuracy: 0.7967
Epoch 40 | Train Loss: 0.3887 | Test Loss: 0.4255 | Accuracy: 0.7946
Epoch 50 | Train Loss: 0.3837 | Test Loss: 0.4239 | Accuracy: 0.8010
Epoch 60 | Train Loss: 0.3762 | Test Loss: 0.4227 | Accuracy: 0.7982
Epoch 70 | Train Loss: 0.3705 | Test Loss: 0.4235 | Accuracy: 0.7996

 Early stopping at epoch 71 — no improvement in last 20 epochs.

 Best accuracy: 0.8024


In [1179]:
model_loopV2(Adam_optim_3, limit = 50)

Epoch 0 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 10 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 20 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 30 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 40 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 50 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674

 Early stopping at epoch 50 — no improvement in last 50 epochs.

 Best accuracy: 0.3674


# RMSprop

In [1180]:
model_loopV1(RMSprop_optim_1, loss_fn_1, model_1, limit=50)

Epoch 0 | Train Loss: 0.4076 | Test Loss: 0.4334 | Accuracy: 0.8010
Epoch 10 | Train Loss: 0.4019 | Test Loss: 0.4257 | Accuracy: 0.8024
Epoch 20 | Train Loss: 0.3996 | Test Loss: 0.4256 | Accuracy: 0.8017
Epoch 30 | Train Loss: 0.3978 | Test Loss: 0.4258 | Accuracy: 0.8038
Epoch 40 | Train Loss: 0.3961 | Test Loss: 0.4262 | Accuracy: 0.8045
Epoch 50 | Train Loss: 0.3954 | Test Loss: 0.4275 | Accuracy: 0.8003

 Early stopping at epoch 56 — no improvement in last 50 epochs.

 Best accuracy: 0.8053


In [1181]:
model_loopV1(RMSprop_optim_2, loss_fn_1, model_2, limit=50)

Epoch 0 | Train Loss: 0.3706 | Test Loss: 0.4551 | Accuracy: 0.7854
Epoch 10 | Train Loss: 0.3626 | Test Loss: 0.4312 | Accuracy: 0.7960
Epoch 20 | Train Loss: 0.3561 | Test Loss: 0.4345 | Accuracy: 0.7910
Epoch 30 | Train Loss: 0.3493 | Test Loss: 0.4396 | Accuracy: 0.7925
Epoch 40 | Train Loss: 0.3472 | Test Loss: 0.4544 | Accuracy: 0.8003
Epoch 50 | Train Loss: 0.3320 | Test Loss: 0.4511 | Accuracy: 0.7925
Epoch 60 | Train Loss: 0.3342 | Test Loss: 0.4617 | Accuracy: 0.7939

 Early stopping at epoch 61 — no improvement in last 50 epochs.

 Best accuracy: 0.8074


In [1182]:
model_loopV2(RMSprop_optim_3, limit=50)

Epoch 0 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 10 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 20 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 30 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 40 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674
Epoch 50 | Train Loss: 64.0711 | Test Loss: 63.2552 | Accuracy: 0.3674

 Early stopping at epoch 50 — no improvement in last 50 epochs.

 Best accuracy: 0.3674
