In [7]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch, torch.nn as nn
from torch.nn import BCEWithLogitsLoss
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import f1_score, recall_score, precision_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay, accuracy_score, classification_report

In [8]:
# Read Social_Network_Ads.csv and discard the "User ID" column in one chain,
# then preview the first three rows.
data = pd.read_csv("Data/Social_Network_Ads.csv").drop(columns=["User ID"])
data.head(3)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0


In [9]:
# Split features/target, fit the preprocessing on the training split only,
# and wrap both splits in DataLoaders for the training loop.
SEED = 42  # shared seed for the train/val split and the training shuffle

X = data.drop(columns = "Purchased")
y = data["Purchased"].to_numpy()

# Stratify on y so both splits are drawn with the same class proportions.
X_train_df, X_val_df, y_train, y_val = train_test_split(
    X, y, test_size = 0.20, stratify = y, random_state = SEED
)

numeric = X.select_dtypes(include = "number").columns
categorical = X.select_dtypes(exclude = "number").columns

pre = ColumnTransformer(
    [
        # Numeric columns: median-impute, then standardise.
        ("num", Pipeline([
            ("imp", SimpleImputer(strategy = "median")),
            ("scaler", StandardScaler())
        ]), numeric),

        # Non-numeric columns: mode-impute, then one-hot encode; categories
        # unseen during fit are ignored (encoded as all zeros) at transform time.
        ("cat", Pipeline([
            ("imp", SimpleImputer(strategy = "most_frequent")),
            ("ohe", OneHotEncoder(handle_unknown= "ignore"))
        ]), categorical)
    ],
    # OneHotEncoder produces sparse output; sparse_threshold=0 forces the
    # stacked result to be a dense ndarray so torch.from_numpy below always
    # works, regardless of how sparse the encoded features are.
    sparse_threshold = 0,
)

# Fit on the training split only so no validation statistics leak into it.
X_train = pre.fit_transform(X_train_df)
X_val = pre.transform(X_val_df)


Xtr = torch.from_numpy(X_train)
ytr = torch.from_numpy(y_train).reshape(-1,1)  # column vector of labels
Xva = torch.from_numpy(X_val)
yva = torch.from_numpy(y_val).reshape(-1,1)

train_ds = TensorDataset(Xtr, ytr)
val_ds = TensorDataset(Xva, yva)

BATCH_SIZE = 56

# Dedicated, seeded generator so the training shuffle order is reproducible
# when this cell is re-run from scratch.
shuffle_rng = torch.Generator().manual_seed(SEED)

train_loader = DataLoader(train_ds, batch_size = BATCH_SIZE, shuffle = True, drop_last = False, generator = shuffle_rng)
val_loader = DataLoader(val_ds, batch_size = BATCH_SIZE, shuffle = False, drop_last = False)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cpu'

In [10]:
# Positive-class weight for the loss: ratio of negative to positive labels
# in the training split.
pos = np.sum(y_train == 1)
neg = np.sum(y_train == 0)
# max(pos, 1) guards the division when the split contains no positives.
pos_weight_ratio = neg / max(pos, 1)
print(f"pos_weight is: {pos_weight_ratio:.3f}")

# Wrap the scalar ratio in a one-element float32 tensor on DEVICE.
pos_weight_value = torch.tensor(
    [pos_weight_ratio],
    dtype = torch.float32,
    device = DEVICE,
)
print(type(pos_weight_value))

pos_weight is: 1.807
<class 'torch.Tensor'>


In [11]:
class OneHiddenLayer(nn.Module):
    """Linear model mapping ``n_features`` inputs to a single raw output.

    Despite the class name, the stack holds only ``nn.Linear(n_features, 1)``:
    there is no hidden layer and no activation, so ``forward`` returns an
    unnormalised score (a logit) per input row.
    """

    def __init__(self, n_features):
        """Build the one-layer stack for inputs with ``n_features`` columns."""
        super().__init__()
        linear = nn.Linear(in_features=n_features, out_features=1)
        self.layers = nn.Sequential(linear)

    def forward(self, x):
        """Run ``x`` through the stack and return its output unchanged."""
        logits = self.layers(x)
        return logits

In [12]:
# Column count of the preprocessed training matrix; used as the model's input width.
n_features = np.shape(X_train)[1]
n_features

4

In [37]:
# Training hyperparameters.
learning_rate = 0.001
epochs = 1000
weight_decay_value = 0.01
threshold = 0.3  # probability cut-off applied to sigmoid outputs to get class predictions

# Seed torch's global RNG before building the model so the initial weights
# are the same on every fresh run of this cell.
torch.manual_seed(42)

model = OneHiddenLayer(n_features = n_features).to(DEVICE).float()

optimizer = torch.optim.AdamW(model.parameters(), lr = learning_rate, weight_decay = weight_decay_value)

# pos_weight scales the loss contribution of positive examples.
loss_function = nn.BCEWithLogitsLoss(pos_weight = pos_weight_value)

In [41]:
# Train for `epochs` epochs; after every epoch, evaluate on the validation
# loader and log a summary every 100th epoch.
#
# NOTE: this cell does not create `model` or `optimizer`, so re-running it
# continues training from the current weights. Re-run the setup cell first
# to start from a freshly initialised model.
for epoch in range(1, epochs + 1):
    # ---- training pass ----
    model.train()
    running_train_loss = 0.0
    rows_seen = 0.0
    train_correct = 0.0

    for xb, yb in train_loader:
        xb = xb.to(DEVICE).float()
        yb = yb.to(DEVICE).float().view(-1, 1)

        optimizer.zero_grad()

        logits = model(xb)
        train_loss = loss_function(logits, yb)
        train_loss.backward()
        optimizer.step()

        # The loss is a batch mean; weight it by batch size so the epoch
        # average is per row even when the last batch is smaller.
        running_train_loss += train_loss.item() * xb.size(0)
        rows_seen += xb.size(0)

        with torch.no_grad():
            train_probs = torch.sigmoid(logits)
            train_preds = (train_probs >= threshold).float()
            train_correct += (train_preds == yb).sum().item()

    epoch_loss_per_row = running_train_loss / rows_seen
    epoch_accuracy = train_correct / rows_seen

    # ---- validation pass ----
    model.eval()
    val_running_loss = 0.0
    val_seen = 0.0
    val_correct = 0.0
    val_probs_list = []

    val_pred_list = []
    y_true_list = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(DEVICE).float()
            yb = yb.to(DEVICE).float().view(-1, 1)

            val_logits = model(xb)
            val_loss = loss_function(val_logits, yb)

            val_running_loss += val_loss.item() * xb.size(0)
            val_seen += xb.size(0)

            val_probs = torch.sigmoid(val_logits)
            val_preds = (val_probs >= threshold).float()
            val_probs_list.append(val_probs)
            val_pred_list.append(val_preds)
            y_true_list.append(yb)
            val_correct += (val_preds == yb).sum().item()

        # Concatenate with torch and move to host memory before converting:
        # NumPy cannot consume tensors that live on a CUDA device, so
        # np.concatenate on the raw tensor lists fails when DEVICE is "cuda".
        val_probs_list = torch.cat(val_probs_list).cpu().numpy()
        val_pred_list = torch.cat(val_pred_list).cpu().numpy()
        y_true_list = torch.cat(y_true_list).cpu().numpy()

        val_accuracy = val_correct / val_seen
        val_epoch_loss_per_row = val_running_loss / val_seen
        # zero_division=0 keeps the score at 0 without emitting a warning
        # on epochs where a metric's denominator is zero.
        val_precision = precision_score(y_true_list, val_pred_list, zero_division = 0)
        val_recall = recall_score(y_true_list, val_pred_list, zero_division = 0)
        val_f1 = f1_score(y_true_list, val_pred_list, zero_division = 0)

    if epoch % 100 == 0:   # log results every 100th epoch
        print(f"For Epoch # {epoch}")
        print(f"Train Accuracy: {round(epoch_accuracy, 3)}")
        print(f"Val Accuracy: {round(val_accuracy, 3)}")
        print(f"Train Loss: {round(epoch_loss_per_row, 3)}")
        print(f"Val Loss: {round(val_epoch_loss_per_row, 3)}")
        print(f"Val Precision: {round(val_precision, 3)}")
        print(f"Val Recall: {round(val_recall, 3)}")
        print(f"Val F1: {round(val_f1, 3)}")
        print()


For Epoch # 100
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: 0.718

For Epoch # 200
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: 0.718

For Epoch # 300
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: 0.718

For Epoch # 400
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: 0.718

For Epoch # 500
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: 0.718

For Epoch # 600
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: 0.718

For Epoch # 700
Train Accuracy: 0.806
Val Accuracy: 0.725
Train Loss: 0.44
Val Loss: 0.51
Val Precision: 0.571
Val Recall: 0.966
Val F1: