In [None]:
# Matplotlib
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
# Numpy
import numpy as np
# Pandas
import pandas as pd
# Torch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchmetrics.classification import Accuracy

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected binary classifier: in_features -> 64 -> 32 -> 16 -> 1.

    All linear layers are created in double precision. Hidden layers use
    GELU; the single output unit is passed through a sigmoid, so `forward`
    returns a value in [0, 1] per sample.

    Attributes:
        loss: Binary cross-entropy on the sigmoid output.
        accuracy: torchmetrics binary accuracy metric.

    Args:
        in_features: Width of the input feature vector (default 32).
    """

    def __init__(self, in_features=32):
        super().__init__()
        self.hidden1 = nn.Linear(in_features, 64, dtype=torch.double)
        self.hidden2 = nn.Linear(64, 32, dtype=torch.double)
        self.hidden3 = nn.Linear(32, 16, dtype=torch.double)
        self.fc = nn.Linear(16, 1, dtype=torch.double)
        self.activation = nn.GELU()
        self.sigmoid = torch.sigmoid
        # The network emits ONE sigmoid probability per sample, so the matching
        # criterion is binary cross-entropy. CrossEntropyLoss applies a softmax
        # over the class dimension; with a single class that softmax is always
        # 1, which makes the loss (and its gradient) identically zero.
        self.loss = nn.BCELoss()
        # The unified torchmetrics `Accuracy` wrapper needs the task named
        # explicitly; "binary" matches the single-probability output.
        self.accuracy = Accuracy(task="binary")

    def forward(self, x):
        """Map a (batch, in_features) double tensor to (batch, 1) probabilities."""
        h = self.activation(self.hidden1(x))
        h = self.activation(self.hidden2(h))
        h = self.activation(self.hidden3(h))
        return self.sigmoid(self.fc(h))

In [None]:
import pickle
from sklearn.model_selection import train_test_split

In [None]:
def split_data(dataset, batch_size=128, random_state=None):
    """Split `dataset` into train/validation/test DataLoaders.

    30 % of the samples are held out, and that hold-out is halved, giving a
    70 / 15 / 15 split. The training loader yields shuffled mini-batches; the
    validation and test loaders each yield their whole split as one batch.

    Args:
        dataset: Collection of samples accepted by
            `sklearn.model_selection.train_test_split`.
        batch_size: Mini-batch size of the training loader (default 128).
        random_state: Forwarded to both `train_test_split` calls so the
            partition is reproducible. `None` (default) keeps the original
            unseeded behaviour. It does not seed the training loader's
            per-epoch shuffling.

    Returns:
        Tuple `(train_loader, valid_loader, test_loader)`.
    """
    # First cut: 70 % train, 30 % held out.
    train_data, holdout_data = train_test_split(
        dataset, test_size=0.3, shuffle=True, random_state=random_state
    )
    # Second cut: the hold-out is shared equally between validation and test.
    valid_data, test_data = train_test_split(
        holdout_data, test_size=0.5, shuffle=True, random_state=random_state
    )

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # Full-batch loaders: one iteration returns the entire split.
    valid_loader = DataLoader(valid_data, batch_size=len(valid_data), shuffle=False)
    test_loader = DataLoader(test_data, batch_size=len(test_data), shuffle=False)

    return train_loader, valid_loader, test_loader

In [None]:
def _evaluate(model, loader, device):
    """Return (mean loss, mean accuracy) of `model` over every batch of `loader`.

    Batch results are weighted by batch size, so a single full-batch loader and
    a mini-batch loader give the same figures (the model's criterion uses mean
    reduction). Returns `(nan, nan)` for an empty loader.
    """
    loss_sum = 0.0
    acc_sum = 0.0
    n_samples = 0

    model.eval()
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device).to(dtype=torch.double)
            targets = targets.to(device).reshape(-1, 1).to(dtype=torch.double)

            pred = model(inputs)
            n = targets.shape[0]
            loss_sum += model.loss(pred.float(), targets.float()).item() * n
            acc_sum += model.accuracy(pred, targets).item() * n
            n_samples += n

    if n_samples == 0:
        return float('nan'), float('nan')
    return loss_sum / n_samples, acc_sum / n_samples


def train_model(train_loader, valid_loader, test_loader,
                num_epochs=300, lr=0.01, l2_lambda=0.001, patience=None):
    """Train a `NeuralNetwork`, keep the best-validation weights, report test metrics.

    Each epoch runs mini-batch Adam on `train_loader` (criterion plus an
    explicit L2 penalty over all parameters), then evaluates on `valid_loader`.
    Whenever the validation loss improves, the weights are snapshotted in
    memory and also written to "best_model.pkl". After training, the best
    snapshot is restored and evaluated on `test_loader`.

    `NeuralNetwork` and `device` are read from module scope.
    NOTE(review): `device` is not defined in the visible cells — confirm it is
    set before this function is called.

    Args:
        train_loader: Iterable of `(inputs, targets)` training batches.
        valid_loader: Iterable of `(inputs, targets)` validation batches.
        test_loader: Iterable of `(inputs, targets)` test batches.
        num_epochs: Maximum number of epochs (default 300).
        lr: Adam learning rate (default 0.01).
        l2_lambda: Weight of the L2 penalty added to the loss (default 0.001).
        patience: If given, stop once this many consecutive epochs have passed
            without a new best validation loss. `None` (default) always runs
            all `num_epochs`, as before.

    Returns:
        The trained model with the best-validation weights loaded (or the
        final weights if the validation loss never improved).
    """
    model = NeuralNetwork().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08)

    best_val_loss = float('inf')
    best_model_state = None
    best_epoch = 0
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss_sum = 0.0
        train_acc_sum = 0.0
        n_train = 0

        for inputs_batch, outputs_batch in train_loader:
            outputs_re = outputs_batch.to(device).reshape(-1, 1).to(dtype=torch.double)
            inputs_re = inputs_batch.to(device).to(dtype=torch.double)

            optimizer.zero_grad()

            # Forward pass
            pred = model(inputs_re)
            loss_value = model.loss(pred.float(), outputs_re.float())

            # Add L2 regularization (Ridge) over every parameter, biases included
            l2_norm = sum(p.pow(2).sum() for p in model.parameters())
            loss_value = loss_value + l2_lambda * l2_norm

            # Batch accuracy; detached because the metric needs no gradient
            accuracy_value = model.accuracy(pred.detach(), outputs_re)

            # Backward pass and optimization
            loss_value.backward()
            optimizer.step()

            # Accumulate size-weighted sums so the log shows epoch means rather
            # than whatever the last mini-batch happened to produce.
            n = outputs_re.shape[0]
            train_loss_sum += loss_value.item() * n
            train_acc_sum += accuracy_value.item() * n
            n_train += n

        train_loss = train_loss_sum / n_train if n_train else float('nan')
        train_accuracy = train_acc_sum / n_train if n_train else float('nan')

        val_loss, val_accuracy = _evaluate(model, valid_loader, device)

        # Save the best model based on the lowest validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() returns references to the live parameters, which keep
            # changing as training continues; clone to freeze this epoch's weights.
            best_model_state = {
                key: value.detach().clone() for key, value in model.state_dict().items()
            }
            best_epoch = epoch + 1  # 1-based epoch number
            epochs_without_improvement = 0

            with open("best_model.pkl", "wb") as f:
                pickle.dump({"model_state": best_model_state, "epoch": best_epoch, "val_loss": best_val_loss}, f)
        else:
            epochs_without_improvement += 1

        # Print progress
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.4f}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')

        if patience is not None and epochs_without_improvement >= patience:
            print(f'Early stopping at epoch {epoch+1}: no validation improvement for {patience} epochs')
            break

    # Restore from the in-memory snapshot instead of re-reading the pickle:
    # the file may be missing, or left over from an earlier run, when no epoch
    # of THIS run ever improved the validation loss (e.g. a NaN loss).
    if best_model_state is not None:
        model.load_state_dict(best_model_state)
        print(f"Best Model Achieved at Epoch: {best_epoch} with Validation Loss: {best_val_loss:.4f}")
    else:
        print("Validation loss never improved; keeping the final weights")

    # Final Test Evaluation
    test_loss, test_accuracy = _evaluate(model, test_loader, device)
    print(f'Final Test Loss: {test_loss:.4f}, Final Test Accuracy: {test_accuracy:.4f}')

    return model

# split_data returns a (train, valid, test) tuple; unpack it so each loader is
# passed as its own positional argument to train_model.
# NOTE(review): `dataset` is not defined in the visible cells — confirm it is
# loaded before this line runs.
train_model(*split_data(dataset))