In [1]:
import csv
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import seaborn as sns
import wandb
import yaml

from sklearn.impute import KNNImputer
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
"""Classes"""


class MLP(nn.Module):
    """Fully connected classifier with four hidden stages and a linear head.

    Every hidden stage applies Linear -> LayerNorm -> ReLU -> Dropout, using
    widths 256, 128, 64 and 32. The head returns raw, unnormalised scores of
    size ``output_size``.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of output units.
        dropout_rate: Probability passed to the shared ``nn.Dropout`` module.
    """

    def __init__(self, input_size, output_size, dropout_rate):
        super().__init__()

        # Layers are registered as fc1/ln1 ... fc4/ln4, in that order, so the
        # attribute names and parameter ordering match the hand-written layout.
        widths = (input_size, 256, 128, 64, 32)
        for stage, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{stage}", nn.Linear(fan_in, fan_out))
            setattr(self, f"ln{stage}", nn.LayerNorm(fan_out))
        self.output_layer = nn.Linear(widths[-1], output_size)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Run ``x`` through the four hidden stages and return the head output."""
        hidden_stages = (
            (self.fc1, self.ln1),
            (self.fc2, self.ln2),
            (self.fc3, self.ln3),
            (self.fc4, self.ln4),
        )
        hidden = x
        for linear, norm in hidden_stages:
            hidden = self.dropout(self.relu(norm(linear(hidden))))
        return self.output_layer(hidden)



class EarlyStopping:
    """Track a validation loss and raise a flag once it stops improving.

    A loss counts as an improvement only when it is not greater than
    ``best_loss - delta``. After ``patience`` consecutive calls without an
    improvement, ``early_stop`` is set to True (and is never reset).

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        delta: Margin by which the loss must beat the best value seen so far.
    """

    def __init__(self, patience=10, delta=0):
        self.patience = patience
        self.delta = delta
        self.best_loss = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        """Record ``val_loss`` and update ``counter`` / ``early_stop``."""
        # The very first observation only establishes the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return

        threshold = self.best_loss - self.delta
        if val_loss > threshold:
            # No sufficient improvement: burn one unit of patience.
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # Sufficient improvement: new baseline, patience restored.
        self.best_loss = val_loss
        self.counter = 0


class HybridOptimizer:
    """Combine several optimizers by applying the average of their updates.

    The wrapped optimizers are expected to manage (some of) the same
    parameters. ``average_step`` lets each optimizer propose an update from
    the same starting point and then moves every parameter by the mean of the
    proposals.

    Args:
        optimizers: Sequence of ``torch.optim.Optimizer`` instances.
    """

    def __init__(self, optimizers):
        self.optimizers = optimizers

    def zero_grad(self):
        """Clear the gradients held by every wrapped optimizer."""
        for opt in self.optimizers:
            opt.zero_grad()

    def average_step(self):
        """Apply the mean of the wrapped optimizers' parameter updates.

        The previous implementation summed the *same* ``.grad`` tensor once per
        optimizer and divided by the optimizer count (a no-op), then let every
        optimizer step in sequence, so the updates were stacked rather than
        averaged. Here each optimizer steps from an identical snapshot of the
        parameters; the parameters are restored after each step and finally
        moved by the averaged delta. Each optimizer still updates its own
        internal state (e.g. Adam moments, Rprop step sizes) exactly once.
        """
        # Collect every parameter that will be updated, and how many
        # optimizers own it, so a parameter held by only some optimizers is
        # averaged over its owners rather than over all optimizers.
        params = []
        owner_counts = {}
        for opt in self.optimizers:
            for group in opt.param_groups:
                for param in group["params"]:
                    if param.grad is None:
                        continue
                    key = id(param)
                    if key not in owner_counts:
                        owner_counts[key] = 0
                        params.append(param)
                    owner_counts[key] += 1

        if not params:
            return

        with torch.no_grad():
            snapshots = [param.detach().clone() for param in params]
            summed_deltas = [torch.zeros_like(param) for param in params]

            for opt in self.optimizers:
                opt.step()
                for param, snapshot, delta in zip(params, snapshots, summed_deltas):
                    delta.add_(param - snapshot)
                    # Rewind so the next optimizer starts from the same point.
                    param.copy_(snapshot)

            for param, delta in zip(params, summed_deltas):
                param.add_(delta / owner_counts[id(param)])

In [3]:
"""Functions"""


def plot_feature_distributions(data, data_imputed, features):
    """Overlay per-feature histograms of the data before and after imputation.

    One subplot is drawn per feature, three per row. The grid has at least
    four rows (the original fixed 4x3 layout) and grows when more than twelve
    features are passed, which previously raised inside ``plt.subplot``.

    Args:
        data: DataFrame holding the original values (NaNs are dropped per feature).
        data_imputed: DataFrame holding the imputed values.
        features: Column names to plot; must exist in both frames.
    """
    features = list(features)
    n_cols = 3
    n_rows = max(4, (len(features) + n_cols - 1) // n_cols)

    # 2.5 inches per row reproduces the original 15x10 figure for four rows.
    plt.figure(figsize=(15, 2.5 * n_rows))
    for i, feature in enumerate(features):
        plt.subplot(n_rows, n_cols, i + 1)
        sns.histplot(
            data[feature].dropna(),
            kde=True,
            label="Before Imputation",
            color="blue",
            bins=30,
            alpha=0.5,
        )
        sns.histplot(
            data_imputed[feature].dropna(),
            kde=True,
            label="After Imputation",
            color="red",
            bins=30,
            alpha=0.5,
        )
        plt.title(f"{feature}")
        plt.xlabel("")
        plt.ylabel("")
        plt.legend()
    plt.tight_layout()
    plt.show()


def plot_box_plots_comparison(data, data_imputed, features):
    """Draw side-by-side box plots of each feature before and after imputation.

    One subplot is drawn per feature, three per row. The grid has at least
    four rows (the original fixed 4x3 layout) and grows when more than twelve
    features are passed, which previously raised inside ``plt.subplot``.

    Args:
        data: DataFrame holding the original values.
        data_imputed: DataFrame holding the imputed values.
        features: Column names to plot; must exist in both frames.
    """
    features = list(features)
    n_cols = 3
    n_rows = max(4, (len(features) + n_cols - 1) // n_cols)

    # 3 inches per row reproduces the original 15x12 figure for four rows.
    plt.figure(figsize=(15, 3 * n_rows))

    for i, feature in enumerate(features):
        plt.subplot(n_rows, n_cols, i + 1)

        # Stack both versions of the column into one long frame, tagged by
        # origin, so seaborn can group the boxes on the tag.
        df_before_plot = data[[feature]].copy()
        df_before_plot["Imputation Status"] = "Before Imputation"

        df_after_plot = data_imputed[[feature]].copy()
        df_after_plot["Imputation Status"] = "After Imputation"

        df_plot = pd.concat([df_before_plot, df_after_plot], ignore_index=True)

        sns.boxplot(
            x="Imputation Status",
            y=feature,
            data=df_plot,
            hue="Imputation Status",
            palette="Set2",
            showfliers=True,
        )

        plt.title(f"{feature}")
        plt.xlabel("")
        plt.ylabel("")

    plt.tight_layout()
    plt.show()


def plot_correlation_heatmaps(data, data_imputed, features):
    """Show the feature correlation matrices before and after imputation.

    Both matrices are computed with ``DataFrame.corr`` over ``features`` and
    rendered as annotated heatmaps in a single 1x2 figure.
    """
    # Compute both matrices up front, before any figure is created.
    panels = (
        ("Correlation Before Imputation", data[features].corr()),
        ("Correlation After Imputation", data_imputed[features].corr()),
    )

    plt.figure(figsize=(16, 8))
    for position, (panel_title, correlations) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        sns.heatmap(correlations, annot=True, cmap="coolwarm", fmt=".2f")
        plt.title(panel_title)

    plt.tight_layout()
    plt.show()


def single_plot_feature_distributions(data, features):
    """Plot one histogram (with KDE) per feature of a single DataFrame.

    One subplot is drawn per feature, three per row. The grid has at least
    four rows (the original fixed 4x3 layout) and grows when more than twelve
    features are passed, which previously raised inside ``plt.subplot``.

    Args:
        data: DataFrame to plot (NaNs are dropped per feature).
        features: Column names to plot.
    """
    features = list(features)
    n_cols = 3
    n_rows = max(4, (len(features) + n_cols - 1) // n_cols)

    # 2.5 inches per row reproduces the original 15x10 figure for four rows.
    plt.figure(figsize=(15, 2.5 * n_rows))
    for i, feature in enumerate(features):
        plt.subplot(n_rows, n_cols, i + 1)
        sns.histplot(
            data[feature].dropna(),
            kde=True,
            label="Normalized",
            color="blue",
            bins=30,
            alpha=1,
        )
        plt.title(f"{feature}")
        plt.xlabel("")
        plt.ylabel("")
        plt.legend()
    plt.tight_layout()
    plt.show()


def impute_features(data, target):
    """Fill missing geometric measurements per class and derive shape ratios.

    For every distinct value of ``data[target]`` a ``KNNImputer`` is fitted on
    that class's rows only, and the imputed values are written into a copy of
    ``data``. ``Roundness``, ``Aspect Ratio`` and ``Eccentricity`` are then
    recomputed from the imputed columns. ``data`` itself is not modified.

    Args:
        data: DataFrame containing the measurement columns and ``target``.
        target: Name of the class-label column used to group rows.

    Returns:
        A new DataFrame with imputed measurements and the three derived columns.
    """
    columns_to_impute = [
        "Length (major axis)",
        "Width (minor axis)",
        "Thickness (depth)",
        "Area",
        "Perimeter",
        "Solidity",
        "Compactness",
        "Extent",
        "Convex hull(convex area)",
    ]

    imputer = KNNImputer(n_neighbors=53)  # n_neighbors = root of dataset size

    data_imputed = data.copy()

    # NOTE(review): grouping by the label means imputation uses the class of
    # every row, including rows that later end up in the test split - confirm
    # this is intended.
    for almond_type in data[target].unique():
        class_rows = data[data[target] == almond_type].copy()
        class_rows[columns_to_impute] = imputer.fit_transform(
            class_rows[columns_to_impute]
        )
        # update() aligns on the index, so only this class's rows are touched.
        data_imputed.update(class_rows)

    length = data_imputed["Length (major axis)"]
    width = data_imputed["Width (minor axis)"]
    area = data_imputed["Area"]

    data_imputed["Roundness"] = (4 * area) / (np.pi * length ** 2)
    data_imputed["Aspect Ratio"] = length / width
    # NOTE(review): yields NaN wherever width exceeds length - verify the
    # imputed data cannot produce that.
    data_imputed["Eccentricity"] = np.sqrt(1 - (width / length) ** 2)

    return data_imputed


def cap_outliers_percentiles(data, feature, lower_percentile=5, upper_percentile=95):
    """Clamp ``data[feature]`` in place to the given percentile range.

    Values below the lower percentile are raised to it and values above the
    upper percentile are lowered to it. The column is overwritten on ``data``;
    nothing is returned.
    """
    column = data[feature]
    lower_limit, upper_limit = (
        np.percentile(column, bound) for bound in (lower_percentile, upper_percentile)
    )

    floored = np.where(column < lower_limit, lower_limit, column)
    data[feature] = np.where(floored > upper_limit, upper_limit, floored)


def normalize_data(data, features):
    """Return a copy of ``data`` with ``features`` rescaled by ``StandardScaler``.

    The scaler is fitted on the rows of ``data`` passed in; columns outside
    ``features`` are copied unchanged and ``data`` itself is not modified.
    """
    standardized_values = StandardScaler().fit_transform(data[features])

    normalized_data = data.copy()
    normalized_data[features] = standardized_values
    return normalized_data


def trainer(
    model,
    criterion,
    optimizer,
    early_stopping,
    x_train_tensor,
    y_train_tensor,
    x_val_tensor,
    y_val_tensor,
    num_epochs,
    hybrid,
    graph,
):
    """Train ``model`` with full-batch updates and validate after every epoch.

    Training ends when ``early_stopping`` raises its flag or when
    ``num_epochs`` epochs have run, whichever comes first. The original
    version returned ``None`` in the latter case, which broke the callers'
    tuple unpacking; a result tuple is now returned on every path.

    Args:
        model: Network to train (switched to train mode each epoch).
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: A torch optimizer, or a ``HybridOptimizer`` when ``hybrid``.
        early_stopping: Callable taking the validation loss and exposing an
            ``early_stop`` attribute.
        x_train_tensor, y_train_tensor: Training inputs and targets.
        x_val_tensor, y_val_tensor: Validation inputs and targets.
        num_epochs: Maximum number of epochs.
        hybrid: When true and ``optimizer`` is a ``HybridOptimizer``, its
            ``average_step`` is used instead of ``step``.
        graph: When true, a wandb run is started and per-epoch metrics are
            logged to it; the run is closed when training ends.

    Returns:
        Tuple ``(train_loss, val_loss, val_accuracy, epochs_run)`` holding the
        metrics of the last completed epoch. If ``num_epochs`` is 0 the three
        metrics are NaN and ``epochs_run`` is 0.
    """
    if graph:
        wandb.init(project="COS 711_Assignment 2")

    use_average_step = hybrid and isinstance(optimizer, HybridOptimizer)
    optimizer_name = optimizer.__class__.__name__

    train_loss = val_loss = val_accuracy = float("nan")
    epochs_run = 0

    try:
        for epoch in range(num_epochs):
            model.train()

            outputs = model(x_train_tensor)
            loss = criterion(outputs, y_train_tensor)

            optimizer.zero_grad()
            loss.backward()

            if use_average_step:
                optimizer.average_step()
            else:
                optimizer.step()

            train_loss = loss.item()

            val_accuracy, val_loss = evaluate_model(
                model, x_val_tensor, y_val_tensor, criterion
            )
            epochs_run = epoch + 1

            if graph:
                wandb.log(
                    {
                        "Epoch": epoch,
                        "Training Loss": train_loss,
                        "Validation Loss": val_loss,
                        "Validation Accuracy": val_accuracy,
                        "Optimizer": optimizer_name,
                    }
                )

            if (epoch + 1) % 50 == 0:
                print(
                    f"Epoch [{epoch + 1}/{num_epochs}], Training Loss: {train_loss:.4f}, "
                    f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}, "
                    f"Optimizer: {optimizer_name}"
                )

            early_stopping(val_loss)
            if early_stopping.early_stop:
                print(f"Early stopping triggered, Epoch [{epoch + 1}/{num_epochs}]")
                break
    finally:
        # Close the wandb run only if this call opened one, and do so even
        # when training is interrupted, so runs are not left dangling.
        if graph:
            wandb.finish()

    return train_loss, val_loss, val_accuracy, epochs_run


def evaluate_model(model, x_tensor, y_tensor, criterion):
    """Score ``model`` on one batch and return ``(accuracy, loss)``.

    The model is put in eval mode (and left there) and the forward pass runs
    without gradient tracking. Accuracy is the fraction of rows whose
    highest-scoring output index equals the target; loss is the criterion
    value as a Python float.
    """
    model.eval()

    with torch.no_grad():
        logits = model(x_tensor)
        batch_loss = criterion(logits, y_tensor).item()
        predicted_labels = logits.argmax(dim=1)
        num_correct = (predicted_labels == y_tensor).sum().item()

    return num_correct / len(y_tensor), batch_loss


def grid_search(
    optimizer,
    learning_rates,
    dropout_rates,
    input_size,
    output_size,
    x,
    y_encoded,
    num_epochs,
    k_folds,
    hybrid,
):
    """Run a k-fold cross-validated grid search over learning and dropout rates.

    For every (learning rate, dropout rate) pair a fresh ``MLP`` is trained on
    each fold, and the mean and standard deviation of the per-fold metrics are
    appended to a timestamped CSV under ``results/``.

    Fixes over the previous version: the result of ``trainer`` is unpacked
    into the four values it actually returns (five were expected, raising
    ``ValueError``), and ``std_val_accuracy`` is computed from the validation
    accuracies rather than from the validation losses.

    Args:
        optimizer: Optimizer class (e.g. ``optim.SGD``) when ``hybrid`` is
            false; otherwise a dict with keys ``"sgd"``, ``"adam"`` and
            ``"rprop"`` mapping to optimizer classes.
        learning_rates: Iterable of learning rates to try.
        dropout_rates: Iterable of dropout rates to try.
        input_size: Number of input features of the model.
        output_size: Number of output units of the model.
        x: Feature array, indexable by integer index arrays.
        y_encoded: Integer-encoded targets, indexable like ``x``.
        num_epochs: Maximum number of epochs per fold.
        k_folds: Number of cross-validation folds.
        hybrid: Whether to train with a ``HybridOptimizer``.
    """
    date_time_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if hybrid:
        optimizer_name = "HybridOptimizer"
    else:
        optimizer_name = optimizer.__name__
    filename = f"results/grid_search_optimizer_{optimizer_name}_{date_time_str}.csv"

    for lr in learning_rates:
        for dr in dropout_rates:
            # Fixed random_state: every configuration sees the same folds.
            kf = KFold(n_splits=k_folds, shuffle=True, random_state=123)
            fold_train_losses = []
            fold_val_losses = []
            fold_val_accuracies = []
            fold_total_epochs = []

            for fold, (train_idx, val_idx) in enumerate(kf.split(x)):
                # Seeding with the fold index gives each fold the same
                # initialisation across configurations.
                set_seed(fold)

                print(
                    f"Learning Rate {lr}, Dropout Rate {dr}, Fold {fold + 1}/{k_folds}"
                )

                x_train, x_val = x[train_idx], x[val_idx]
                y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

                x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
                y_train_tensor = torch.tensor(y_train, dtype=torch.long)
                x_val_tensor = torch.tensor(x_val, dtype=torch.float32)
                y_val_tensor = torch.tensor(y_val, dtype=torch.long)

                model = MLP(input_size, output_size, dr)
                criterion = nn.CrossEntropyLoss()

                if hybrid:
                    hybrid_optimizers = [
                        optimizer["sgd"](model.parameters(), lr=lr),
                        optimizer["adam"](model.parameters(), lr=lr),
                        # Rprop is created with its default learning rate.
                        optimizer["rprop"](model.parameters()),
                    ]
                    active_optimizer = HybridOptimizer(hybrid_optimizers)
                else:
                    active_optimizer = optimizer(model.parameters(), lr=lr)

                early_stopping = EarlyStopping(patience=50, delta=0.001)

                (
                    final_train_loss,
                    final_val_loss,
                    final_val_accuracy,
                    total_epochs,
                ) = trainer(
                    model,
                    criterion,
                    active_optimizer,
                    early_stopping,
                    x_train_tensor,
                    y_train_tensor,
                    x_val_tensor,
                    y_val_tensor,
                    num_epochs,
                    hybrid,
                    False,
                )

                print(total_epochs)

                fold_train_losses.append(final_train_loss)
                fold_val_losses.append(final_val_loss)
                fold_val_accuracies.append(final_val_accuracy)
                fold_total_epochs.append(total_epochs)

            avg_train_loss = np.mean(fold_train_losses)
            avg_val_loss = np.mean(fold_val_losses)
            avg_val_accuracy = np.mean(fold_val_accuracies)
            avg_total_epochs = np.mean(fold_total_epochs)
            std_train_loss = np.std(fold_train_losses)
            std_val_loss = np.std(fold_val_losses)
            std_val_accuracy = np.std(fold_val_accuracies)
            std_total_epochs = np.std(fold_total_epochs)

            save_grid_search_results(
                lr,
                dr,
                avg_train_loss,
                avg_val_loss,
                avg_val_accuracy,
                avg_total_epochs,
                std_train_loss,
                std_val_loss,
                std_val_accuracy,
                std_total_epochs,
                filename,
            )


def train_test(
    x_test,
    x_train,
    y_test,
    y_train,
    input_size,
    output_size,
    num_tests,
    optimizer,
    num_epochs,
    hybrid,
    lr,
    dr,
    filename,
):
    """Train and test one configuration ``num_tests`` times and save a summary.

    Each repetition builds a fresh ``MLP``, splits the training data 80/20
    into train and validation parts using a newly drawn split seed, trains
    with early stopping (logging to wandb) and evaluates on the test set. The
    mean and standard deviation of the collected metrics are appended to
    ``filename`` via ``save_results``.

    Fix over the previous version: ``std_val_accuracy`` is computed from the
    validation accuracies rather than from the validation losses.

    Args:
        x_test, y_test: Held-out test features and integer targets.
        x_train, y_train: Features and integer targets used for the
            per-repetition train/validation split.
        input_size: Number of input features of the model.
        output_size: Number of output units of the model.
        num_tests: Number of independent repetitions.
        optimizer: Optimizer class when ``hybrid`` is false; otherwise a dict
            with keys ``"sgd"``, ``"adam"`` and ``"rprop"``.
        num_epochs: Maximum number of epochs per repetition.
        hybrid: Whether to train with a ``HybridOptimizer``.
        lr: Learning rate.
        dr: Dropout rate.
        filename: Path of the CSV file the summary row is appended to.
    """
    fold_train_losses = []
    fold_val_losses = []
    fold_val_accuracies = []
    fold_test_accuracies = []
    fold_total_epochs = []

    # The test set is the same for every repetition, so convert it once.
    x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.long)

    for _ in range(num_tests):
        model = MLP(input_size, output_size, dr)

        criterion = nn.CrossEntropyLoss()

        if hybrid:
            hybrid_optimizers = [
                optimizer["sgd"](model.parameters(), lr=lr),
                optimizer["adam"](model.parameters(), lr=lr),
                # Rprop is created with its default learning rate.
                optimizer["rprop"](model.parameters()),
            ]
            active_optimizer = HybridOptimizer(hybrid_optimizers)
        else:
            active_optimizer = optimizer(model.parameters(), lr=lr)

        early_stopping = EarlyStopping(patience=50, delta=0.001)

        # A new split seed per repetition varies the train/validation partition.
        random_seed = random.randint(0, 100000)

        run_x_train, run_x_val, run_y_train, run_y_val = train_test_split(
            x_train, y_train, train_size=0.8, random_state=random_seed
        )

        x_train_tensor = torch.tensor(run_x_train, dtype=torch.float32)
        y_train_tensor = torch.tensor(run_y_train, dtype=torch.long)
        x_val_tensor = torch.tensor(run_x_val, dtype=torch.float32)
        y_val_tensor = torch.tensor(run_y_val, dtype=torch.long)

        (
            final_train_loss,
            final_val_loss,
            final_val_accuracy,
            total_epochs,
        ) = trainer(
            model,
            criterion,
            active_optimizer,
            early_stopping,
            x_train_tensor,
            y_train_tensor,
            x_val_tensor,
            y_val_tensor,
            num_epochs,
            hybrid,
            True,
        )

        # Only the accuracy on the test set is reported; the loss is discarded.
        test_accuracy, _ = evaluate_model(
            model, x_test_tensor, y_test_tensor, criterion
        )

        fold_train_losses.append(final_train_loss)
        fold_val_losses.append(final_val_loss)
        fold_val_accuracies.append(final_val_accuracy)
        fold_test_accuracies.append(test_accuracy)
        fold_total_epochs.append(total_epochs)

    avg_train_loss = np.mean(fold_train_losses)
    avg_val_loss = np.mean(fold_val_losses)
    avg_val_accuracy = np.mean(fold_val_accuracies)
    avg_test_accuracy = np.mean(fold_test_accuracies)
    avg_total_epochs = np.mean(fold_total_epochs)
    std_train_loss = np.std(fold_train_losses)
    std_val_loss = np.std(fold_val_losses)
    std_val_accuracy = np.std(fold_val_accuracies)
    std_test_accuracy = np.std(fold_test_accuracies)
    std_total_epochs = np.std(fold_total_epochs)

    save_results(
        lr,
        dr,
        avg_train_loss,
        avg_val_loss,
        avg_val_accuracy,
        avg_test_accuracy,
        avg_total_epochs,
        std_train_loss,
        std_val_loss,
        std_val_accuracy,
        std_total_epochs,
        std_test_accuracy,
        filename,
    )


def set_seed(seed):
    """Seed Python's ``random``, NumPy's global RNG and PyTorch with ``seed``."""
    for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
        seed_rng(seed)


def save_grid_search_results(
    lr,
    dr,
    avg_train_loss,
    avg_val_loss,
    avg_val_accuracy,
    avg_total_epochs,
    std_train_loss,
    std_val_loss,
    std_val_accuracy,
    std_total_epochs,
    filename,
):
    """Append one grid-search summary row to the CSV file ``filename``.

    The parent directory is created when missing (``grid_search`` writes
    under ``results/``, which previously had to exist already), and the
    header row is written when the file is new or empty.

    Args:
        lr: Learning rate of the configuration.
        dr: Dropout rate of the configuration.
        avg_train_loss, avg_val_loss, avg_val_accuracy, avg_total_epochs:
            Means of the per-fold metrics.
        std_train_loss, std_val_loss, std_val_accuracy, std_total_epochs:
            Standard deviations of the per-fold metrics.
        filename: Path of the CSV file to append to.
    """

    header = [
        "learning_rate",
        "dropout_rate",
        "avg_train_loss",
        "avg_val_loss",
        "avg_val_accuracy",
        "avg_total_epochs",
        "std_train_loss",
        "std_val_loss",
        "std_val_accuracy",
        "std_total_epochs",
    ]

    row = [
        lr,
        dr,
        avg_train_loss,
        avg_val_loss,
        avg_val_accuracy,
        avg_total_epochs,
        std_train_loss,
        std_val_loss,
        std_val_accuracy,
        std_total_epochs,
    ]

    # A bare file name has an empty dirname; only create real directories.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0

    with open(filename, mode="a", newline="") as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(header)
        writer.writerow(row)

    print(f"Results saved to {filename}")


def save_results(
    lr,
    dr,
    avg_train_loss,
    avg_val_loss,
    avg_val_accuracy,
    avg_test_accuracy,
    avg_total_epochs,
    std_train_loss,
    std_val_loss,
    std_val_accuracy,
    std_total_epochs,
    std_test_accuracy,
    filename,
):
    """Append one train/test summary row to the CSV file ``filename``.

    A header row is written first when ``filename`` does not exist yet. The
    column order follows the parameter order of this function.
    """
    # Insertion order of the mapping defines the CSV column order.
    record = {
        "learning_rate": lr,
        "dropout_rate": dr,
        "avg_train_loss": avg_train_loss,
        "avg_val_loss": avg_val_loss,
        "avg_val_accuracy": avg_val_accuracy,
        "avg_test_accuracy": avg_test_accuracy,
        "avg_total_epochs": avg_total_epochs,
        "std_train_loss": std_train_loss,
        "std_val_loss": std_val_loss,
        "std_val_accuracy": std_val_accuracy,
        "std_total_epochs": std_total_epochs,
        "std_test_accuracy": std_test_accuracy,
    }

    is_new_file = not os.path.isfile(filename)

    with open(filename, mode="a", newline="") as file:
        writer = csv.writer(file)
        if is_new_file:
            writer.writerow(list(record.keys()))
        writer.writerow(list(record.values()))

    print(f"Results saved to {filename}")

In [4]:
""""Configuration"""

# Load the experiment settings from the YAML file next to the notebook.
with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

# The session seed is drawn at random, so report it: without the printed value
# a run seeded this way could not be reproduced afterwards.
random_seed = random.randint(0, 100000)
print(f"Session random seed: {random_seed}")

print_graphs = config["print_graphs"]
perform_grid_search = config["perform_grid_search"]
features = config["features"]
target = config["target"]
num_epochs = config["num_epochs"]
learning_rate = config["learning_rate"]
dropout_rate = config["dropout_rate"]
learning_rates = config["learning_rates"]
dropout_rates = config["dropout_rates"]

# Seed every RNG the notebook relies on (Python's `random` was previously
# left unseeded here).
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
# Request deterministic cuDNN behaviour and disable its autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
"""Data setup"""

# Load the raw dataset.
data = pd.read_csv("data/almond_data.csv")

# Drop the first column - presumably an exported row index; TODO confirm.
data = data.iloc[:, 1:]

# NOTE(review): not the last expression of the cell, so this displays nothing.
data.head()

# Column names are stripped of surrounding whitespace before they are used as keys.
data.columns = data.columns.str.strip()

# Per-class KNN imputation plus the derived shape columns (see impute_features).
data_imputed = impute_features(data, target)

# Clamp every model feature to its 5th-95th percentile range, in place.
for feature in features:
    cap_outliers_percentiles(data_imputed, feature)

if print_graphs:
    plot_feature_distributions(data, data_imputed, features)
    plot_box_plots_comparison(data, data_imputed, features)
    plot_correlation_heatmaps(data, data_imputed, features)

# NOTE(review): imputation, capping and scaling are all fitted on the full
# dataset before the train/test split below, so test rows influence the
# preprocessing - confirm this is acceptable for the reported test accuracy.
normalized_data = normalize_data(data_imputed, features)

if print_graphs:
    single_plot_feature_distributions(normalized_data, features)

x = normalized_data[features].values
y = normalized_data[target].values

# Encode the class labels as integers for the classifier targets.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

input_size = len(features)
output_size = len(label_encoder.classes_)

# train_test_split returns the `train_size` portion first. With train_size=0.2
# the names are deliberately swapped: x_test/y_test receive 20% of the rows and
# x_train/y_train the remaining 80%.
x_test, x_train, y_test, y_train = train_test_split(
    x, y_encoded, train_size=0.2, random_state=123
)

In [None]:

"""Grid search with k fold cross validation using seed 123"""
if perform_grid_search:

    # Single-optimizer searches, in order: SGD, Adam, Rprop. The RNGs are
    # re-seeded with 123 before each search.
    for optimizer in (optim.SGD, optim.Adam, optim.Rprop):
        set_seed(123)

        grid_search(
            optimizer,
            learning_rates,
            dropout_rates,
            input_size,
            output_size,
            x_train,
            y_train,
            num_epochs,
            k_folds=5,
            hybrid=False,
        )

    # Hybrid search: the three optimizer classes are handed over by name.
    set_seed(123)

    hybrid_optimizers = {"sgd": optim.SGD, "adam": optim.Adam, "rprop": optim.Rprop}

    grid_search(
        hybrid_optimizers,
        learning_rates,
        dropout_rates,
        input_size,
        output_size,
        x_train,
        y_train,
        num_epochs,
        k_folds=5,
        hybrid=True,
    )

Learning Rate 0.0001, Dropout Rate 0.0, Fold 1/5
Epoch [50/100000], Training Loss: 1.0790, Validation Loss: 1.1086, Validation Accuracy: 0.2472, Optimizer: SGD
Epoch [100/100000], Training Loss: 1.0648, Validation Loss: 1.0927, Validation Accuracy: 0.2851, Optimizer: SGD
Epoch [150/100000], Training Loss: 1.0520, Validation Loss: 1.0786, Validation Accuracy: 0.3318, Optimizer: SGD
Epoch [200/100000], Training Loss: 1.0404, Validation Loss: 1.0659, Validation Accuracy: 0.3653, Optimizer: SGD
Epoch [250/100000], Training Loss: 1.0297, Validation Loss: 1.0544, Validation Accuracy: 0.4031, Optimizer: SGD
Epoch [300/100000], Training Loss: 1.0199, Validation Loss: 1.0439, Validation Accuracy: 0.4365, Optimizer: SGD
Epoch [350/100000], Training Loss: 1.0109, Validation Loss: 1.0342, Validation Accuracy: 0.4588, Optimizer: SGD
Epoch [400/100000], Training Loss: 1.0025, Validation Loss: 1.0252, Validation Accuracy: 0.4944, Optimizer: SGD
Epoch [450/100000], Training Loss: 0.9945, Validation Lo

In [38]:

"""SGD best"""   

# Final train/test experiments. Run order: SGD, Adam, Rprop and Hybrid with the
# "best" settings, then the same four with the "worst" settings. Every run
# re-seeds the RNGs with 123 and repeats training 30 times.
hybrid_optimizers = {"sgd": optim.SGD, "adam": optim.Adam, "rprop": optim.Rprop}

# Each round is (single-optimizer runs, hybrid run).
#   single run: (optimizer class, learning rate, dropout rate, results file)
#   hybrid run: (learning rate, dropout rate, results file)
evaluation_rounds = [
    (
        [
            (optim.SGD, 0.005, 0.05, "SGD_test_results_best"),
            (optim.Adam, 0.0005, 0.05, "Adam_test_results_best"),
            (optim.Rprop, 0.005, 0.05, "RProp_test_results_best"),
        ],
        (0.005, 0.05, "Hybrid_test_results_best"),
    ),
    (
        [
            (optim.SGD, 0.0001, 0.5, "SGD_test_results_worst"),
            (optim.Adam, 0.0001, 0.5, "Adam_test_results_worst"),
            (optim.Rprop, 0.0001, 0.5, "RProp_test_results_worst"),
        ],
        (0.01, 0.0, "Hybrid_test_results_worst"),
    ),
]

for single_runs, hybrid_run in evaluation_rounds:
    for optimizer, run_lr, run_dr, run_file in single_runs:
        set_seed(123)

        train_test(
            x_test,
            x_train,
            y_test,
            y_train,
            input_size,
            output_size,
            30,
            optimizer,
            num_epochs,
            False,
            run_lr,
            run_dr,
            run_file,
        )

    run_lr, run_dr, run_file = hybrid_run

    set_seed(123)

    train_test(
        x_test,
        x_train,
        y_test,
        y_train,
        input_size,
        output_size,
        30,
        hybrid_optimizers,
        num_epochs,
        True,
        run_lr,
        run_dr,
        run_file,
    )

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mu21432962[0m ([33mu21432962-university-of-pretoria[0m). Use [1m`wandb login --relogin`[0m to force relogin


Epoch [50/100000], Training Loss: 0.9295, Validation Loss: 0.8720, Validation Accuracy: 0.6837, Optimizer: SGD
Epoch [100/100000], Training Loss: 0.8345, Validation Loss: 0.7642, Validation Accuracy: 0.7127, Optimizer: SGD
Epoch [150/100000], Training Loss: 0.7651, Validation Loss: 0.6947, Validation Accuracy: 0.7439, Optimizer: SGD
Epoch [200/100000], Training Loss: 0.7016, Validation Loss: 0.6349, Validation Accuracy: 0.7706, Optimizer: SGD
Epoch [250/100000], Training Loss: 0.6475, Validation Loss: 0.5849, Validation Accuracy: 0.8040, Optimizer: SGD
Epoch [300/100000], Training Loss: 0.5963, Validation Loss: 0.5436, Validation Accuracy: 0.8196, Optimizer: SGD
Epoch [350/100000], Training Loss: 0.5739, Validation Loss: 0.5105, Validation Accuracy: 0.8307, Optimizer: SGD
Epoch [400/100000], Training Loss: 0.5434, Validation Loss: 0.4824, Validation Accuracy: 0.8307, Optimizer: SGD
Epoch [450/100000], Training Loss: 0.5250, Validation Loss: 0.4581, Validation Accuracy: 0.8352, Optimize

KeyboardInterrupt: 