In [45]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset
from sklearn.model_selection import KFold
import pandas as pd
import matplotlib.pyplot as plt

In [46]:
import os

# Workaround for "RuntimeError: MPS backend out of memory", which was raised
# when sending vgg19 to the device.
os.environ.update({'PYTORCH_MPS_HIGH_WATERMARK_RATIO': '0.0'})

# Models and Parameters

In [47]:

# Tunable parameters and hyper-parameters used by the cells below.
folds = 5                           # number of K-Fold splits
epochs = 20                         # training epochs run for every fold
learning_rate = 1e-2                # learning rate handed to the optimizer
hidden_sizes: tuple = (2048, 512)   # widths of the fully connected hidden layers

# When False, the CPU is used even if an accelerator is detected.
use_gpu_if_available = True

## Custom NN

In [48]:
def build_hidden_layers(hidden_sizes:tuple[int]):
    """Create the Linear/ReLU pairs that connect consecutive hidden widths.

    Each neighbouring pair ``(hidden_sizes[i], hidden_sizes[i + 1])`` yields one
    ``nn.Linear`` followed by one ``nn.ReLU``. The layer that feeds
    ``hidden_sizes[0]`` is NOT created here; the caller provides it.

    Args:
        hidden_sizes: Hidden layer widths in order; needs at least one entry.

    Returns:
        A flat list of modules, empty when only a single width is given.
    """
    fan_in = hidden_sizes[0]
    stack = []

    for fan_out in hidden_sizes[1:]:
        stack.append(nn.Linear(in_features=fan_in, out_features=fan_out))
        stack.append(nn.ReLU())
        # The output width of this layer is the input width of the next one.
        fan_in = fan_out

    return stack

In [49]:
# References https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html 

class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten the input, then run a Linear/ReLU stack.

    Args:
        input_size: Number of features per sample once the input is flattened.
        hidden_sizes: Hidden layer widths in order; needs at least one entry.
        output_size: Number of output units produced by the final linear layer.
    """

    def __init__(self, input_size:int, hidden_sizes:tuple[int], output_size:int):
        super().__init__()
        self.flatten = nn.Flatten()

        # Layers are instantiated in the order they execute.
        stack = [nn.Linear(input_size, hidden_sizes[0]), nn.ReLU()]
        stack.extend(build_hidden_layers(hidden_sizes))
        stack.append(nn.Linear(hidden_sizes[-1], output_size))
        self.linear_relu_stack = nn.Sequential(*stack)

    def forward(self, x):
        """Flatten ``x`` and return the raw logits; no softmax is applied."""
        return self.linear_relu_stack(self.flatten(x))
    

## VGG Network

In [50]:
def get_vgg19_model(input_size:int, hidden_sizes:tuple[int], output_size:int):
    """Build an untrained torchvision VGG19 with a custom classifier head.

    Args:
        input_size: Flattened size of one input sample. Kept so existing callers
            keep working; the classifier width no longer depends on it.
        hidden_sizes: Widths of the fully connected hidden layers of the new
            classifier, in order; needs at least one entry.
        output_size: Number of classes the classifier predicts.

    Returns:
        The VGG19 module with ``classifier`` replaced by a Linear/ReLU stack,
        a dropout layer, a final linear layer and ``LogSoftmax`` (so the model
        emits log-probabilities).
    """
    import torchvision.models as models

    # weights=None -> randomly initialised network, nothing is downloaded.
    vgg19 = models.vgg19(weights=None)

    # VGG's adaptive average pool always hands the classifier a 512x7x7 map, so
    # the flattened width is fixed and independent of the image resolution.
    # The previous `int(input_size / 6)` only matched it for 3x224x224 inputs
    # (150528 / 6 == 25088). Read the width from the stock classifier instead.
    classifier_in_features = vgg19.classifier[0].in_features

    vgg19.classifier = nn.Sequential(
        nn.Linear(in_features=classifier_in_features, out_features=hidden_sizes[0]),
        nn.ReLU(),

        *build_hidden_layers(hidden_sizes),

        # Regularisation before the output layer (PyTorch's default would be p=0.5).
        nn.Dropout(p=0.6),

        nn.Linear(in_features=hidden_sizes[-1], out_features=output_size),
        # NOTE: run_training uses nn.CrossEntropyLoss, which applies log-softmax
        # itself. Applying it twice leaves log-probabilities unchanged, so this
        # layer is redundant for that loss but harmless.
        nn.LogSoftmax(dim=1),
    )

    return vgg19


## Running images on non-CPU:

On macOS, install libjpeg first:
Run `brew install libjpeg`

In [51]:

# Init a device with cuda or mps so that it can train faster
from typing import Literal


# Preference order: CUDA first, then Apple MPS, then plain CPU.
device: Literal['cuda', 'mps', 'cpu']
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# The config flag can veto whichever accelerator was just detected.
if not use_gpu_if_available:
    device = "cpu"

print(f"Using {device} device")

Using mps device


# Run Training and Test Functions

In [52]:

from time import time


def run_training(
        dataset_loader:DataLoader,
        model: nn.Module,
        optimizer: optim.Optimizer,
        epochs:int
        ):
    """Train ``model`` with cross-entropy loss for ``epochs`` passes over the loader.

    Progress is printed for every batch and a summary line (loss, accuracy,
    elapsed seconds) is printed after every epoch.

    Args:
        dataset_loader: Yields ``(inputs, class_index_targets)`` batches.
        model: Network to train; batches are moved to the device of its parameters.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``dataset_loader``.

    Returns:
        ``(train_accuracies, losses)``: two lists with one entry per epoch,
        the training accuracy in percent and the mean of the per-batch losses.
    """
    criterion = nn.CrossEntropyLoss()

    # Follow the model's own device instead of a notebook-level global, so the
    # function works for any model regardless of which cells ran before.
    model_device = next(model.parameters()).device

    train_accuracies = []
    losses = []

    for epoch in range(epochs):
        epoch_start = time()
        model.train()
        correct = 0
        total = 0
        running_loss = 0.0

        number_of_batches = len(dataset_loader)
        for batch_number, (train_x, train_y) in enumerate(dataset_loader, start=1):
            # Epoch is reported 1-based here to agree with the summary line below.
            print(f"Training {batch_number} of {number_of_batches}, epoch {epoch + 1}")
            train_x = train_x.to(model_device)
            train_y = train_y.to(model_device)

            optimizer.zero_grad()
            outputs = model(train_x)
            loss = criterion(outputs, train_y)
            loss.backward()
            optimizer.step()

            # Highest score along the class dimension is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += train_y.size(0)
            correct += (predicted == train_y).sum().item()
            running_loss += loss.item()

        epoch_loss = running_loss / number_of_batches
        losses.append(epoch_loss)

        train_accuracy = (correct / total) * 100
        train_accuracies.append(train_accuracy)

        # time() measures seconds, so the elapsed time is labelled "s" (was "ms").
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%. Took {time() - epoch_start:.2f} s')

    return train_accuracies, losses

def run_test(test_loader: DataLoader, model: nn.Module,):
    """Evaluate ``model`` on ``test_loader`` and return the accuracy in percent.

    The model is switched to eval mode and gradients are disabled. The accuracy
    is also printed.

    Args:
        test_loader: Yields ``(inputs, class_index_targets)`` batches.
        model: Network to evaluate; batches are moved to the device of its
            parameters (left where they are if the model has no parameters).

    Returns:
        Percentage of samples whose highest-scoring class equals the target.

    Raises:
        ValueError: If ``test_loader`` yields no samples, since the accuracy
            would be undefined (previously a bare ZeroDivisionError).
    """
    model.eval()

    # Follow the model's own device instead of a notebook-level global.
    first_parameter = next(model.parameters(), None)
    model_device = None if first_parameter is None else first_parameter.device

    correct = 0
    total = 0
    with torch.no_grad():
        for test_x, test_y in test_loader:
            if model_device is not None:
                test_x = test_x.to(model_device)
                test_y = test_y.to(model_device)

            # Highest score along the class dimension is the predicted class.
            predicted = model(test_x).argmax(dim=1)
            total += test_y.size(0)
            correct += (predicted == test_y).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = (correct / total) * 100
    print(f"Test Accuracy: {accuracy:.2f}%\n")

    return accuracy

# Load dataset

In [53]:
%run ./dataset_load.py

is_load_from_csv = input("Load from CSV or images directly? Type anything to load csv, leave blank for images.")

if is_load_from_csv: dataset = load_from_csv(device) # type: ignore
else: dataset = load_from_images(device) # type: ignore

In [54]:
# Element count of the first item's input (index 0 of the first dataset entry);
# assumes that entry is a torch tensor.
number_of_features = torch.numel(dataset[0][0])

print(number_of_features)

150528


# K-Fold cross-validation: training and testing on each fold


In [55]:

# Run K-Fold cross-validation: for every fold train a fresh model, measure the
# test accuracy on the held-out part and save a per-fold training plot.
test_accuracies = [0.0]*folds
kfold = KFold(n_splits=folds, shuffle=True)
colors = 'bgcmk'

for fold, (train_idx, test_idx) in enumerate(kfold.split(dataset), start=1):
    print(f"Fold {fold}. Training samples {len(train_idx)}, Testing sampleS {len(test_idx)}")

    fold_index = fold-1
    # Cycle through the palette so running more folds than there are colors
    # no longer raises IndexError.
    color = colors[fold_index % len(colors)]

    # Init a fresh model so no weights leak from one fold into the next.
    # To try the custom network instead, use:
    #   NeuralNetwork(input_size=number_of_features, hidden_sizes=hidden_sizes, output_size=2)
    model = get_vgg19_model(
        number_of_features,
        hidden_sizes,
        2
    )
    model = model.to(device)

    print(model)

    # Alternative optimizer: optim.SGD(model.parameters(), lr=learning_rate)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Create train and test subsets
    train_subset = Subset(dataset, train_idx.tolist())
    test_subset = Subset(dataset, test_idx.tolist())

    # Create DataLoaders
    train_loader = DataLoader(train_subset, batch_size=128, shuffle=True)
    test_loader = DataLoader(test_subset, batch_size=128, shuffle=False)

    train_accuracies, train_losses = run_training(
        train_loader,
        model,
        optimizer,
        epochs
    )

    test_accuracy = run_test(test_loader, model)
    test_accuracies[fold_index] = test_accuracy

    # Plot for this fold: loss on the left axis, accuracy on the right axis and
    # the fold's test accuracy as a single marker against the top (fold) axis.
    fig, left_y_axis = plt.subplots()
    right_y_axis = left_y_axis.twinx()
    top_x_axis = right_y_axis.twiny()

    epoch_numbers = range(1, epochs + 1)
    left_y_axis.plot(epoch_numbers, train_losses, color+'-',  label=f'Fold {fold} Train Loss')

    top_x_axis.plot(fold, test_accuracy, color+'o', label=f'Fold {fold} Test Accuracy')

    right_y_axis.plot(epoch_numbers, train_accuracies, color+'--', label=f'Fold {fold} Train Accuracy')

    plt.title('Training Loss and Accuracy Per Fold')

    left_y_axis.set_xlabel('Epoch')
    left_y_axis.set_ylabel('Loss')

    right_y_axis.set_ylabel('Accuracy (%)')
    right_y_axis.set_ylim(50.0, 100.5)

    top_x_axis.set_xlim(0, folds+1)
    top_x_axis.set_xticks(range(folds+1))
    top_x_axis.set_xlabel("Fold Number")
    fig.legend( bbox_to_anchor=(0.9,0.1))
    fig.savefig(f'training_loss_fold{fold}.png', bbox_inches='tight')

    # Render the figure now, then release it so figures do not pile up in
    # memory across folds.
    plt.show()
    plt.close(fig)

# Summary figure: one marker per fold showing that fold's test accuracy.
summary_fig, summary_ax = plt.subplots(figsize=(8, 6))
fold_numbers = list(range(1, folds + 1))

summary_ax.scatter(fold_numbers, test_accuracies, color='purple', s=100, label='Test Accuracy')
summary_ax.set_xlabel('Fold Number')
summary_ax.set_ylabel('Accuracy (%)')
# Leave one empty slot on either side of the fold markers.
summary_ax.set_xlim(0, folds+1)
summary_ax.set_xticks(range(folds+1))
summary_ax.set_title('Test Accuracy Per Fold')

summary_fig.savefig('test_accuracy.png', bbox_inches='tight')



Fold 1. Training samples 1245, Testing sampleS 312
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2

KeyboardInterrupt: 