# Resources
- https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
- https://blog.paperspace.com/alexnet-pytorch/
- https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py
- https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html
- https://en.wikipedia.org/wiki/AlexNet
- http://d2l.ai/chapter_convolutional-modern/alexnet.html

# Imports

In [None]:
# import pandas as pd # dataframes
# import torchvision.transforms as transforms

import time
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchmetrics
import torchvision
import torchvision.models as models
import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torch.utils.tensorboard import SummaryWriter # http://localhost:6006/

# Device Configuration

In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using {device} processing")

#torch.set_default_dtype(torch.float16)

# Hyperparameters & Loss Function

In [None]:
# Candidate learning rates compared during the hyperparameter search.
learning_rates_test = [
    0.01,
    0.005,
    0.001,
    0.0005,
    0.0001,
]
batch_size = 32  # samples per mini-batch handed to the data loaders
epoch_limit = 20  # epoch budget; not referenced by the cells visible here
epoch_test_limit = 3  # epochs trained per trial in the learning-rate search
classes = 10  # number of target classes used when building the metrics

# Cross-entropy loss shared by every training run.
loss_function = nn.CrossEntropyLoss()

# Load Datasets

In [None]:
# NOTE(review): `dir` shadows the Python builtin; the name is kept because
# other notebook cells may refer to it.
dir = './data'
download = True

# Preprocessing applied to every image: resize to 227x227, convert to a
# tensor, then normalize each of the three channels with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train and test splits, downloaded into `dir` when missing.
dataset_train = datasets.CIFAR10(root=dir, train=True, transform=transform, download=download)
dataset_test = datasets.CIFAR10(root=dir, train=False, transform=transform, download=download)

# Hold out 10% of the training samples for validation; the remaining 90%
# stay in the training split.
size_train = int(0.9 * len(dataset_train))
size_valid = len(dataset_train) - size_train
dataset_train, dataset_valid = torch.utils.data.random_split(
    dataset_train,
    [size_train, size_valid],
)

# Shrink the training split to its first 4000 samples (the validation split
# keeps its full size).
from torch.utils.data import Subset
dataset_train = Subset(dataset_train, range(4000))

# Batched loaders: training batches are reshuffled, validation order is fixed.
loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False)

# Report how many samples ended up in each split.
print(f"Training Size: {len(dataset_train)}")
print(f"Validation Size: {len(dataset_valid)}")
print(f"Testing Size: {len(dataset_test)}")

# Explore and Visualize Data

In [None]:
def DisplayImage(image):
    """Undo the mean-0.5 / std-0.5 normalization and show the image.

    Args:
        image: Image tensor; assumed to be laid out channels-first, since the
            axes are reordered to (1, 2, 0) before plotting.
    """
    restored = image / 2 + 0.5  # inverse of (x - 0.5) / 0.5
    pixels = restored.numpy()
    # Move the first axis to the end so matplotlib receives the channel axis last.
    plt.imshow(pixels.transpose(1, 2, 0))
    plt.show()

# Pull a single batch from the training loader so it can be previewed.
dateIterator = iter(loader_train)
images, labels = next(dateIterator)

# Tile the batch into one grid image and display it.
DisplayImage(torchvision.utils.make_grid(images))


# Model Architecture

In [None]:
# Instantiate torchvision's AlexNet with its default pretrained weights and
# print the layer structure for inspection. TrainModel constructs its own
# model, so this instance is only displayed by the cells visible here.
model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)
print(model)

# Define Training Function
- TensorBoard

In [None]:
class MetricWrapper:
    """Per-epoch metrics recorded for one training run.

    The lists start empty and are expected to receive one entry per completed
    epoch: training loss in ``losses``, validation accuracy in ``accuracy``
    and validation F1 in ``f1`` (accuracy and F1 as fractions, printed as
    percentages).

    Args:
        learning_rate: Learning rate used for the run.
        num_epochs: Number of epochs the run was configured for.
        optimizer: Label of the optimizer used (only printed).
    """

    def __init__(self, learning_rate, num_epochs, optimizer):
        self.learning_rate = learning_rate
        self.epochs = num_epochs
        self.optimizer = optimizer

        self.losses = []
        self.accuracy = []
        self.f1 = []

    def _print_header(self):
        """Print the one-line description of the run configuration."""
        print(f'Trained with learning Rate: {self.learning_rate} for {self.epochs} epochs using the {self.optimizer} optimizer.')

    def PrintMetrics(self):
        """Print the run header followed by loss and accuracy for each recorded epoch."""
        self._print_header()
        # Walk the recorded values rather than range(self.epochs) so a run
        # with fewer recorded epochs than configured does not raise IndexError.
        for epoch, (loss, accuracy) in enumerate(zip(self.losses, self.accuracy), start=1):
            print(f'Epoch {epoch}; Loss: {loss:.4f}; Accuracy: {(100 * accuracy):.4f}%')

    def PrintFinal(self):
        """Print the run header followed by the metrics of the last recorded epoch."""
        self._print_header()
        if not (self.losses and self.accuracy and self.f1):
            # Nothing was recorded; report that instead of failing on [-1].
            print('No epochs recorded.')
            return
        print(
            f'Final Loss: {(self.losses[-1]):.4f}; '
            f'Final Accuracy: {(100 * self.accuracy[-1]):.4f}%; '
            f'Final F1: {(100 * self.f1[-1]):.4f}%'
        )


In [None]:
def TrainModel(optimizer_choice, batch_size, learning_rate, num_epochs, printStep = False):
    """Fine-tune a pretrained AlexNet and record per-epoch metrics.

    Reads the notebook-level globals ``loader_train``, ``loader_valid``,
    ``loss_function``, ``classes`` and ``device``. Loss and validation
    accuracy are also written to TensorBoard under
    ``runs/<optimizer>-<epochs>E-<lr>LR``.

    Args:
        optimizer_choice: ``'Adam'`` or ``'SGD'`` (SGD uses momentum 0.9).
        batch_size: Not used by this function; batching is fixed by the
            global data loaders. Kept so existing calls stay valid.
        learning_rate: Learning rate handed to the optimizer.
        num_epochs: Number of passes over ``loader_train``.
        printStep: When True, print the batch loss roughly three times per
            epoch.

    Returns:
        A ``MetricWrapper`` with one loss / accuracy / F1 entry per epoch, or
        ``None`` when ``optimizer_choice`` is not recognised.
    """
    # Reject unknown optimizers before creating a log directory or loading
    # weights, so the early exit leaves nothing behind.
    if optimizer_choice not in ('Adam', 'SGD'):
        return None

    model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)
    # The pretrained head emits 1000 ImageNet logits. Swap in a `classes`-way
    # layer so argmax predictions stay inside the label range the metrics
    # below are configured for.
    pretrained_head = model.classifier[-1]
    model.classifier[-1] = nn.Linear(pretrained_head.in_features, classes)
    model = model.to(device)

    # The optimizer updates the model parameters, scaled by the learning rate.
    if optimizer_choice == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

    metrics = MetricWrapper(learning_rate, num_epochs, optimizer_choice)
    metric_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=classes).to(device)
    metric_f1 = torchmetrics.F1Score(task='multiclass', num_classes=classes).to(device)

    num_batches = len(loader_train)
    # About a third of an epoch, rounded to the nearest ten; clamped to 1 so
    # short loaders cannot produce a modulo-by-zero.
    log_interval = max(1, int(round(num_batches / 3, -1)))

    writer = SummaryWriter(log_dir=f'runs/{optimizer_choice}-{num_epochs}E-{learning_rate}LR')
    try:
        for epoch in range(num_epochs):
            model.train()
            loss_epoch = 0
            for i, (inputs, targets) in enumerate(loader_train):
                inputs = inputs.to(device)
                targets = targets.to(device)

                optimizer.zero_grad()  # clear gradients left from the previous step

                outputs = model(inputs)
                loss = loss_function(outputs, targets)  # compare predictions to the targets
                loss.backward()  # compute the gradients
                optimizer.step()  # apply the update computed from those gradients

                loss_epoch += loss.item()

                if printStep and i % log_interval == 0:
                    print(f'LR: {learning_rate}; Opt: {optimizer_choice}; Epoch {epoch+1}/{num_epochs}; Step: {i+1}/{num_batches}; Loss: {loss.item():.4f}')
                    print()

            # Validation pass: evaluation mode, no gradient tracking.
            model.eval()
            metric_accuracy.reset()
            metric_f1.reset()
            with torch.no_grad():
                for inputs, targets in loader_valid:
                    inputs = inputs.to(device)
                    targets = targets.to(device)
                    outputs = model(inputs)
                    predicted = torch.argmax(outputs, dim=1)  # index of the highest logit

                    metric_accuracy.update(predicted, targets)
                    metric_f1.update(predicted, targets)

            loss_epoch = loss_epoch / num_batches  # mean training loss per batch
            accuracy_epoch = metric_accuracy.compute().item()
            f1_epoch = metric_f1.compute().item()

            metrics.losses.append(loss_epoch)
            metrics.accuracy.append(accuracy_epoch)
            metrics.f1.append(f1_epoch)

            writer.add_scalar("Loss/Epoch", loss_epoch, epoch)
            writer.add_scalar("Accuracy/Epoch", accuracy_epoch, epoch)
    finally:
        # close() flushes pending events and releases the event file even if
        # training raised part-way through.
        writer.close()

    return metrics

# Train Models

# Hyperparameter Tuning

In [None]:
# Time the whole learning-rate sweep across both optimizers.
param_training_start = time.time()

# One short trial per candidate learning rate with Adam; each summary is
# printed as soon as its trial finishes.
test_metrics_adam = []
for learning_rate in learning_rates_test:
    trial = TrainModel('Adam', batch_size, learning_rate, epoch_test_limit)
    test_metrics_adam.append(trial)
    trial.PrintFinal()

# The same sweep with SGD.
test_metrics_sgd = []
for learning_rate in learning_rates_test:
    trial = TrainModel('SGD', batch_size, learning_rate, epoch_test_limit)
    test_metrics_sgd.append(trial)
    trial.PrintFinal()

param_training_end = time.time()

print(f"Elapsed time: {(param_training_end - param_training_start):.2f} seconds ({((param_training_end - param_training_start)/60):.2f} minutes)")
  

In [None]:
# Recap the final metrics of every trial: Adam runs first, then SGD.
for metrics in test_metrics_adam + test_metrics_sgd:
    metrics.PrintFinal()
    print()

# For each optimizer, keep the two learning rates whose last recorded
# validation accuracy was highest.
metrics_sorted = list(test_metrics_adam)
metrics_sorted.sort(key=lambda x: x.accuracy[-1], reverse=True)
learning_rates_adam = [best.learning_rate for best in metrics_sorted[:2]]

metrics_sorted = list(test_metrics_sgd)
metrics_sorted.sort(key=lambda x: x.accuracy[-1], reverse=True)
learning_rates_sgd = [best.learning_rate for best in metrics_sorted[:2]]

# Show the selected learning rates.
print('LR Adam')
for lr in learning_rates_adam:
    print(lr)
print()
print('LR SGD')
for lr in learning_rates_sgd:
    print(lr)

# Evaluate Model Accuracy
- Visualizations

# Test Set Predictions