# Resources
- https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
- https://blog.paperspace.com/alexnet-pytorch/
- https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py
- https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html
- https://en.wikipedia.org/wiki/AlexNet
- http://d2l.ai/chapter_convolutional-modern/alexnet.html

# Imports

In [2]:
# import pandas as pd # dataframes
# import torchvision.transforms as transforms

# import numpy as np
# import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms

import torchmetrics

# Device Configuration

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using {device} processing")

Using cuda processing


# Load Datasets

In [4]:
dir = './data'
download = True
transform = transforms.Compose( # define normalization transform
    [
      transforms.Resize((227,227)), # resize images to required minimum 227x227
      transforms.ToTensor(), # transform image to tensor and torch format
      transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5)) # apply normalize across all the channels for the image
    ]
)

# Get Datasets
dataset_train = datasets.CIFAR10(root = dir, download=download, transform=transform, train = True)
dataset_test = datasets.CIFAR10(root = dir, download=download, transform=transform, train = False)

# Split the training dataset into a training set (90% samples) and a validation set (10% samples).
size_train = int(0.9 * len(dataset_train))
size_valid = len(dataset_train) - size_train

dataset_train, dataset_valid = torch.utils.data.random_split(dataset_train, [size_train, size_valid])

# Verify the sizes of the training and validation sets
print(f"Training Size: {len(dataset_train)}")
print(f"Validation Size: {len(dataset_valid)}")
print(f"Testing Size: {len(dataset_test)}")

Files already downloaded and verified
Files already downloaded and verified
Training Size: 45000
Validation Size: 5000
Testing Size: 10000


# Explore and Visualize Data

# Model Architecture

In [5]:
class AlexNet(nn.Module):
    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        
        self.avgpool = nn.AdaptiveAvgPool2d((6,6))

        self.classifier = nn.Sequential(
            nn.Linear(9216, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),

            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# TensorBoard

# Hyperparameters & Loss Function

In [6]:
learning_rates = [0.1, 0.01, 0.001, 0.0001]
batch_sizes = [32, 64]
epochs = [25] # 50
classes = 10

loss_function = nn.CrossEntropyLoss() # Loss

# Define Training Function

In [12]:
optimizations = True

def TrainModel(optimizer, batch_size, learning_rate, num_epochs):
  print(f"Training model using parameters:")
  print(f"    - Optimizer: {optimizer} ")
  print(f"    - Batch Size: {batch_size} ")
  print(f"    - Learning Rate: {learning_rate} ")
  print(f"    - Epochs: {num_epochs} ")
  print()

  # Model
  model = AlexNet(num_classes = classes).to(device)


  # Optimizer Function
  # this will help change the parameters of the model, influenced by the learning rate
  if optimizer == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
  elif optimizer == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum=0.9)
  else:
    return

  # Data loaders
  # Data loaders split the data up into batches as determined by the batch size
  if optimizations: 
    loader_train = torch.utils.data.DataLoader(dataset = dataset_train, batch_size = batch_size, shuffle = True, num_workers=4, pin_memory=True)
    loader_valid = torch.utils.data.DataLoader(dataset = dataset_valid, batch_size = batch_size, shuffle = False, num_workers=4, pin_memory=True)
  else:
    loader_train = torch.utils.data.DataLoader(dataset = dataset_train, batch_size = batch_size, shuffle = True)
    loader_valid = torch.utils.data.DataLoader(dataset = dataset_valid, batch_size = batch_size, shuffle = False)

  # Metrics
  metric_accuracy = torchmetrics.Accuracy(task='multiclass', num_classes=classes).to(device)
  metric_f1 = torchmetrics.F1Score(task='multiclass', num_classes = classes).to(device)

  # For Epoch
  for epoch in range(num_epochs):
    loss_epoch = 0
    for i, (inputs, targets) in enumerate(loader_train): # loop through each batch the dataloader has
      # Get the inputs and their target class
      inputs = inputs.to(device)
      targets = targets.to(device)

      # Zero the parameter gradients
      optimizer.zero_grad()

      outputs = model(inputs) # feed the model the inputs, and get predictions off the inputs
      loss = loss_function(outputs, targets) # compare the preditions to the actual target values of the inputs
      loss.backward() # compute the gradients
      optimizer.step() # actually update the model parameters based off the gradients computed previously

      loss_epoch += loss.item()
      if i%(round((len(loader_train)/3), -1)) == 0:
        print(f'Epoch {epoch+1}/{num_epochs}; Step: {i+1}/{len(loader_train)}')
        print(f'Loss: {loss.item():.4f}')
        print()
    
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    model.eval() # set the model to eval mode so it does not train off the whole training/testing sets
    with torch.no_grad(): # disabling gradient calculation, since we're not computing gradients
      
      loss_epoch = loss_epoch/len(loader_train)
      metric_accuracy.reset()
      metric_f1.reset()
      for inputs, targets in loader_valid:
        inputs = inputs.to(device) # get test input
        targets = targets.to(device) # get the classes of the test input
        outputs = model(inputs) # predict the classification values of the test input
        predicted = torch.argmax(outputs.data, 1) # get the highest classification value

        metric_accuracy.update(predicted, targets)
        metric_f1.update(predicted, targets)

      print(f'Epoch: {epoch+1}/{num_epochs}')
      print(f'Loss: {loss_epoch:.4f}')
      print(f'Accuracy: {(100 * metric_accuracy.compute()):.4f}%')
      print(f'F1Score: {(100* metric_f1.compute()):.4f}%')
      print()

      if torch.cuda.is_available():
        torch.cuda.empty_cache()

    model.train() # set model back in training mode

# Train Models

In [8]:
for epoch in epochs:
  for batch_size in batch_sizes:
    for learning_rate in learning_rates:
      #TrainModel('Adam', batch_size, learning_rate, epoch)
      break

In [9]:
TrainModel('SGD', 16, 0.005, 3)

Training model using parameters:
    - Optimizer: SGD 
    - Batch Size: 16 
    - Learning Rate: 0.005 
    - Epochs: 3 



  return F.conv2d(input, weight, bias, self.stride,


Epoch: 1/3
Accuracy: 54.3600%
F1Score: 54.3600%

Epoch: 2/3
Accuracy: 65.3000%
F1Score: 65.3000%

Epoch: 3/3
Accuracy: 72.0200%
F1Score: 72.0200%



In [13]:
if torch.cuda.is_available():
    torch.cuda.empty_cache()
TrainModel('SGD', 32, 0.005, 3)

Training model using parameters:
    - Optimizer: SGD 
    - Batch Size: 32 
    - Learning Rate: 0.005 
    - Epochs: 3 

Epoch 1/3; Step: 1/1407
Loss: 2.3029

Epoch 1/3; Step: 471/1407
Loss: 2.2110

Epoch 1/3; Step: 941/1407
Loss: 1.5804

Epoch: 1/3
Loss: 1.8845288646754934
Accuracy: 45.4800%
F1Score: 45.4800%

Epoch 2/3; Step: 1/1407
Loss: 1.2234



# Evaluate Model Accuracy
- Visualizations

# Test Set Predictions