In [None]:
!pip install torchinfo

In [None]:
!pip install torchmetrics

In [None]:
!pip install wandb

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
from torchinfo import summary

Rertieve data from Google Drive - needs to be altered for SCC

In [23]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Hyperparameters

In [24]:
import wandb
wandb.login()

True

In [47]:
#Create Sweep - wandb

n_classes = 43 #This parameter is set outiside of the sweep since it should not be changed in any instance

sweep_config ={
    'method' : 'random',
    'metric' : {'goal':'minimize', 'name':'loss'},

    'parameters' : {
        'batch_size' : {'values': [5,10,20]},
        'resized_resolution' : {'values': [8]},
        'data_augmentation' : {'values': [False]},
        'n_classes' : {'values': [n_classes]},
        'input_layer_size' : {'values' : [10,20]},
        'hidden_layer_size' : {'values' : [5]},
        'n_hidden_layers' : {'values' : [2]},
        'weight_decay' : {'values': [True]},
        'weight_decay_value' : {'values': [0.0001]}, #Remove if weight_decay = False
        'learning_rate' : {'values': [0.001]},
        'epochs' : {'values':[5]},
        'early_stop_patience' : {'values': [5]}

    }
}


In [48]:
#Initialize the sweep
sweep_id = wandb.sweep(sweep_config, project="ba865-group-project-mlp-test3-sweep")

Create sweep with ID: ycizsgm5
Sweep URL: https://wandb.ai/victorgfloriano/ba865-group-project-mlp-test3-sweep/sweeps/ycizsgm5


In [27]:
def create_secondary_params(resized_resolution):
  """
  This function will create secondary parameters used by the model.
  It had to be transformed into a function because it requires values assgined
  by the current W&B sweep
  """
  image_dimensions = (3, resized_resolution, resized_resolution)
  input_size = image_dimensions[0] * image_dimensions[1] * image_dimensions[2]
  device='cuda' if torch.cuda.is_available() else 'cpu'

  return image_dimensions, input_size, device

In [38]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import v2 #v2 is faster than normal transforms
from torch.utils.data import Subset
import numpy as np

def create_data_loaders(resized_resolution, batch_size, data_augmentation):
  """
  This function will create the dataloaders used by the model.
  It had to be transformed into a function because it requires values assgined
  by the current W&B sweep
  """

  #Convert data to Tensor, Resize, Normalize values, and applies data augmentation
  #such parameter is selected - Train
  if data_augmentation == True:

    train_transforms = transforms.Compose([
        v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]), #Converts to Tensor and Scales [0,1] - Old ToTensor()
        v2.Resize((resized_resolution, resized_resolution)),   #Size 32
        v2.Normalize(0.5,0.5), #Changes range to [-1,1]/ Applies to all channels
        v2.RandomRotation(10),
        v2.RandomHorizontalFlip([0.2]),
        v2.RandomPerspective(0.5, 0.2)      #Applies a perspective shift to the image (20% of chance)
    ])

  else:

    train_transforms = transforms.Compose([
        v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]), #Converts to Tensor and Scales [0,1] - Old ToTensor()
        v2.Resize((resized_resolution, resized_resolution)),   #Size 32
        v2.Normalize((0.5,), (0.5,)), #Changes range to [-1,1]/ Applies to all channels
          ])

  #Convert data to Tensor, Resize, Normalize values - Test
  test_transforms = transforms.Compose([
        v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]), #Converts to Tensor and Scales [0,1] - Old ToTensor()
        v2.Resize((resized_resolution, resized_resolution)),   #Size 32
        v2.Normalize((0.5,),(0.5,)), #Changes range to [-1,1]/ Applies to all channels
          ])


  #Use image folder to get Images with the right class label for Train and Test
  train_dataset = ImageFolder(
      '/content/drive/MyDrive/BU_MSBA/BA865 - Neural Networks/BA865 - Group Project/GTSRBkaggle/Train',
      transform = train_transforms
  )

  indices = np.random.choice(len(train_dataset), 200, replace=False) #REMOVE AFTER TEST
  train_dataset = Subset(train_dataset, indices)

  test_dataset = ImageFolder(
      '/content/drive/MyDrive/BU_MSBA/BA865 - Neural Networks/BA865 - Group Project/GTSRBkaggle/Test_organized',
      transform = test_transforms
  )

  #Separate Train and Validation
  train_dataset, validation_dataset = random_split(train_dataset, [0.8, 0.2])

  #Create DataLoaders
  train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
  val_loader = DataLoader(dataset=validation_dataset, batch_size=batch_size, shuffle=False)
  test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

  return train_loader, val_loader, test_loader

Create MLP Model, loss metric and optimizer

In [40]:
#MLP Model

def create_mlp_model(input_size, input_layer_size, n_hidden_layers,
                  hidden_layer_size, learning_rate, weight_decay, weight_decay_value):
  """
  This function will create the model.
  It had to be transformed into a function because it requires values assgined
  by the current W&B sweep
  """



  class MLP(nn.Module):
      def __init__(self, input_size, input_layer_size, n_hidden_layers, hidden_layer_size, n_classes):
          super(MLP, self).__init__()

          #Initialize the list of layers
          layers = []

          #Start with the first layer from input to first hidden layer
          layers.append(nn.Linear(input_size, input_layer_size))
          layers.append(nn.ReLU())

          #Create the first hidden layer
          layers.append(nn.Linear(input_layer_size, hidden_layer_size))
          layers.append(nn.ReLU())

          #Adding hidden layers, each with size 'hidden_size'
          for _ in range(1, n_hidden_layers):
              layers.append(nn.Linear(hidden_layer_size, hidden_layer_size))
              layers.append(nn.ReLU())

          #Output layer
          layers.append(nn.Linear(hidden_layer_size, n_classes))

          # Create a sequential container with all the layers
          self.mlp = nn.Sequential(*layers)

      def forward(self, x):
          x = x.view(x.size(0), -1)
          return self.mlp(x)


  #Create model
  mlp_model = MLP(input_size, input_layer_size, n_hidden_layers,
                  hidden_layer_size,  n_classes)


  #Store model in CUDA if availabel
  if torch.cuda.is_available():
    mlp_model.cuda()

  summary(mlp_model)


  #Define Loss
  criterion = nn.CrossEntropyLoss() #add weight here to adjust for class imbalance(IMPORTANT)

  #Define Optimizer
  if weight_decay:

    #AdamW is structured to better apply weight decay
    optimizer = optim.AdamW(mlp_model.parameters(), lr=learning_rate,
                            weight_decay=weight_decay_value)
  else:
    optimizer = optim.Adam(mlp_model.parameters(), lr=learning_rate)


  return mlp_model, criterion, optimizer

Define Function to get Accuracy

In [30]:
import torchmetrics

def get_accuracy(dataloader, model, device='cpu'):

    was_training = model.training

    #Create an accuracy metric instance
    acc = torchmetrics.Accuracy(num_classes=n_classes, average='macro', task='multiclass').to(device)

    #Set model to evaluation mode
    model.eval()

    #Set model to gpu if available
    model.to(device)

    with torch.no_grad():
        for images, labels in dataloader:
            #Move images and labels to the correct device - gpu when available
            images = images.to(device)
            labels = labels.to(device)

            #Get model outputs
            outputs = model(images)

            #Get model predictions - torchmetrics.Accuracy needs predictions, not logits or p-dist to calculate accuracy
            predictions = torch.argmax(outputs, axis=1)

            #Update the accuracy metric for each batch
            acc.update(predictions, labels)

    # Compute the final accuracy for all batches
    final_accuracy = acc.compute()

    if was_training:
      model.train() #Sets model back to training if it was. training

    return final_accuracy

#use as -> accuracy = get_accuracy(your_dataloader, mlp_model, device='cuda' if torch.cuda.is_available() else 'cpu')

Define Function to get Loss mertic

In [31]:
def get_loss(loader, model, criterion, device):
    """
    Compute the average loss of the model on a dataset.

    Parameters:
    - loader (DataLoader): The DataLoader for the dataset to evaluate.
    - model (torch.nn.Module): The neural network model.
    - criterion (callable): The loss function.
    - device (str): The device to perform computation on ('cuda' or 'cpu').

    Returns:
    - float: The average loss over the dataset.
    """
    was_training = model.training

    model.eval()  # Set the model to evaluation mode.
    total_loss = 0
    total_samples = 0

    with torch.no_grad():
        for images, labels in loader:

            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item() * images.size(0)  # Multiply loss by batch size
            total_samples += images.size(0)

    average_loss = total_loss / total_samples  # Normalize by total number of samples

    if was_training:

      model.train() #Sets the model back to training if it was training

    return average_loss

Define Early Stopper

In [32]:
class EarlyStopper:
    def __init__(self, model, patience=3):
        self.model = model
        self.patience = patience
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
            torch.save(self.model.state_dict(), "./best_model.pt")
        elif validation_loss >= self.min_validation_loss:
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

Training Model

In [49]:
def train(config=None):

  #Initialize a new wandb run
  with wandb.init(config=config):
    config = wandb.config #Current configuration will be defined by the sweep

    #Create important secondary params
    image_dimensions, input_size, device = create_secondary_params(config.resized_resolution)

    #Create data loaders and apply augmentations if necessary
    train_loader, val_loader, test_loader = create_data_loaders(config.resized_resolution,
                                                                config.batch_size,
                                                                config.data_augmentation)

    #Create mlp model
    mlp_model, criterion, optimizer = create_mlp_model(input_size,
                                                      config.input_layer_size,
                                                      config.n_hidden_layers,
                                                      config.hidden_layer_size,
                                                      config.learning_rate,
                                                      config.weight_decay,
                                                      config.weight_decay_value)


    early_stopper = EarlyStopper(mlp_model, patience=config.early_stop_patience)

    for epoch in range(config.epochs):
      mlp_model.train()
      total_train_loss = 0 #Needed to log train loss for each epoch

      for i, (images, labels) in enumerate(train_loader):

          #Zero out gradients
          optimizer.zero_grad()

          #Move data to gpu if available
          images = images.to(device)
          labels = labels.to(device)

          #Forward Pass
          outputs = mlp_model(images)

          #Calculate loss
          loss = criterion(outputs, labels)
          total_train_loss += loss.item() * images.size(0)

          #Backward Pass
          loss.backward()
          optimizer.step()

          #Print the loss
          if i%10 == 0:
            print("Epoch", epoch+ 1, " batch.", i+1, " Training Loss:", loss.item())


      #Compute total train accuracy/validation accuracy/validation loss
      train_accuracy = get_accuracy(train_loader, mlp_model, device=device)
      validation_accuracy = get_accuracy(val_loader, mlp_model, device=device)
      validation_loss = get_loss(val_loader, mlp_model, criterion, device)

      print(f'Epoch [{epoch + 1}/{config.epochs}], Train Accuracy: {train_accuracy.item():.4f}, Validation Accuracy: {validation_accuracy.item():.4f}')
      wandb.log({"epoch": epoch + 1,
                 "train_accuracy": train_accuracy.item(),
                 "val_accuracy": validation_accuracy.item(),
                 "train_loss": total_train_loss / len(train_loader.dataset),
                 "validation_loss": validation_loss
            })

      validation_loss = get_loss(val_loader, mlp_model, criterion, device)

      if early_stopper.early_stop(validation_loss):
          print("Validation loss hasn't dropped. Early stopping!")
          break

#Start the sweep with the sweep agent
wandb.agent(sweep_id, function=train, count=5)

[34m[1mwandb[0m: Agent Starting Run: jjyhoeq8 with config:
[34m[1mwandb[0m: 	batch_size: 5
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 5
[34m[1mwandb[0m: 	input_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	n_classes: 43
[34m[1mwandb[0m: 	n_hidden_layers: 2
[34m[1mwandb[0m: 	resized_resolution: 8
[34m[1mwandb[0m: 	weight_decay: True
[34m[1mwandb[0m: 	weight_decay_value: 0.0001


Epoch 1  batch. 1  Training Loss: 3.7236905097961426
Epoch 1  batch. 11  Training Loss: 3.7345168590545654
Epoch 1  batch. 21  Training Loss: 3.9152488708496094
Epoch 1  batch. 31  Training Loss: 3.6440043449401855
Epoch [1/5], Train Accuracy: 0.0286, Validation Accuracy: 0.0417
Epoch 2  batch. 1  Training Loss: 3.72424578666687
Epoch 2  batch. 11  Training Loss: 3.6640281677246094
Epoch 2  batch. 21  Training Loss: 3.5559113025665283
Epoch 2  batch. 31  Training Loss: 3.9712014198303223
Epoch [2/5], Train Accuracy: 0.0204, Validation Accuracy: 0.0400
Epoch 3  batch. 1  Training Loss: 3.7742321491241455
Epoch 3  batch. 11  Training Loss: 3.638566493988037
Epoch 3  batch. 21  Training Loss: 3.6942062377929688
Epoch 3  batch. 31  Training Loss: 4.0683770179748535
Epoch [3/5], Train Accuracy: 0.0195, Validation Accuracy: 0.0400
Epoch 4  batch. 1  Training Loss: 3.631361722946167
Epoch 4  batch. 11  Training Loss: 3.6442761421203613
Epoch 4  batch. 21  Training Loss: 3.549621105194092
Epoc

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▄▁▁▄█
train_loss,█▆▄▂▁
val_accuracy,▄▄▄█▁
validation_loss,█▇▆▄▁

0,1
epoch,5.0
train_accuracy,0.03918
train_loss,3.67897
val_accuracy,0.02083
validation_loss,3.7127


[34m[1mwandb[0m: Agent Starting Run: omwvfip3 with config:
[34m[1mwandb[0m: 	batch_size: 20
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 5
[34m[1mwandb[0m: 	input_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	n_classes: 43
[34m[1mwandb[0m: 	n_hidden_layers: 2
[34m[1mwandb[0m: 	resized_resolution: 8
[34m[1mwandb[0m: 	weight_decay: True
[34m[1mwandb[0m: 	weight_decay_value: 0.0001


Epoch 1  batch. 1  Training Loss: 3.9032607078552246
Epoch [1/5], Train Accuracy: 0.0286, Validation Accuracy: 0.0000
Epoch 2  batch. 1  Training Loss: 3.8126327991485596
Epoch [2/5], Train Accuracy: 0.0286, Validation Accuracy: 0.0000
Epoch 3  batch. 1  Training Loss: 3.885988235473633
Epoch [3/5], Train Accuracy: 0.0286, Validation Accuracy: 0.0000
Epoch 4  batch. 1  Training Loss: 3.8534023761749268
Epoch [4/5], Train Accuracy: 0.0357, Validation Accuracy: 0.0238
Epoch 5  batch. 1  Training Loss: 3.804274082183838
Epoch [5/5], Train Accuracy: 0.0357, Validation Accuracy: 0.0238


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁██
train_loss,█▆▄▃▁
val_accuracy,▁▁▁██
validation_loss,█▆▄▂▁

0,1
epoch,5.0
train_accuracy,0.03571
train_loss,3.78222
val_accuracy,0.02381
validation_loss,3.77906


[34m[1mwandb[0m: Agent Starting Run: 5p8a23mm with config:
[34m[1mwandb[0m: 	batch_size: 20
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 5
[34m[1mwandb[0m: 	input_layer_size: 10
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	n_classes: 43
[34m[1mwandb[0m: 	n_hidden_layers: 2
[34m[1mwandb[0m: 	resized_resolution: 8
[34m[1mwandb[0m: 	weight_decay: True
[34m[1mwandb[0m: 	weight_decay_value: 0.0001


Epoch 1  batch. 1  Training Loss: 3.812147855758667
Epoch [1/5], Train Accuracy: 0.0303, Validation Accuracy: 0.0500
Epoch 2  batch. 1  Training Loss: 3.920024871826172
Epoch [2/5], Train Accuracy: 0.0303, Validation Accuracy: 0.0500
Epoch 3  batch. 1  Training Loss: 3.863832950592041
Epoch [3/5], Train Accuracy: 0.0303, Validation Accuracy: 0.0500
Epoch 4  batch. 1  Training Loss: 3.714726686477661
Epoch [4/5], Train Accuracy: 0.0303, Validation Accuracy: 0.0500
Epoch 5  batch. 1  Training Loss: 3.7447972297668457
Epoch [5/5], Train Accuracy: 0.0303, Validation Accuracy: 0.0500


VBox(children=(Label(value='0.013 MB of 0.013 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
train_loss,█▆▄▃▁
val_accuracy,▁▁▁▁▁
validation_loss,█▆▅▃▁

0,1
epoch,5.0
train_accuracy,0.0303
train_loss,3.76747
val_accuracy,0.05
validation_loss,3.86815


[34m[1mwandb[0m: Agent Starting Run: 77agtr8l with config:
[34m[1mwandb[0m: 	batch_size: 20
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 5
[34m[1mwandb[0m: 	input_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	n_classes: 43
[34m[1mwandb[0m: 	n_hidden_layers: 2
[34m[1mwandb[0m: 	resized_resolution: 8
[34m[1mwandb[0m: 	weight_decay: True
[34m[1mwandb[0m: 	weight_decay_value: 0.0001


Epoch 1  batch. 1  Training Loss: 3.7803146839141846
Epoch [1/5], Train Accuracy: 0.0287, Validation Accuracy: 0.0109
Epoch 2  batch. 1  Training Loss: 3.724648952484131
Epoch [2/5], Train Accuracy: 0.0322, Validation Accuracy: 0.0114
Epoch 3  batch. 1  Training Loss: 3.718744993209839
Epoch [3/5], Train Accuracy: 0.0287, Validation Accuracy: 0.0114
Epoch 4  batch. 1  Training Loss: 3.757550001144409
Epoch [4/5], Train Accuracy: 0.0253, Validation Accuracy: 0.0114
Epoch 5  batch. 1  Training Loss: 3.6475234031677246
Epoch [5/5], Train Accuracy: 0.0262, Validation Accuracy: 0.0227


VBox(children=(Label(value='0.002 MB of 0.012 MB uploaded\r'), FloatProgress(value=0.14469006028752512, max=1.…

0,1
epoch,▁▃▅▆█
train_accuracy,▄█▄▁▂
train_loss,█▆▄▂▁
val_accuracy,▁▁▁▁█
validation_loss,█▆▄▂▁

0,1
epoch,5.0
train_accuracy,0.0262
train_loss,3.72672
val_accuracy,0.02273
validation_loss,3.75962


[34m[1mwandb[0m: Agent Starting Run: g7fbrq1c with config:
[34m[1mwandb[0m: 	batch_size: 10
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	early_stop_patience: 5
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	hidden_layer_size: 5
[34m[1mwandb[0m: 	input_layer_size: 20
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	n_classes: 43
[34m[1mwandb[0m: 	n_hidden_layers: 2
[34m[1mwandb[0m: 	resized_resolution: 8
[34m[1mwandb[0m: 	weight_decay: True
[34m[1mwandb[0m: 	weight_decay_value: 0.0001


Epoch 1  batch. 1  Training Loss: 4.018603801727295
Epoch 1  batch. 11  Training Loss: 3.835651397705078
Epoch [1/5], Train Accuracy: 0.0294, Validation Accuracy: 0.0000
Epoch 2  batch. 1  Training Loss: 3.9671547412872314
Epoch 2  batch. 11  Training Loss: 3.9173405170440674
Epoch [2/5], Train Accuracy: 0.0294, Validation Accuracy: 0.0000
Epoch 3  batch. 1  Training Loss: 3.816779613494873
Epoch 3  batch. 11  Training Loss: 3.86419415473938
Epoch [3/5], Train Accuracy: 0.0294, Validation Accuracy: 0.0000
Epoch 4  batch. 1  Training Loss: 3.869481325149536
Epoch 4  batch. 11  Training Loss: 3.792750120162964
Epoch [4/5], Train Accuracy: 0.0294, Validation Accuracy: 0.0000
Epoch 5  batch. 1  Training Loss: 3.813955783843994
Epoch 5  batch. 11  Training Loss: 3.7854065895080566
Epoch [5/5], Train Accuracy: 0.0294, Validation Accuracy: 0.0000


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▃▅▆█
train_accuracy,▁▁▁▁▁
train_loss,█▆▅▃▁
val_accuracy,▁▁▁▁▁
validation_loss,█▇▅▃▁

0,1
epoch,5.0
train_accuracy,0.02941
train_loss,3.80655
val_accuracy,0.0
validation_loss,3.79704
