<a href="https://colab.research.google.com/github/malloyca/CSC581B/blob/main/Final%20Project/linear_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CSC581B - Introduction to Deep Learning for Image Classification
# Final Project

In [48]:
# imports
import torch
from torch import nn
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import copy
import numpy as np
from sklearn.model_selection import train_test_split

First we need to load the training and test data.

In [44]:
# Load the training data
training_data = datasets.CIFAR100(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor()
)

# Load the test data
test_data = datasets.CIFAR100(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor()
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting data/cifar-100-python.tar.gz to data
Files already downloaded and verified


In [47]:
training_targets = training_data.targets

In [49]:
train_split_index, valid_split_index = train_test_split(
    np.arange(len(training_targets)), test_size=0.2, stratify=training_targets
)

In [51]:
batch_size = 64

# Create data loaders
train_dataloader = DataLoader(training_data, batch_size=batch_size,
                              sampler=SubsetRandomSampler(train_split_index))
valid_dataloader = DataLoader(training_data, batch_size=batch_size,
                              sampler=SubsetRandomSampler(valid_split_index))
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [55]:
# Check that it is splitting the data properly
train_length = 0
for _, y in train_dataloader:
  train_length += len(y)
print(f"Length of training split: {train_length}")

valid_length = 0
for _, y in valid_dataloader:
  valid_length += len(y)
print(f"Length of validation split: {valid_length}")

test_length = 0
for _, y in test_dataloader:
  test_length += len(y)
print(f"Length of test split: {test_length}")

Length of training split: 40000
Length of validation split: 10000
Length of test split: 10000


In [60]:
# Check that there are 100 instances of each class in the validation set
count = 0
for _, y in valid_dataloader:
  for target in y:
    if int(target.numpy()) == 0:
      count += 1

print(count)

100


## Building some linear models

In [61]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [63]:
# Define the model
class LinearModel(nn.Module):
  def __init__(self, n_neurons):
    super(LinearModel, self).__init__()
    self.name = f'model_{n_neurons}'
    self.flatten = nn.Flatten()
    self.linear_model = nn.Sequential(
        nn.Linear(32*32*3, n_neurons),
        nn.Linear(n_neurons, n_neurons),
        nn.Linear(n_neurons, 100),
        nn.Softmax(dim=1)
    )

  # Forward propagation function
  def forward(self, x):
    x = self.flatten(x)
    x = self.linear_model(x)
    return x

In [71]:
model_100 = LinearModel(n_neurons=100).to(device)
print(model_100)

model_500 = LinearModel(n_neurons=500).to(device)
print(model_500)

model_1000 = LinearModel(n_neurons=1000).to(device)
print(model_1000)

model_5000 = LinearModel(n_neurons=5000).to(device)
print(model_5000)

LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=100, bias=True)
    (3): Softmax(dim=1)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=500, bias=True)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): Linear(in_features=500, out_features=100, bias=True)
    (3): Softmax(dim=1)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=1000, bias=True)
    (1): Linear(in_features=1000, out_features=1000, bias=True)
    (2): Linear(in_features=1000, out_features=100, bias=True)
    (3): Softmax(dim=1)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): S

In [72]:
# Loss function
loss_fn = nn.CrossEntropyLoss()

# Optimizer
optimizer_100 = torch.optim.SGD(model_100.parameters(), lr=1e-2)
optimizer_500 = torch.optim.SGD(model_500.parameters(), lr=1e-2)
optimizer_1000 = torch.optim.SGD(model_1000.parameters(), lr=1e-2)
optimizer_5000 = torch.optim.SGD(model_5000.parameters(), lr=1e-2)

In [73]:
# Define training loop function
def train(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  model.train()
  for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)

    # Compute prediction error
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 100 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [74]:
# Define the test function
def test(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f"Validation Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
  return test_loss

In [75]:
n_epochs = 250

In [76]:
def training_loop(n_epochs, model, train_data, test_data, loss_function,
                  optimizer, early_stopping=False, patience=10):
  current_epoch = 0
  best_epoch = 0
  best_loss = float('inf')
  patience_counter = 0

  for e in range(n_epochs):
    print()
    print(f"Epoch {e+1}\n----------------------------")
    train(train_data, model, loss_function, optimizer)
    test_loss = test(test_data, model, loss_function)
    
    # Iterate epoch counter
    current_epoch += 1

    # Check test_loss for early stopping
    if early_stopping:
      if test_loss < best_loss:
        # store loss
        best_loss = test_loss

        # reset patience counter
        patience_counter = 0

        # store model and epoch number
        print("Storing new best model.")
        best_model_state_dict = copy.deepcopy(model.state_dict())
        best_epoch = current_epoch
        
      # If patience limit not yet reached, iterate patience counter
      elif patience_counter < patience - 1:
        patience_counter += 1
        print(f"Patience counter: {patience_counter}")
      
      # If patience limit reached, store model and break the loop
      else:
        print("Finished due to early stopping.")
        print(f"Saving best model: {model.name}_epoch-{best_epoch:03d}")
        torch.save(best_model_state_dict, f'{model.name}_epoch-{best_epoch:03d}')
        break

  # If we get here, we did not stop early - save best model
  if early_stopping:
    print(f"Saving best model: {model.name}-epoch:{best_epoch:03d}")
    torch.save(best_model_state_dict, f'{model.name}-epoch:{best_epoch:03d}')


In [70]:
training_loop(n_epochs, model_100, train_dataloader, valid_dataloader,
              loss_fn, optimizer_100, early_stopping=True, patience=10)


Epoch 1
----------------------------
loss: 4.605326 [    0/50000]
loss: 4.605430 [ 6400/50000]
loss: 4.605166 [12800/50000]
loss: 4.605332 [19200/50000]
loss: 4.605208 [25600/50000]
loss: 4.605026 [32000/50000]
loss: 4.605246 [38400/50000]
Validation Error: 
 Accuracy: 0.2%, Avg loss: 4.605176 

Storing new best model.

Epoch 2
----------------------------
loss: 4.605251 [    0/50000]
loss: 4.604918 [ 6400/50000]
loss: 4.605102 [12800/50000]
loss: 4.605197 [19200/50000]
loss: 4.605137 [25600/50000]
loss: 4.605341 [32000/50000]
loss: 4.605242 [38400/50000]
Validation Error: 
 Accuracy: 0.2%, Avg loss: 4.605170 

Storing new best model.

Epoch 3
----------------------------
loss: 4.605281 [    0/50000]
loss: 4.605211 [ 6400/50000]
loss: 4.604842 [12800/50000]
loss: 4.604983 [19200/50000]
loss: 4.604959 [25600/50000]
loss: 4.604964 [32000/50000]
loss: 4.605007 [38400/50000]
Validation Error: 
 Accuracy: 0.2%, Avg loss: 4.605172 

Patience counter: 1

Epoch 4
----------------------------


KeyboardInterrupt: ignored

In [None]:
training_loop(n_epochs, model_500, train_dataloader, test_dataloader,
              loss_fn, optimizer_500, early_stopping=True, patience=10)

In [77]:
training_loop(n_epochs, model_1000, train_dataloader, test_dataloader,
              loss_fn, optimizer_1000, early_stopping=True, patience=10)


Epoch 1
----------------------------
loss: 4.604955 [    0/50000]
loss: 4.604829 [ 6400/50000]
loss: 4.605097 [12800/50000]
loss: 4.605495 [19200/50000]
loss: 4.605272 [25600/50000]
loss: 4.604870 [32000/50000]
loss: 4.604983 [38400/50000]
Validation Error: 
 Accuracy: 0.8%, Avg loss: 4.605164 

Storing new best model.

Epoch 2
----------------------------
loss: 4.605164 [    0/50000]
loss: 4.605107 [ 6400/50000]
loss: 4.605224 [12800/50000]
loss: 4.605189 [19200/50000]
loss: 4.605188 [25600/50000]
loss: 4.605099 [32000/50000]
loss: 4.605329 [38400/50000]
Validation Error: 
 Accuracy: 0.8%, Avg loss: 4.605133 

Storing new best model.

Epoch 3
----------------------------
loss: 4.605162 [    0/50000]
loss: 4.604971 [ 6400/50000]
loss: 4.604927 [12800/50000]
loss: 4.605117 [19200/50000]
loss: 4.605292 [25600/50000]
loss: 4.605055 [32000/50000]
loss: 4.605017 [38400/50000]
Validation Error: 
 Accuracy: 0.8%, Avg loss: 4.605101 

Storing new best model.

Epoch 4
-------------------------

In [None]:
training_loop(n_epochs, model_5000, train_dataloader, test_dataloader,
              loss_fn, optimizer_5000, early_stopping=True, patience=10)