<a href="https://colab.research.google.com/github/malloyca/CSC581B/blob/main/Final%20Project/linear_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CSC581B - Introduction to Deep Learning for Image Classification
# Final Project

In [1]:
# imports
import torch
from torch import nn
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets
from torchvision.transforms import ToTensor, Compose, Normalize
import matplotlib.pyplot as plt
import copy
import numpy as np
from sklearn.model_selection import train_test_split

First we need to load the training and test data.

In [2]:
# CIFAR-100 is stored under ./data (downloaded on first use). Every image goes
# through the ToTensor transform only.
# A normalising variant that is currently disabled would instead use:
#   Compose([ToTensor(),
#            Normalize([0.5071, 0.4867, 0.4408],
#                      [0.2675, 0.2565, 0.2761])])

# Training portion of the dataset
training_data = datasets.CIFAR100(root="data", train=True, download=True,
                                  transform=ToTensor())

# Test portion of the dataset
test_data = datasets.CIFAR100(root="data", train=False, download=True,
                              transform=ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Labels of the training set; passed as `stratify=` when the indices are split below
training_targets = training_data.targets

In [4]:
# Stratified 80/20 split of the training-set indices into a training part and
# a validation part. The seed is fixed so that a fresh kernel reproduces the
# same split (and therefore comparable training/validation numbers).
SPLIT_SEED = 42
train_split_index, valid_split_index = train_test_split(
    np.arange(len(training_targets)),
    test_size=0.2,
    stratify=training_targets,
    random_state=SPLIT_SEED,
)

In [5]:
# Mini-batch size shared by all loaders
batch_size = 100

# Training and validation loaders both read from `training_data`; each one is
# restricted to its own index split through a SubsetRandomSampler.
train_dataloader = DataLoader(
    training_data,
    sampler=SubsetRandomSampler(train_split_index),
    batch_size=batch_size,
)
valid_dataloader = DataLoader(
    training_data,
    sampler=SubsetRandomSampler(valid_split_index),
    batch_size=batch_size,
)

# The test loader iterates over the whole test set
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [6]:
# Sanity check: count how many samples each loader actually yields
train_length = sum(len(labels) for _, labels in train_dataloader)
print(f"Length of training split: {train_length}")

valid_length = sum(len(labels) for _, labels in valid_dataloader)
print(f"Length of validation split: {valid_length}")

test_length = sum(len(labels) for _, labels in test_dataloader)
print(f"Length of test split: {test_length}")

Length of training split: 40000
Length of validation split: 10000
Length of test split: 10000


In [7]:
# Check that the stratified split gave every class the same number of
# instances in the validation set (previously only class 0 was inspected).
valid_labels = torch.cat([y for _, y in valid_dataloader])
class_counts = torch.bincount(valid_labels,
                              minlength=len(set(training_targets)))

# Report the count for class 0, as this cell always has
count = int(class_counts[0])
print(count)

# Fail loudly if any class is over- or under-represented
assert class_counts.min() == class_counts.max(), (
    "Unbalanced validation split: per-class counts range from "
    f"{class_counts.min().item()} to {class_counts.max().item()}"
)

100


CIFAR-100 per-channel mean (for optional normalization): (0.5071, 0.4867, 0.4408)

CIFAR-100 per-channel standard deviation (for optional normalization): (0.2675, 0.2565, 0.2761)

## Building some linear models

In [8]:
# Run on the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(device)

cuda


In [9]:
# Model definition
class LinearModel(nn.Module):
  """Three fully connected layers applied to a flattened 32x32x3 input.

  The layers are chained without any activation in between, and the last
  layer produces 100 outputs.

  Args:
    n_neurons: width of the two hidden layers; also used to build the
      `name` attribute (`model_<n_neurons>`).
  """

  def __init__(self, n_neurons):
    super().__init__()
    self.name = f'model_{n_neurons}'
    self.flatten = nn.Flatten()

    input_size = 32*32*3
    output_size = 100
    layers = [
        nn.Linear(input_size, n_neurons),
        nn.Linear(n_neurons, n_neurons),
        nn.Linear(n_neurons, output_size),
    ]
    self.linear_model = nn.Sequential(*layers)

  def forward(self, x):
    """Flatten `x` and pass it through the linear stack; returns the raw outputs."""
    return self.linear_model(self.flatten(x))

In [10]:
# Define training loop function
def train(dataloader, model, loss_fn, optimizer, device=None):
  """Run one training pass over `dataloader`, updating `model` in place.

  Args:
    dataloader: DataLoader yielding `(inputs, targets)` batches.
    model: the module to optimise.
    loss_fn: callable `loss_fn(predictions, targets)` returning a scalar loss.
    optimizer: optimizer holding the parameters of `model`.
    device: device each batch is moved to. When omitted, the device of the
      model's first parameter is used (CPU if the model has no parameters),
      so the function no longer depends on a notebook-level global.

  Prints the current loss every 100 batches; returns nothing.
  """
  if device is None:
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

  # Samples drawn per pass. Taken from the sampler so that loaders restricted
  # to a subset of their dataset report the right total (was hard-coded 40000).
  size = len(dataloader.sampler)

  model.train()
  for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)

    # Compute prediction error
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Periodic progress report
    if batch % 100 == 0:
      current = batch * len(X)
      print(f"loss: {loss.item():>7f} [{current:>5d}/{size:>5d}]")

In [11]:
# Define the test function
def test(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f"Validation Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
  return test_loss

In [12]:
# Epoch budget handed to training_loop for every model trained below
n_epochs = 50

In [13]:
def training_loop(n_epochs, model, train_data, test_data, loss_function,
                  optimizer, scheduler=None, early_stopping=False, patience=10):
  """Train `model` for up to `n_epochs`, optionally with early stopping.

  Each epoch runs `train` on `train_data`, then `test` on `test_data`, then
  steps the scheduler if one was given.

  Args:
    n_epochs: maximum number of epochs to run.
    model: module to train; its `name` attribute is used for the checkpoint
      filename.
    train_data: loader passed to `train`.
    test_data: loader passed to `test`; its returned loss drives early stopping.
    loss_function: loss callable forwarded to `train` and `test`.
    optimizer: optimizer forwarded to `train`.
    scheduler: optional learning-rate scheduler, stepped once per epoch.
    early_stopping: when True, track the best evaluation loss, stop after
      `patience` consecutive epochs without improvement, and save the best
      weights.
    patience: number of consecutive non-improving epochs tolerated.

  When `early_stopping` is True, the best state dict is written exactly once
  to `<model.name>_epoch-<best_epoch>`, whether the loop stopped early or used
  all its epochs. Nothing is saved if no epoch produced a usable loss.
  """
  best_epoch = 0
  best_loss = float('inf')
  best_model_state_dict = None  # stays None until an epoch improves on best_loss
  patience_counter = 0

  for current_epoch in range(1, n_epochs + 1):
    print()
    print(f"Epoch {current_epoch}\n----------------------------")

    train(train_data, model, loss_function, optimizer)
    test_loss = test(test_data, model, loss_function)

    # Iterate LR scheduler
    if scheduler is not None:
      scheduler.step()

    if not early_stopping:
      continue

    if test_loss < best_loss:
      # New best: remember the loss, the epoch and a copy of the weights
      best_loss = test_loss
      patience_counter = 0
      print("Storing new best model.")
      best_model_state_dict = copy.deepcopy(model.state_dict())
      best_epoch = current_epoch

    # If patience limit not yet reached, iterate patience counter
    elif patience_counter < patience - 1:
      patience_counter += 1
      print(f"Patience counter: {patience_counter}")

    # Patience exhausted: leave the loop; the checkpoint is written below
    else:
      print("Finished due to early stopping.")
      break

  # Single save point for both exits (early stop or all epochs used). This
  # replaces two separate saves that used different filenames, one of which
  # contained ':' and ran a second time after an early stop.
  if early_stopping and best_model_state_dict is not None:
    checkpoint_name = f'{model.name}_epoch-{best_epoch:03d}'
    print(f"Saving best model: {checkpoint_name}")
    torch.save(best_model_state_dict, checkpoint_name)


In [14]:
# Build one model per hidden-layer width and move each to the selected device.
# NOTE(review): `model_2500` is constructed with 2000 neurons (so its `name`
# is 'model_2000'); confirm whether 2000 or 2500 is the intended width.
model_100, model_500, model_1000, model_2500 = (
    LinearModel(n_neurons=width).to(device)
    for width in (100, 500, 1000, 2000)
)

# Show the layer structure of every model
print(model_100, model_500, model_1000, model_2500, sep="\n")

LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=100, bias=True)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=500, bias=True)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): Linear(in_features=500, out_features=100, bias=True)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=1000, bias=True)
    (1): Linear(in_features=1000, out_features=1000, bias=True)
    (2): Linear(in_features=1000, out_features=100, bias=True)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=2000, bias=Tru

In [15]:
# Cross-entropy loss, shared by all models
loss_fn = nn.CrossEntropyLoss()

# One plain SGD optimizer per model (model_1000 uses a smaller learning rate)
optimizer_100 = torch.optim.SGD(model_100.parameters(), lr=0.1)
optimizer_500 = torch.optim.SGD(model_500.parameters(), lr=0.1)
optimizer_1000 = torch.optim.SGD(model_1000.parameters(), lr=0.05)
optimizer_2500 = torch.optim.SGD(model_2500.parameters(), lr=0.1)

# Step-wise learning-rate decay, one scheduler per optimizer
# (model_1000 decays every 20 steps by 0.25, the others every 15 steps by 0.1)
scheduler_100 = StepLR(optimizer_100, gamma=0.1, step_size=15)
scheduler_500 = StepLR(optimizer_500, gamma=0.1, step_size=15)
scheduler_1000 = StepLR(optimizer_1000, gamma=0.25, step_size=20)
scheduler_2500 = StepLR(optimizer_2500, gamma=0.1, step_size=15)

In [16]:
# Train model_100 on the training split, evaluating on the validation split
training_loop(
    n_epochs,
    model_100,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    optimizer_100,
    scheduler=scheduler_100,
    early_stopping=False,
    patience=10,
)


Epoch 1
----------------------------
loss: 4.599590 [    0/40000]
loss: 4.195426 [10000/40000]


KeyboardInterrupt: ignored

In [None]:
# Train model_500 on the training split, evaluating on the validation split
training_loop(
    n_epochs,
    model_500,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    optimizer_500,
    scheduler=scheduler_500,
    early_stopping=False,
    patience=10,
)

In [17]:
# Train model_1000 on the training split, evaluating on the validation split
training_loop(
    n_epochs,
    model_1000,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    optimizer_1000,
    scheduler=scheduler_1000,
    early_stopping=False,
    patience=10,
)


Epoch 1
----------------------------
loss: 4.627542 [    0/40000]
loss: 4.462950 [10000/40000]
loss: 4.256098 [20000/40000]
loss: 3.947877 [30000/40000]
Validation Error: 
 Accuracy: 2.0%, Avg loss: 3.993850 


Epoch 2
----------------------------
loss: 3.953144 [    0/40000]
loss: 3.753744 [10000/40000]
loss: 3.752641 [20000/40000]
loss: 3.708488 [30000/40000]
Validation Error: 
 Accuracy: 2.4%, Avg loss: 3.856921 


Epoch 3
----------------------------
loss: 3.947703 [    0/40000]
loss: 4.088981 [10000/40000]
loss: 3.892617 [20000/40000]
loss: 3.756453 [30000/40000]
Validation Error: 
 Accuracy: 2.9%, Avg loss: 3.769571 


Epoch 4
----------------------------
loss: 3.425246 [    0/40000]
loss: 3.935998 [10000/40000]
loss: 3.888884 [20000/40000]
loss: 3.740047 [30000/40000]
Validation Error: 
 Accuracy: 2.7%, Avg loss: 3.805315 


Epoch 5
----------------------------
loss: 4.044482 [    0/40000]
loss: 3.699428 [10000/40000]
loss: 3.784934 [20000/40000]
loss: 3.707857 [30000/40000]
Va

In [None]:
# Train model_2500 on the training split, evaluating on the validation split
training_loop(
    n_epochs,
    model_2500,
    train_dataloader,
    valid_dataloader,
    loss_fn,
    optimizer_2500,
    scheduler=scheduler_2500,
    early_stopping=False,
    patience=10,
)