<a href="https://colab.research.google.com/github/malloyca/CSC581B/blob/main/Final%20Project/linear_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CSC581B - Introduction to Deep Learning for Image Classification
# Final Project

In [None]:
# imports
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import copy

First we need to load the training and test data.

In [None]:
# CIFAR10 training split, cached under ./data and converted to tensors on access
training_data = datasets.CIFAR10(root="data", train=True, download=True, transform=ToTensor())

# CIFAR10 test split, same cache directory and same tensor conversion
test_data = datasets.CIFAR10(root="data", train=False, download=True, transform=ToTensor())

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Number of samples per mini-batch, shared by both loaders
batch_size = 64

# Wrap each dataset in a DataLoader that yields (images, labels) mini-batches.
# NOTE(review): `shuffle` is left at its default here, so the training batches
# presumably arrive in the same order every epoch — confirm this is intended.
train_dataloader = DataLoader(dataset=training_data, batch_size=batch_size)
test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size)

In [None]:
# Check the data dimensions
# Only the first batch of the test loader is inspected: the loop body ends with
# `break`, so exactly one (X, y) pair is pulled and then iteration stops.
for X, y in test_dataloader:
  # X is the image batch; the label in the message spells out the axis order
  print("Shape of X batch [Batch size, Channels, Height, Width]: ", X.shape)
  # y is the matching label batch; its dtype is printed alongside its shape
  print("Shape of y batch: ", y.shape, y.dtype)
  break

Shape of X batch [Batch size, Channels, Height, Width]:  torch.Size([64, 3, 32, 32])
Shape of y batch:  torch.Size([64]) torch.int64


## Building some linear models

In [None]:
# Pick the compute device once; the train/test helpers move every batch to it.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(device)

cuda


In [None]:
# Define the model
class LinearModel(nn.Module):
  """Three-layer fully connected classifier for flattened 32x32x3 images.

  The network is a stack of three ``nn.Linear`` layers with no activation
  function between them, so it stays a purely linear map of the input pixels.
  It returns raw, unnormalised class scores (logits) for 10 classes.

  Args:
    n_neurons: Width of the two hidden linear layers. It is also used to build
      ``self.name`` (``model_<n_neurons>``), which the training loop uses as
      the prefix of the checkpoint file name.
  """

  def __init__(self, n_neurons):
    super().__init__()
    self.name = f'model_{n_neurons}'
    self.flatten = nn.Flatten()
    # No trailing nn.Softmax on purpose: nn.CrossEntropyLoss expects raw logits
    # and applies log-softmax internally. Feeding it probabilities applies
    # softmax twice, which flattens the gradients and bounds the loss.
    # Softmax has no parameters, so the state_dict keys are unaffected, and
    # argmax over logits equals argmax over probabilities.
    self.linear_model = nn.Sequential(
        nn.Linear(32*32*3, n_neurons),
        nn.Linear(n_neurons, n_neurons),
        nn.Linear(n_neurons, 10),
    )

  # Forward propagation function
  def forward(self, x):
    """Flatten each sample and return its 10 class logits.

    Args:
      x: Batch of inputs; every sample must flatten to 32*32*3 values.

    Returns:
      Tensor of shape (batch, 10) holding unnormalised class scores. Apply
      ``torch.softmax(out, dim=1)`` if probabilities are needed.
    """
    x = self.flatten(x)
    x = self.linear_model(x)
    return x

In [None]:
# Build one model per hidden width, in increasing size, and move each to `device`
model_100 = LinearModel(n_neurons=100).to(device)
model_500 = LinearModel(n_neurons=500).to(device)
model_1000 = LinearModel(n_neurons=1000).to(device)
model_5000 = LinearModel(n_neurons=5000).to(device)

# Show the layer structure of every model, one summary after another
print(model_100, model_500, model_1000, model_5000, sep="\n")

LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=500, bias=True)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): Linear(in_features=500, out_features=10, bias=True)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=1000, bias=True)
    (1): Linear(in_features=1000, out_features=1000, bias=True)
    (2): Linear(in_features=1000, out_features=10, bias=True)
  )
)
LinearModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_model): Sequential(
    (0): Linear(in_features=3072, out_features=5000, bias=True)


In [None]:
# Classification loss shared by all four models
loss_fn = nn.CrossEntropyLoss()

# One plain SGD optimizer per model, each bound to that model's parameters.
# NOTE(review): model_100 is stepped with lr=0.001 while the three larger
# models use lr=0.01 — confirm the difference is intentional.
optimizer_100 = torch.optim.SGD(params=model_100.parameters(), lr=0.001)
optimizer_500 = torch.optim.SGD(params=model_500.parameters(), lr=0.01)
optimizer_1000 = torch.optim.SGD(params=model_1000.parameters(), lr=0.01)
optimizer_5000 = torch.optim.SGD(params=model_5000.parameters(), lr=0.01)

In [None]:
# One-epoch training pass
def train(dataloader, model, loss_fn, optimizer):
  """Run a single optimisation pass over every batch in ``dataloader``.

  Args:
    dataloader: Iterable of (inputs, targets) batches exposing ``.dataset``.
    model: Module to optimise; switched to training mode before the pass.
    loss_fn: Callable taking (predictions, targets) and returning a loss tensor.
    optimizer: Optimizer bound to ``model``'s parameters.

  Returns:
    None. Progress is printed every 100 batches.
  """
  size = len(dataloader.dataset)
  model.train()

  for step, (inputs, targets) in enumerate(dataloader):
    # Batches are moved to the notebook-level `device` before the forward pass
    inputs = inputs.to(device)
    targets = targets.to(device)

    # Forward pass and loss for this batch
    batch_loss = loss_fn(model(inputs), targets)

    # Clear stale gradients, back-propagate, then update the weights
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    # Report on batch 0, 100, 200, ... with the number of samples started so far
    if step % 100 == 0:
      loss = batch_loss.item()
      current = step * len(inputs)
      print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [None]:
# Evaluation pass
def test(dataloader, model, loss_fn):
  """Evaluate ``model`` on every batch in ``dataloader`` without gradients.

  Args:
    dataloader: Iterable of (inputs, targets) batches exposing ``.dataset``
      and supporting ``len()``.
    model: Module to evaluate; switched to evaluation mode first.
    loss_fn: Callable taking (predictions, targets) and returning a loss tensor.

  Returns:
    The loss averaged over batches. Accuracy (correct predictions divided by
    dataset size) and this average loss are also printed.
  """
  n_samples = len(dataloader.dataset)
  n_batches = len(dataloader)
  model.eval()

  loss_total = 0
  hits = 0
  with torch.no_grad():
    for inputs, targets in dataloader:
      inputs = inputs.to(device)
      targets = targets.to(device)
      scores = model(inputs)
      loss_total += loss_fn(scores, targets).item()
      # A sample counts as correct when its highest-scoring class equals the label
      matches = scores.argmax(1) == targets
      hits += matches.type(torch.float).sum().item()

  # Loss is averaged per batch, accuracy per sample
  test_loss = loss_total / n_batches
  correct = hits / n_samples
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
  return test_loss

In [None]:
# Maximum number of epochs handed to training_loop below; runs that enable
# early stopping may finish sooner.
n_epochs = 250

In [None]:
def training_loop(n_epochs, model, train_data, test_data, loss_function,
                  optimizer, early_stopping=False, patience=10):
  """Train ``model`` for up to ``n_epochs`` epochs, optionally stopping early.

  Each epoch calls ``train`` on ``train_data`` and then ``test`` on
  ``test_data``. With ``early_stopping`` enabled, the state dict of the epoch
  with the lowest evaluation loss is kept in memory. It is written to disk
  exactly once when the loop ends, whether training ran out of epochs or
  stopped early.

  Args:
    n_epochs: Maximum number of epochs to run.
    model: Module to train; must expose a ``name`` attribute, used as the
      checkpoint file prefix.
    train_data: Data loader passed to ``train``.
    test_data: Data loader passed to ``test``; its loss drives early stopping.
    loss_function: Loss callable forwarded to ``train`` and ``test``.
    optimizer: Optimizer forwarded to ``train``.
    early_stopping: When True, track the best epoch and save its weights.
    patience: Number of consecutive non-improving epochs after which training
      stops (the stop happens on the ``patience``-th such epoch).

  Returns:
    None. With early stopping enabled, the best state dict is saved in the
    working directory as ``<model.name>_epoch-<NNN>``.

  Notes:
    ``model`` itself keeps the weights of the last epoch that ran; the best
    weights are only written to disk, not loaded back.
    NOTE(review): the stopping criterion uses the loss on ``test_data``, so
    the reported test metrics also serve as the model-selection signal.
    Consider passing a held-out validation loader instead.
  """
  best_epoch = 0
  best_loss = float('inf')
  # Stays None until an epoch improves on best_loss (e.g. NaN losses never do)
  best_model_state_dict = None
  patience_counter = 0

  for current_epoch in range(1, n_epochs + 1):
    print()
    print(f"Epoch {current_epoch}\n----------------------------")
    train(train_data, model, loss_function, optimizer)
    test_loss = test(test_data, model, loss_function)

    if not early_stopping:
      continue

    if test_loss < best_loss:
      # New best epoch: remember its loss and a detached copy of the weights
      best_loss = test_loss
      patience_counter = 0
      print("Storing new best model.")
      best_model_state_dict = copy.deepcopy(model.state_dict())
      best_epoch = current_epoch
      continue

    # Non-improving epoch: stop once `patience` of them occur in a row
    patience_counter += 1
    if patience_counter >= patience:
      print("Finished due to early stopping.")
      break
    print(f"Patience counter: {patience_counter}")

  # Single save point, reached both after a break and after the final epoch.
  # The file name avoids ':' because it is not a valid character on every
  # filesystem.
  if early_stopping:
    if best_model_state_dict is None:
      print("No improving epoch was recorded; no model saved.")
    else:
      checkpoint_name = f'{model.name}_epoch-{best_epoch:03d}'
      print(f"Saving best model: {checkpoint_name}")
      torch.save(best_model_state_dict, checkpoint_name)


In [None]:
# Train the 100-unit model with early stopping (patience of 10 epochs)
training_loop(
    n_epochs=n_epochs,
    model=model_100,
    train_data=train_dataloader,
    test_data=test_dataloader,
    loss_function=loss_fn,
    optimizer=optimizer_100,
    early_stopping=True,
    patience=10,
)


Epoch 1
----------------------------
loss: 2.329694 [    0/50000]
loss: 2.306764 [ 6400/50000]
loss: 2.272973 [12800/50000]
loss: 2.285583 [19200/50000]
loss: 2.256721 [25600/50000]
loss: 2.262734 [32000/50000]
loss: 2.273457 [38400/50000]
loss: 2.255854 [44800/50000]
Test Error: 
 Accuracy: 19.5%, Avg loss: 2.237885 

Storing new best model.

Epoch 2
----------------------------
loss: 2.264606 [    0/50000]
loss: 2.251604 [ 6400/50000]
loss: 2.184141 [12800/50000]
loss: 2.244247 [19200/50000]
loss: 2.179899 [25600/50000]
loss: 2.193682 [32000/50000]
loss: 2.232801 [38400/50000]
loss: 2.186459 [44800/50000]
Test Error: 
 Accuracy: 22.6%, Avg loss: 2.167875 

Storing new best model.

Epoch 3
----------------------------
loss: 2.207867 [    0/50000]
loss: 2.188828 [ 6400/50000]
loss: 2.083544 [12800/50000]
loss: 2.196449 [19200/50000]
loss: 2.108332 [25600/50000]
loss: 2.120863 [32000/50000]
loss: 2.200187 [38400/50000]
loss: 2.114206 [44800/50000]
Test Error: 
 Accuracy: 25.7%, Avg los

In [None]:
# Train the 500-unit model with early stopping (patience of 10 epochs)
training_loop(
    n_epochs=n_epochs,
    model=model_500,
    train_data=train_dataloader,
    test_data=test_dataloader,
    loss_function=loss_fn,
    optimizer=optimizer_500,
    early_stopping=True,
    patience=10,
)

In [None]:
# Train the 1000-unit model with early stopping (patience of 10 epochs)
training_loop(
    n_epochs=n_epochs,
    model=model_1000,
    train_data=train_dataloader,
    test_data=test_dataloader,
    loss_function=loss_fn,
    optimizer=optimizer_1000,
    early_stopping=True,
    patience=10,
)

In [None]:
# Train the 5000-unit model with early stopping (patience of 10 epochs)
training_loop(
    n_epochs=n_epochs,
    model=model_5000,
    train_data=train_dataloader,
    test_data=test_dataloader,
    loss_function=loss_fn,
    optimizer=optimizer_5000,
    early_stopping=True,
    patience=10,
)