# Assignment 1

In [2]:
!git clone https://github.com/neuronadine/IFT6135-2025.git

Cloning into 'IFT6135-2025'...
remote: Enumerating objects: 41, done.[K
remote: Counting objects: 100% (28/28), done.[K
remote: Compressing objects: 100% (20/20), done.[K
remote: Total 41 (delta 12), reused 12 (delta 8), pack-reused 13 (from 1)[K
Receiving objects: 100% (41/41), 415.47 KiB | 1.59 MiB/s, done.
Resolving deltas: 100% (14/14), done.


In [4]:
#@title Mount your Google Drive
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
%matplotlib inline
%load_ext autoreload
%autoreload 2

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [13]:
!ls /content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release

config.py  main.ipynb  main.py	mlpmixer.py  mlp.py  model_configs  resnet18.py  test.py  utils.py


In [16]:
#@title Link your assignment folder & install requirements
#@markdown Enter the path to the assignment folder in your Google Drive
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
import sys
import os
import shutil
import warnings

folder = "/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/" #@param {type:"string"}
!ln -Ts "$folder" /content/assignment 2> /dev/null

# Add the assignment folder to Python path
if '/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/' not in sys.path:
  sys.path.insert(0, '/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/')

# Check if CUDA is available
import torch
if not torch.cuda.is_available():
  warnings.warn('CUDA is not available.')

### Running on GPU
For this assignment, it will be necessary to run your experiments on GPU. To make sure the notebook is running on GPU, you can change the notebook settings with
* (EN) `Edit > Notebook Settings`
* (FR) `Modifier > Paramètres du notebook`


In [17]:
%matplotlib inline
import warnings

from dataclasses import dataclass
import torch
from torch import optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from utils import seed_experiment, to_device, cross_entropy_loss, compute_accuracy
from config import get_config_parser
import json
from mlp import MLP
from resnet18 import ResNet18
from mlpmixer import MLPMixer
from tqdm import tqdm
from torch.utils.data import DataLoader
import time
import os

# Local Test
Before run the experiment, here are some local test cases you can run for sanity check

In [18]:
import unittest
import test
suite = unittest.TestLoader().loadTestsFromModule(test)
unittest.TextTestRunner(verbosity=2).run(suite)

test_linear_attributes (test.TestLinear.test_linear_attributes) ... ok
test_linear_forward (test.TestLinear.test_linear_forward) ... ok
test_activation (test.TestMLP.test_activation) ... ok
test_forward (test.TestMLP.test_forward) ... ok
test_mlp (test.TestMLP.test_mlp) ... ok
test_mixer_block (test.TestMLPMixer.test_mixer_block) ... ok
test_mlpmixer (test.TestMLPMixer.test_mlpmixer) ... ok
test_patch_emb (test.TestMLPMixer.test_patch_emb) ... ok
test_basic_block (test.TestResNet.test_basic_block) ... ok
test_basic_block2 (test.TestResNet.test_basic_block2) ... ok
test_resnet (test.TestResNet.test_resnet) ... ok
test_ce_loss (test.TestUtils.test_ce_loss) ... ok

----------------------------------------------------------------------
Ran 12 tests in 1.773s

OK


<unittest.runner.TextTestResult run=12 errors=0 failures=0>

## Experiments

Below we define a few default arguments to get you started with your experiments. You are encouraged to modify the function `main_entry()`, as well as these arguments, to fit your needs (e.g. changing hyperparameters, the optimizer, adding regularizations).

In [19]:
@dataclass
class Arguments:
  # Data
  batch_size: int = 128
  # Model
  model: str = 'mlp'  # [mlp, resnet18, mlpmixer]
  model_config: str = "./model_configs/mlp.json" # path to model config json file

  # Optimization
  optimizer: str = 'adamw'  # [sgd, momentum, adam, adamw]
  epochs: int = 15
  lr: float = 1e-3
  momentum: float = 0.9
  weight_decay: float = 5e-4

  # Experiment
  logdir: str = '/content/assignment/logs'
  seed: int = 42

  # Miscellaneous
  device: str = 'cuda'
  visualize : bool = False
  print_every: int = 80

In [20]:
# Main code entry. Train the model and save the logs
from main import train, evaluate
def main_entry(args):
    # Check for the device
    if (args.device == "cuda") and not torch.cuda.is_available():
        warnings.warn(
            "CUDA is not available, make that your environment is "
            "running on GPU (e.g. in the Notebook Settings in Google Colab). "
            'Forcing device="cpu".'
        )
        args.device = "cpu"

    if args.device == "cpu":
        warnings.warn(
            "You are about to run on CPU, and might run out of memory "
            "shortly. You can try setting batch_size=1 to reduce memory usage."
        )

    # Seed the experiment, for repeatability
    seed_experiment(args.seed)

    test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize([0.49139968, 0.48215841, 0.44653091], [0.24703223, 0.24348513, 0.26158784])
                                     ])
    # For training, we add some augmentation. Networks are too powerful and would overfit.
    train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                          transforms.RandomResizedCrop((32,32),scale=(0.8,1.0),ratio=(0.9,1.1)),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.49139968, 0.48215841, 0.44653091], [0.24703223, 0.24348513, 0.26158784])
                                        ])
    # Loading the training dataset. We need to split it into a training and validation part
    # We need to do a little trick because the validation set should not use the augmentation.
    train_dataset = CIFAR10(root='./data', train=True, transform=train_transform, download=True)
    val_dataset = CIFAR10(root='./data', train=True, transform=test_transform, download=True)
    train_set, _ = torch.utils.data.random_split(train_dataset, [45000, 5000])
    _, val_set = torch.utils.data.random_split(val_dataset, [45000, 5000])

    # Loading the test set
    test_set = CIFAR10(root='./data', train=False, transform=test_transform, download=True)

    # Load model
    print(f'Build model {args.model.upper()}...')
    if args.model_config is not None:
        print(f'Loading model config from {args.model_config}')
        with open(args.model_config) as f:
            model_config = json.load(f)
    else:
        raise ValueError('Please provide a model config json')
    print(f'########## {args.model.upper()} CONFIG ################')
    for key, val in model_config.items():
        print(f'{key}:\t{val}')
    print('############################################')
    model_cls = {'mlp': MLP, 'resnet18': ResNet18, 'mlpmixer': MLPMixer}[args.model]
    model = model_cls(**model_config)
    model.to(args.device)

    # Optimizer
    if args.optimizer == "adamw":
        optimizer = optim.AdamW(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer == "momentum":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )

    print(
        f"Initialized {args.model.upper()} model with {sum(p.numel() for p in model.parameters())} "
        f"total parameters, of which {sum(p.numel() for p in model.parameters() if p.requires_grad)} are learnable."
    )

    train_losses, valid_losses = [], []
    train_accs, valid_accs = [], []
    train_times, valid_times = [], []

    # We define a set of data loaders that we can use for various purposes later.
    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, drop_last=True, pin_memory=True, num_workers=4)
    valid_dataloader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, drop_last=False, num_workers=4)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False, drop_last=False, num_workers=4)
    for epoch in range(args.epochs):
        tqdm.write(f"====== Epoch {epoch} ======>")
        loss, acc, wall_time = train(epoch, model, train_dataloader, optimizer,args)
        train_losses.append(loss)
        train_accs.append(acc)
        train_times.append(wall_time)

        loss, acc, wall_time = evaluate(epoch, model, valid_dataloader,args)
        valid_losses.append(loss)
        valid_accs.append(acc)
        valid_times.append(wall_time)

    test_loss, test_acc, test_time = evaluate(
        epoch, model, test_dataloader, args, mode="test"
    )
    print(f"===== Best validation Accuracy: {max(valid_accs):.3f} =====>")

    # Save log if logdir provided
    if args.logdir is not None:
        print(f'Writing training logs to {args.logdir}...')
        os.makedirs(args.logdir, exist_ok=True)
        with open(os.path.join(args.logdir, 'results.json'), 'w') as f:
            f.write(json.dumps(
                {
                    "train_losses": train_losses,
                    "valid_losses": valid_losses,
                    "train_accs": train_accs,
                    "valid_accs": valid_accs,
                    "test_loss": test_loss,
                    "test_acc": test_acc
                },
                indent=4,
            ))

        # Visualize
        if args.visualize and args.model in ['resnet18', 'mlpmixer']:
            model.visualize(args.logdir)


In [23]:
# Example to run MLP with 15 epochs
config = Arguments(model='mlp',
                   model_config='/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/model_configs/mlp.json',
                   epochs=1, logdir="/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/exps/mlp_default")
main_entry(config)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model MLP...
Loading model config from /content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/model_configs/mlp.json
########## MLP CONFIG ################
input_size:	3072
hidden_sizes:	[1024, 512, 64, 64]
num_classes:	10
activation:	relu
############################################
Initialized MLP model with 3709194 total parameters, of which 3709194 are learnable.
[TRAIN] Epoch: 0, Iter: 0, Loss: 2.46330
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.76476
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.95951
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.57365
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.47636
== [TRAIN] Epoch: 0, Accuracy: 0.357 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.70149
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.410 ===>
[TEST] Epoch: 0, Iter: 0, Loss: 1.50513
=== [TEST] Epoch: 0, Iter: 78, Accuracy: 0.419 ===>
===== Best validation Accuracy: 0.410 =====>
Writing training lo

In [None]:
learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.00001]
results = {lr: {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []} for lr in learning_rates}

for lr in learning_rates:
    print(f"Training ResNet18 with learning rate {lr}...")

    # Run experiment with the given learning rate
    config = Arguments(
        model='resnet18',
        model_config='/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/model_configs/resnet18.json',
        epochs=10,
        lr=lr,
        logdir=f"/content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/exps/resnet/{lr}"
    )

    main_entry(config)

Training ResNet18 with learning rate 0.1...
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model RESNET18...
Loading model config from /content/gdrive/MyDrive/IFT6135-2025/HW1_2025/assignment1_release/model_configs/resnet18.json
########## RESNET18 CONFIG ################
num_classes:	10
############################################
Initialized RESNET18 model with 11173962 total parameters, of which 11173962 are learnable.
[TRAIN] Epoch: 0, Iter: 0, Loss: 2.35367
[TRAIN] Epoch: 0, Iter: 80, Loss: 2.09046
[TRAIN] Epoch: 0, Iter: 160, Loss: 2.08024
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.79001
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.72314
== [TRAIN] Epoch: 0, Accuracy: 0.250 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.82019
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.372 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.77814
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.58036
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.63225
[TRAIN] Epoch: 1, Iter: 240, Los

In [None]:
learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.00001]
results = {lr: {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []} for lr in learning_rates}

for lr in learning_rates:
  # Load results from log files
  with open(f"exps/resnet/{lr}/results.json") as f:
    logs = json.load(f)

  # Store results
  results[lr]['train_loss'] = logs['train_losses']
  results[lr]['val_loss'] = logs['valid_losses']
  results[lr]['train_acc'] = logs['train_accs']
  results[lr]['val_acc'] = logs['valid_accs']

# Plot results
epochs = range(1, 11)

# Plot Training Loss
plt.figure()
for lr in learning_rates:
  plt.plot(epochs, results[lr]['train_loss'], label=f'LR={lr}')
plt.xlabel("Epoch")
plt.ylabel("Training Loss")
plt.legend()
plt.title("Training Loss vs Epochs (ResNet18, Adam)")
plt.savefig("resnet18_train_loss.png")

# Plot Validation Loss
plt.figure()
for lr in learning_rates:
  plt.plot(epochs, results[lr]['val_loss'], label=f'LR={lr}')
plt.xlabel("Epoch")
plt.ylabel("Validation Loss")
plt.legend()
plt.title("Validation Loss vs Epochs (ResNet18, Adam)")
plt.savefig("resnet18_val_loss.png")

# Plot Training Accuracy
plt.figure()
for lr in learning_rates:
    plt.plot(epochs, results[lr]['train_acc'], label=f'LR={lr}')
plt.xlabel("Epoch")
plt.ylabel("Training Accuracy")
plt.legend()
plt.title("Training Accuracy vs Epochs (ResNet18, Adam)")
plt.savefig("resnet18_train_acc.png")

# Plot Validation Accuracy
plt.figure()
for lr in learning_rates:
    plt.plot(epochs, results[lr]['val_acc'], label=f'LR={lr}')
plt.xlabel("Epoch")
plt.ylabel("Validation Accuracy")
plt.legend()
plt.title("Validation Accuracy vs Epochs (ResNet18, Adam)")
plt.savefig("resnet18_val_acc.png")

plt.show()