# Assignment 1

In [2]:
#@title Mount your Google Drive
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
%matplotlib inline
%reload_ext autoreload
%autoreload 2

from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
!mkdir -p /content/gdrive/MyDrive/Projects
# cd /content/gdrive/MyDrive/Projects

In [4]:

!git pull origin main

fatal: not a git repository (or any of the parent directories): .git


In [5]:
# rm -rf IFT6135-2025-First-Assignment

# # Clone the repository again
!git clone https://github.com/rasoulpanahi/IFT6135-2025-First-Assignment.git

Cloning into 'IFT6135-2025-First-Assignment'...
remote: Enumerating objects: 102, done.[K
remote: Counting objects: 100% (102/102), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 102 (delta 49), reused 79 (delta 35), pack-reused 0 (from 0)[K
Receiving objects: 100% (102/102), 427.08 KiB | 1.70 MiB/s, done.
Resolving deltas: 100% (49/49), done.


In [None]:
#!git clone https://github.com/rasoulpanahi/IFT6135-2025-First-Assignment.git

fatal: destination path 'IFT6135-2025-First-Assignment' already exists and is not an empty directory.


gdrive	IFT6135-2025-First-Assignment  sample_data


In [6]:
#@title Link your assignment folder & install requirements
#@markdown Enter the path to the assignment folder in your Google Drive
# If you run this notebook locally or on a cluster (i.e. not on Google Colab)
# you can delete this cell which is specific to Google Colab. You may also
# change the paths for data/logs in Arguments below.
import sys
import os
import shutil
import warnings

folder = "/content/drive/MyDrive/Projects/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release" #@param {type:"string"}
!ln -Ts "$folder" /content/assignment 2> /dev/null

# Add the assignment folder to Python path
if '/content/assignment' not in sys.path:
  sys.path.insert(0, '/content/assignment')

# Check if CUDA is available
import torch
if not torch.cuda.is_available():
  warnings.warn('CUDA is not available.')

### Running on GPU
For this assignment, it will be necessary to run your experiments on GPU. To make sure the notebook is running on GPU, you can change the notebook settings with
* (EN) `Edit > Notebook Settings`
* (FR) `Modifier > Paramètres du notebook`


assignment  gdrive  IFT6135-2025-First-Assignment  sample_data


In [8]:

%cd /content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release

/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release


In [9]:
%matplotlib inline
import warnings

from dataclasses import dataclass
import torch
from torch import optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from utils import seed_experiment, to_device, cross_entropy_loss, compute_accuracy
from config import get_config_parser
import json
from mlp import MLP
from resnet18 import ResNet18
from mlpmixer import MLPMixer
from tqdm import tqdm
from torch.utils.data import DataLoader
import time
import os

# Local Test
Before run the experiment, here are some local test cases you can run for sanity check

In [10]:
import unittest
import test
suite = unittest.TestLoader().loadTestsFromModule(test)
unittest.TextTestRunner(verbosity=2).run(suite)


----------------------------------------------------------------------
Ran 0 tests in 0.000s

OK


<unittest.runner.TextTestResult run=0 errors=0 failures=0>

## Experiments

Below we define a few default arguments to get you started with your experiments. You are encouraged to modify the function `main_entry()`, as well as these arguments, to fit your needs (e.g. changing hyperparameters, the optimizer, adding regularizations).

In [11]:
@dataclass
class Arguments:
  # Data
  batch_size: int = 128
  # Model
  model: str = 'mlp'  # [mlp, resnet18, mlpmixer]
  model_config: str = "./model_configs/mlp.json" # path to model config json file

  # Optimization
  optimizer: str = 'adamw'  # [sgd, momentum, adam, adamw]
  epochs: int = 15
  lr: float = 1e-3
  momentum: float = 0.9
  weight_decay: float = 5e-4

  # Experiment
  logdir: str = '/content/assignment/logs'
  seed: int = 42

  # Miscellaneous
  device: str = 'cuda'
  visualize : bool = False
  print_every: int = 80

In [13]:
# Main code entry. Train the model and save the logs
from main import train, evaluate
def main_entry(args):
    # Check for the device
    if (args.device == "cuda") and not torch.cuda.is_available():
        warnings.warn(
            "CUDA is not available, make that your environment is "
            "running on GPU (e.g. in the Notebook Settings in Google Colab). "
            'Forcing device="cpu".'
        )
        args.device = "cpu"

    if args.device == "cpu":
        warnings.warn(
            "You are about to run on CPU, and might run out of memory "
            "shortly. You can try setting batch_size=1 to reduce memory usage."
        )

    # Seed the experiment, for repeatability
    seed_experiment(args.seed)

    test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize([0.49139968, 0.48215841, 0.44653091], [0.24703223, 0.24348513, 0.26158784])
                                     ])
    # For training, we add some augmentation. Networks are too powerful and would overfit.
    train_transform = transforms.Compose([transforms.RandomHorizontalFlip(),
                                          transforms.RandomResizedCrop((32,32),scale=(0.8,1.0),ratio=(0.9,1.1)),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.49139968, 0.48215841, 0.44653091], [0.24703223, 0.24348513, 0.26158784])
                                        ])
    # Loading the training dataset. We need to split it into a training and validation part
    # We need to do a little trick because the validation set should not use the augmentation.
    train_dataset = CIFAR10(root='./data', train=True, transform=train_transform, download=True)
    val_dataset = CIFAR10(root='./data', train=True, transform=test_transform, download=True)
    train_set, _ = torch.utils.data.random_split(train_dataset, [45000, 5000])
    _, val_set = torch.utils.data.random_split(val_dataset, [45000, 5000])

    # Loading the test set
    test_set = CIFAR10(root='./data', train=False, transform=test_transform, download=True)

    # Load model
    print(f'Build model {args.model.upper()}...')
    if args.model_config is not None:
        print(f'Loading model config from {args.model_config}')
        with open(args.model_config) as f:
            model_config = json.load(f)
    else:
        raise ValueError('Please provide a model config json')
    print(f'########## {args.model.upper()} CONFIG ################')
    for key, val in model_config.items():
        print(f'{key}:\t{val}')
    print('############################################')
    model_cls = {'mlp': MLP, 'resnet18': ResNet18, 'mlpmixer': MLPMixer}[args.model]
    model = model_cls(**model_config)
    model.to(args.device)

    # Optimizer
    if args.optimizer == "adamw":
        optimizer = optim.AdamW(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    elif args.optimizer == "sgd":
        optimizer = optim.SGD(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer == "momentum":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )

    print(
        f"Initialized {args.model.upper()} model with {sum(p.numel() for p in model.parameters())} "
        f"total parameters, of which {sum(p.numel() for p in model.parameters() if p.requires_grad)} are learnable."
    )

    train_losses, valid_losses = [], []
    train_accs, valid_accs = [], []
    train_times, valid_times = [], []

    # We define a set of data loaders that we can use for various purposes later.
    train_dataloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, drop_last=True, pin_memory=True, num_workers=4)
    valid_dataloader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False, drop_last=False, num_workers=4)
    test_dataloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False, drop_last=False, num_workers=4)
    for epoch in range(args.epochs):
        tqdm.write(f"====== Epoch {epoch} ======>")
        loss, acc, wall_time = train(epoch, model, train_dataloader, optimizer,args)
        train_losses.append(loss)
        train_accs.append(acc)
        train_times.append(wall_time)

        loss, acc, wall_time = evaluate(epoch, model, valid_dataloader,args)
        valid_losses.append(loss)
        valid_accs.append(acc)
        valid_times.append(wall_time)

    test_loss, test_acc, test_time = evaluate(
        epoch, model, test_dataloader, args, mode="test"
    )
    print(f"===== Best validation Accuracy: {max(valid_accs):.3f} =====>")
    return train_losses, valid_losses, train_accs, valid_accs,

    # Save log if logdir provided

    if args.logdir is not None:
        print(f'Writing training logs to {args.logdir}...')
        os.makedirs(args.logdir, exist_ok=True)
        with open(os.path.join(args.logdir, 'results.json'), 'w') as f:
            f.write(json.dumps(
                {
                    "train_losses": train_losses,
                    "valid_losses": valid_losses,
                    "train_accs": train_accs,
                    "valid_accs": valid_accs,
                    "test_loss": test_loss,
                    "test_acc": test_acc
                },
                indent=4,
            ))

        # Visualize
        if args.visualize and args.model in ['resnet18', 'mlpmixer']:
            model.visualize(args.logdir)


In [24]:

import pandas as pd

In [27]:
# Example to run MLP with 15 epochs
config = Arguments(model='mlp',
                   model_config='/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlp_sigmoid.json',
                   epochs= 30, logdir="exps/mlp_sigmoid_default")
train_losses, valid_losses, train_accs, valid_accs = main_entry(config)

df_sigmoid = pd.DataFrame({'Train Losses': train_losses, 'Valid Losses': valid_losses, 'Train accs': train_accs, 'Valid accs': valid_accs}).assign(Epoch=range(1, len(train_losses) + 1))
df_sigmoid['Activation Function'] = 'Sigmoid'
df_sigmoid

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model MLP...
Loading model config from /content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlp_sigmoid.json
########## MLP CONFIG ################
input_size:	3072
hidden_sizes:	[1024, 512, 64, 64]
num_classes:	10
activation:	sigmoid
############################################
Initialized MLP model with 3709194 total parameters, of which 3709194 are learnable.




[TRAIN] Epoch: 0, Iter: 0, Loss: 2.49124
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.98410
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.89980
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.76325
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.78070
== [TRAIN] Epoch: 0, Accuracy: 0.299 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.81250
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.372 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.88219
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.69453
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.81452
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.71607
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.79211
== [TRAIN] Epoch: 1, Accuracy: 0.388 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.70027
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.411 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.75566
[TRAIN] Epoch: 2, Iter: 80, Loss: 1.68073
[TRAIN] Epoch: 2, Iter: 160, Loss: 1.58526
[TRAIN] Epoch: 2, Iter: 240, Loss: 1.66338
[TRAIN] Epoch: 2, Iter: 320, Loss: 1.75171
== [TRAIN] Epoch: 2, Accuracy: 0.417 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 1.59961
=== [VAL] Epoch: 2, It

Unnamed: 0,Train Losses,Valid Losses,Train accs,Valid accs,Epoch,Activation Function
0,1.929532,1.778379,0.299012,0.372461,1,Tanh
1,1.715692,1.648512,0.388444,0.411328,2,Tanh
2,1.631715,1.573093,0.416511,0.443164,3,Tanh
3,1.572858,1.513617,0.437767,0.47793,4,Tanh
4,1.52926,1.486383,0.457465,0.476953,5,Tanh
5,1.493623,1.445,0.468127,0.491406,6,Tanh
6,1.470069,1.429963,0.477742,0.505469,7,Tanh
7,1.442939,1.396051,0.487847,0.50957,8,Tanh
8,1.418792,1.386294,0.496261,0.511133,9,Tanh
9,1.400922,1.364998,0.501647,0.522852,10,Tanh


In [26]:
# Example to run MLP with 15 epochs
config = Arguments(model='mlp',
                   model_config='/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlp.json',
                   epochs= 30, logdir="exps/mlp_default")
train_losses, valid_losses, train_accs, valid_accs = main_entry(config)

df_relu = pd.DataFrame({'Train Losses': train_losses, 'Valid Losses': valid_losses, 'Train accs': train_accs, 'Valid accs': valid_accs}).assign(Epoch=range(1, len(train_losses) + 1))
df_relu['Activation Function'] = 'ReLU'
df_relu

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model MLP...
Loading model config from /content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlp.json
########## MLP CONFIG ################
input_size:	3072
hidden_sizes:	[1024, 512, 64, 64]
num_classes:	10
activation:	relu
############################################
Initialized MLP model with 3709194 total parameters, of which 3709194 are learnable.




[TRAIN] Epoch: 0, Iter: 0, Loss: 2.61241
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.77236
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.74864
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.54849
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.66133
== [TRAIN] Epoch: 0, Accuracy: 0.358 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.69828
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.413 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.72243
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.56231
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.62889
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.76265
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.73134
== [TRAIN] Epoch: 1, Accuracy: 0.430 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.57578
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.471 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.62931
[TRAIN] Epoch: 2, Iter: 80, Loss: 1.73353
[TRAIN] Epoch: 2, Iter: 160, Loss: 1.56528
[TRAIN] Epoch: 2, Iter: 240, Loss: 1.48135
[TRAIN] Epoch: 2, Iter: 320, Loss: 1.47238
== [TRAIN] Epoch: 2, Accuracy: 0.458 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 1.52025
=== [VAL] Epoch: 2, It

Unnamed: 0,Train Losses,Valid Losses,Train accs,Valid accs,Epoch,Activation Function
0,1.803233,1.646389,0.358351,0.413281,1,ReLU
1,1.598008,1.509481,0.429754,0.470508,2,ReLU
2,1.516049,1.46863,0.457755,0.471875,3,ReLU
3,1.454481,1.411595,0.48097,0.498047,4,ReLU
4,1.403588,1.340204,0.498598,0.533008,5,ReLU
5,1.368076,1.307345,0.513867,0.540625,6,ReLU
6,1.329773,1.247441,0.528178,0.563477,7,ReLU
7,1.308459,1.235954,0.536903,0.572461,8,ReLU
8,1.278932,1.222591,0.546497,0.565234,9,ReLU
9,1.251271,1.184694,0.556023,0.589648,10,ReLU


In [25]:
# Example to run MLP with 15 epochs
config = Arguments(model='mlp',
                   model_config='/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlp_tanh.json',
                   epochs= 30, logdir="exps/mlp_tanh_default")
train_losses, valid_losses, train_accs, valid_accs = main_entry(config)

df = pd.DataFrame({'Train Losses': train_losses, 'Valid Losses': valid_losses, 'Train accs': train_accs, 'Valid accs': valid_accs}).assign(Epoch=range(1, len(train_losses) + 1))
df['Activation Function'] = 'Tanh'
df



Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model MLP...
Loading model config from /content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlp_tanh.json
########## MLP CONFIG ################
input_size:	3072
hidden_sizes:	[1024, 512, 64, 64]
num_classes:	10
activation:	tanh
############################################
Initialized MLP model with 3709194 total parameters, of which 3709194 are learnable.




[TRAIN] Epoch: 0, Iter: 0, Loss: 2.48179
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.90319
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.67422
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.71224
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.88699
== [TRAIN] Epoch: 0, Accuracy: 0.341 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.88315
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.378 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.84177
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.67996
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.81169
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.81872
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.84161
== [TRAIN] Epoch: 1, Accuracy: 0.385 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.69675
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.404 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.72600
[TRAIN] Epoch: 2, Iter: 80, Loss: 1.83291
[TRAIN] Epoch: 2, Iter: 160, Loss: 1.60202
[TRAIN] Epoch: 2, Iter: 240, Loss: 1.75192
[TRAIN] Epoch: 2, Iter: 320, Loss: 1.80821
== [TRAIN] Epoch: 2, Accuracy: 0.400 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 1.68978
=== [VAL] Epoch: 2, It

Unnamed: 0,Train Losses,Valid Losses,Train accs,Valid accs,Epoch,Activation Function
0,1.87678,1.809609,0.341168,0.377539,1,Tanh
1,1.759014,1.711079,0.384882,0.404102,2,Tanh
2,1.714775,1.688537,0.399884,0.418555,3,Tanh
3,1.681303,1.650801,0.414953,0.429883,4,Tanh
4,1.646929,1.611948,0.426416,0.451562,5,Tanh
5,1.632255,1.597633,0.425681,0.445117,6,Tanh
6,1.609509,1.589829,0.435274,0.45,7,Tanh
7,1.600469,1.579528,0.43543,0.458789,8,Tanh
8,1.586251,1.561523,0.441262,0.45957,9,Tanh
9,1.577234,1.537864,0.444867,0.456836,10,Tanh


In [31]:

df_all = pd.concat([df_sigmoid, df_relu, df])
df_all.to_csv('df_mlp_with_diff_activation.csv')

In [17]:
1e-3

0.001

In [19]:
# Example to run MLP with 15 epochs
import pandas as pd
df_all_resnet = []
for lr in [1e-1 , 1e-2 , 1e-3, 1e-4, 1e-5]:
  config = Arguments(model='resnet18',
                    model_config='/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/resnet18.json',
                    epochs= 40, logdir="exps/resnet18" , lr = lr )
  train_losses, valid_losses, train_accs, valid_accs = main_entry(config)

  df = pd.DataFrame({'Train Losses': train_losses, 'Valid Losses': valid_losses, 'Train accs': train_accs, 'Valid accs': valid_accs}).assign(Epoch=range(1, len(train_losses) + 1))
  df['Learning Rate'] = lr
  df_all_resnet.append(df)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model RESNET18...
Loading model config from /content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/resnet18.json
########## RESNET18 CONFIG ################
num_classes:	10
############################################
Initialized RESNET18 model with 11173962 total parameters, of which 11173962 are learnable.




[TRAIN] Epoch: 0, Iter: 0, Loss: 2.35414
[TRAIN] Epoch: 0, Iter: 80, Loss: 2.13083
[TRAIN] Epoch: 0, Iter: 160, Loss: 2.04942
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.88022
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.81179
== [TRAIN] Epoch: 0, Accuracy: 0.225 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.81541
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.337 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.85740
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.56908
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.65282
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.58355
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.45478
== [TRAIN] Epoch: 1, Accuracy: 0.379 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.69174
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.390 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.79429
[TRAIN] Epoch: 2, Iter: 80, Loss: 1.60298
[TRAIN] Epoch: 2, Iter: 160, Loss: 1.68818
[TRAIN] Epoch: 2, Iter: 240, Loss: 1.56135
[TRAIN] Epoch: 2, Iter: 320, Loss: 1.39268
== [TRAIN] Epoch: 2, Accuracy: 0.448 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 1.45086
=== [VAL] Epoch: 2, It



[TRAIN] Epoch: 0, Iter: 0, Loss: 2.35414
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.80547
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.76577
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.61652
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.53608
== [TRAIN] Epoch: 0, Accuracy: 0.351 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.45470
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.446 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.60079
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.33400
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.27345
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.30770
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.18779
== [TRAIN] Epoch: 1, Accuracy: 0.509 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.34599
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.584 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.32911
[TRAIN] Epoch: 2, Iter: 80, Loss: 1.21138
[TRAIN] Epoch: 2, Iter: 160, Loss: 1.26636
[TRAIN] Epoch: 2, Iter: 240, Loss: 1.05696
[TRAIN] Epoch: 2, Iter: 320, Loss: 1.00084
== [TRAIN] Epoch: 2, Accuracy: 0.607 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 1.19231
=== [VAL] Epoch: 2, It



[TRAIN] Epoch: 0, Iter: 0, Loss: 2.35414
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.58104
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.41598
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.36446
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.19514
== [TRAIN] Epoch: 0, Accuracy: 0.478 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.48316
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.548 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.02808
[TRAIN] Epoch: 1, Iter: 80, Loss: 0.96841
[TRAIN] Epoch: 1, Iter: 160, Loss: 0.93292
[TRAIN] Epoch: 1, Iter: 240, Loss: 0.91708
[TRAIN] Epoch: 1, Iter: 320, Loss: 0.79370
== [TRAIN] Epoch: 1, Accuracy: 0.662 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.08330
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.634 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 0.84451
[TRAIN] Epoch: 2, Iter: 80, Loss: 0.78177
[TRAIN] Epoch: 2, Iter: 160, Loss: 0.74784
[TRAIN] Epoch: 2, Iter: 240, Loss: 0.84980
[TRAIN] Epoch: 2, Iter: 320, Loss: 0.70575
== [TRAIN] Epoch: 2, Accuracy: 0.740 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 0.93155
=== [VAL] Epoch: 2, It



[TRAIN] Epoch: 0, Iter: 0, Loss: 2.35414
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.42008
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.48056
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.20674
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.08871
== [TRAIN] Epoch: 0, Accuracy: 0.520 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.14254
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.623 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 0.94476
[TRAIN] Epoch: 1, Iter: 80, Loss: 0.88517
[TRAIN] Epoch: 1, Iter: 160, Loss: 0.93420
[TRAIN] Epoch: 1, Iter: 240, Loss: 0.92325
[TRAIN] Epoch: 1, Iter: 320, Loss: 0.70693
== [TRAIN] Epoch: 1, Accuracy: 0.677 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 0.90248
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.706 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 0.92170
[TRAIN] Epoch: 2, Iter: 80, Loss: 0.70922
[TRAIN] Epoch: 2, Iter: 160, Loss: 0.77679
[TRAIN] Epoch: 2, Iter: 240, Loss: 0.76666
[TRAIN] Epoch: 2, Iter: 320, Loss: 0.68475
== [TRAIN] Epoch: 2, Accuracy: 0.746 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 0.79032
=== [VAL] Epoch: 2, It



[TRAIN] Epoch: 0, Iter: 0, Loss: 2.35414
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.72682
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.80058
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.56264
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.45335
== [TRAIN] Epoch: 0, Accuracy: 0.381 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.52121
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.474 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.45164
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.37177
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.29110
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.32212
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.21282
== [TRAIN] Epoch: 1, Accuracy: 0.510 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.36388
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.545 ===>
[TRAIN] Epoch: 2, Iter: 0, Loss: 1.48306
[TRAIN] Epoch: 2, Iter: 80, Loss: 1.28928
[TRAIN] Epoch: 2, Iter: 160, Loss: 1.38964
[TRAIN] Epoch: 2, Iter: 240, Loss: 1.25649
[TRAIN] Epoch: 2, Iter: 320, Loss: 1.14172
== [TRAIN] Epoch: 2, Accuracy: 0.558 ==>
[VAL] Epoch: 2, Iter: 0, Loss: 1.23342
=== [VAL] Epoch: 2, It

In [23]:

pwd

'/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release'

In [22]:
pd.concat(df_all_resnet).to_csv('df_resnet18_five_lr.csv')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Example to run MLP with 15 epochs
config = Arguments(model='mlpmixer',
                   model_config='/content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlpmixer.json',
                   epochs=2, logdir="exps/mlpmixer")
main_entry(config)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Build model MLPMIXER...
Loading model config from /content/IFT6135-2025-First-Assignment/HW1_2025/assignment1_release/model_configs/mlpmixer.json
########## MLPMIXER CONFIG ################
num_classes:	10
img_size:	32
patch_size:	4
embed_dim:	256
num_blocks:	4
drop_rate:	0.0
activation:	gelu
############################################
Initialized MLPMIXER model with 2188298 total parameters, of which 2188298 are learnable.




[TRAIN] Epoch: 0, Iter: 0, Loss: 2.31901
[TRAIN] Epoch: 0, Iter: 80, Loss: 1.57623
[TRAIN] Epoch: 0, Iter: 160, Loss: 1.61766
[TRAIN] Epoch: 0, Iter: 240, Loss: 1.41238
[TRAIN] Epoch: 0, Iter: 320, Loss: 1.47204
== [TRAIN] Epoch: 0, Accuracy: 0.421 ==>
[VAL] Epoch: 0, Iter: 0, Loss: 1.39669
=== [VAL] Epoch: 0, Iter: 39, Accuracy: 0.522 ===>
[TRAIN] Epoch: 1, Iter: 0, Loss: 1.21684
[TRAIN] Epoch: 1, Iter: 80, Loss: 1.32808
[TRAIN] Epoch: 1, Iter: 160, Loss: 1.19022
[TRAIN] Epoch: 1, Iter: 240, Loss: 1.21163
[TRAIN] Epoch: 1, Iter: 320, Loss: 1.03568
== [TRAIN] Epoch: 1, Accuracy: 0.561 ==>
[VAL] Epoch: 1, Iter: 0, Loss: 1.21091
=== [VAL] Epoch: 1, Iter: 39, Accuracy: 0.610 ===>
[TEST] Epoch: 1, Iter: 0, Loss: 1.00693
=== [TEST] Epoch: 1, Iter: 78, Accuracy: 0.599 ===>
===== Best validation Accuracy: 0.610 =====>
Writing training logs to exps/mlpmixer...
