In [1]:
%load_ext autoreload
%autoreload 2

# Exercise 2

<img src="./images/02.png" width=800>

In [None]:
import time
from tqdm import tqdm
import torch
import numpy as np
from torch.utils.data import DataLoader, Subset
import torch.nn as nn 
from typing import DefaultDict, Any, Callable, Optional
import mlflow
import os
from utils import train_network, accuracy_score_wrapper, weight_reset
import torchvision
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torchinfo import summary

  from tqdm.autonotebook import tqdm


In [3]:
os.environ['MLFLOW_TRACKING_URI'] = './mlruns_2'
mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI'))

In [4]:
mlflow.set_experiment('Exercise_2')

2025/06/07 11:30:21 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_2' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns_2/312535489251545199', creation_time=1749283221720, experiment_id='312535489251545199', last_update_time=1749283221720, lifecycle_stage='active', name='Exercise_2', tags={}>

In [5]:
torch.backends.cudnn.deterministic=True
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataset

In [6]:
B = 256 # a respectable average batch size
train_data = torchvision.datasets.FashionMNIST("./data", train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.FashionMNIST("./data", train=False, transform=transforms.ToTensor(), download=True)

## Model

In [7]:
#How many values are in the input? We use this to help determine the size of subsequent layers
D = 28*28 #28 * 28 images 
#Hidden layer size
n = 128 
#How many channels are in the input?
C = 1
#How many classes are there?
classes = 10

fc_model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(D,  n),
    nn.Tanh(),
    nn.Linear(n,  n),
    nn.Tanh(),
    nn.Linear(n,  n),
    nn.Tanh(),
    nn.Linear(n, classes),
)

## Training

In [8]:
eta_0 = 0.001
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}
epochs = 20
params = {
    'device': device,
    'loss_func': loss_func.__class__.__name__,
    'epochs': epochs,
    }

In [None]:
def optimizers(model):
    yield 'SGD+Nesterov', torch.optim.SGD(model.parameters(), lr=0.001, momentum=.9, nesterov=True)
    yield 'AdamW', torch.optim.AdamW(model.parameters())

In [10]:
epochs = 20
params = {
    'device': device,
    'loss_func': loss_func.__class__.__name__,
    'epochs': epochs,
}

In [None]:
for batch_size in [1, 4, 16, 32, 64, 128]:
    fc_model.apply(weight_reset)
    print(f'Batch: {batch_size}')
    params['batch_size'] = batch_size
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size)
    
    for experiment, optimizer in optimizers(fc_model):
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs//3)
        params['optimizer'] = optimizer.defaults
        params['lr_schedular'] = type(scheduler).__name__
        params['T_max'] = epochs//3
    
        with open('model_summary.txt', 'w') as f:
            f.write(str(summary(fc_model)))
        with mlflow.start_run(nested=True, run_name=f'{experiment}_{batch_size}'):
            mlflow.log_params(params)
            mlflow.log_artifact('model_summary.txt')

            results = train_network(
                model=fc_model,
                optimizer=optimizer,
                loss_func=loss_func,
                train_loader=train_loader,
                test_loader=test_loader,
                epochs=epochs,
                device=device,
                score_funcs=score_funcs,
                lr_schedule=scheduler
                )

### Results for AdamW+CosineAnnealingLR

<img src="./images/E2_train_acc_adamw.png">

<img src="./images/E2_train_loss_adamw.png">

<img src="./images/E2_test_acc_adamw.png">

<img src="./images/E2_test_loss_adamw.png">

### Results for SGD+Nesterov+CosineAnnealingLR

<img src="./images/E2_train_acc_sgdneterov.png">

<img src="./images/E2_train_loss_sgdneterov.png">

<img src="./images/E2_test_acc_sgdneterov.png">

<img src="./images/E2_test_loss_sgdneterov.png">

### Results Grouped by Optimizer

<img src="./images/E2_test_acc_optimizers.png">

<img src="./images/E2_test_loss_optimizers.png">

<img src="./images/E2_time_optimizers.png">