In [1]:
# Re-import changed modules automatically before each cell executes, so edits
# to local helper modules (e.g. `utils`) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Exercise 3

<img src="./images/03.png" width=800>

In [None]:
import time
import torch
import numpy as np
from torch.utils.data import DataLoader, random_split
import torch.nn as nn 
from typing import DefaultDict, Any, Callable, Optional
import mlflow
import os
from utils import train_network, accuracy_score_wrapper
import torchvision
from torchvision import transforms
from torchinfo import summary
import mlflow

In [None]:
# Keep this exercise's runs in their own local store: the environment variable
# and the in-process MLflow client are both pointed at the same location.
os.environ.update(MLFLOW_TRACKING_URI='./mlruns_3')
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [None]:
# Activate the experiment that groups every run launched from this notebook.
mlflow.set_experiment(experiment_name='Exercise_3')

2025/06/07 10:04:44 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns_1/454812752021810061', creation_time=1749278084155, experiment_id='454812752021810061', last_update_time=1749278084155, lifecycle_stage='active', name='Exercise_1', tags={}>

In [None]:
# Seed the random number generators so that weight initialisation and batch
# shuffling are repeatable under Restart Kernel -> Run All. Without a seed the
# deterministic-cuDNN flag below cannot make runs reproducible on its own.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset and Dataloader

In [None]:
# FashionMNIST train/test splits rooted at ./data (download requested), with
# every image converted by ToTensor.
batch = 256
train_data, test_data = (
    torchvision.datasets.FashionMNIST("./data", train=is_train, transform=transforms.ToTensor(), download=True)
    for is_train in (True, False)
)

# Only the training loader shuffles; the test loader keeps the DataLoader default.
train_loader = DataLoader(dataset=train_data, batch_size=batch, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=batch)

## Training

In [None]:
# Settings shared by every training run in this notebook.
eta_0 = 0.001  # learning rate handed to the SGD optimizer
score_funcs = dict(Accuracy=accuracy_score_wrapper)  # metric name -> scoring callable
loss_func = nn.CrossEntropyLoss()

In [None]:
def optimizers(model):
    """Lazily yield ``(label, optimizer)`` pairs for the optimizers under comparison.

    Each optimizer is built over ``model.parameters()`` only when the consumer
    asks for it, so the second optimizer is not constructed until iteration
    resumes after the first.

    Args:
        model: module whose parameters the optimizers will update.

    Yields:
        ``('SGD', torch.optim.SGD)`` using ``eta_0`` as the learning rate, then
        ``('AdamW', torch.optim.AdamW)`` built with its default hyper-parameters.
    """
    factories = (
        ('SGD', lambda: torch.optim.SGD(model.parameters(), lr=eta_0)),
        ('AdamW', lambda: torch.optim.AdamW(model.parameters())),
    )
    for label, build in factories:
        yield label, build()

In [None]:
# Run-level settings recorded alongside each training run.
epochs = 20
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch,
)

In [None]:
# Sweep over network depth and optimizer: for every depth, train one
# fully-connected Tanh network per optimizer and log each run to MLflow.
D = 28*28  # flattened size of a 28 x 28 image
n = 256  # width of every hidden layer
C = 1  # leading dimension of the example input given to the model summary
classes = 10
t_max = epochs//3  # T_max argument of the cosine-annealing schedule

for hidden_layer in [1, 6, 12, 24]:
    print(f'hidden_layer= {hidden_layer}')
    # Model: Flatten -> hidden_layer x (Linear + Tanh) -> Linear producing class scores
    layers = [nn.Flatten(), nn.Linear(D, n), nn.Tanh()]
    for _ in range(hidden_layer-1):
        layers.extend([nn.Linear(n, n), nn.Tanh()])
    layers.append(nn.Linear(n, classes))
    fc_model = nn.Sequential(*layers)
    params['hidden_layer'] = hidden_layer

    # The architecture is identical for every optimizer at this depth, so the
    # summary artifact only needs to be written once per model.
    with open('model_summary.txt', 'w') as f:
        f.write(str(summary(fc_model, (C, 28, 28))))

    # Snapshot the untrained weights. Every optimizer must start from this same
    # initialisation; otherwise the second optimizer would simply continue
    # training the weights the first one already fitted, and the comparison
    # between optimizers would be meaningless.
    initial_state = {
        name: tensor.detach().clone()
        for name, tensor in fc_model.state_dict().items()
    }

    for experiment, optimizer in optimizers(fc_model):
        # load_state_dict copies into the existing parameter tensors, so the
        # optimizer that was just built over them keeps valid references.
        fc_model.load_state_dict(initial_state)

        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, t_max)
        # NOTE: the 'lr_schedular' key spelling is kept as-is so newly logged
        # runs stay comparable with runs that were already recorded.
        params['lr_schedular'] = type(scheduler).__name__
        params['T_max'] = t_max
        params['optimizer'] = optimizer.defaults
        params['experiment'] = experiment

        with mlflow.start_run(nested=True, run_name=f'{experiment}_{hidden_layer}'):
            mlflow.log_params(params)
            mlflow.log_artifact('model_summary.txt')

            results = train_network(
                model=fc_model,
                optimizer=optimizer,
                loss_func=loss_func,
                train_loader=train_loader,
                test_loader=test_loader,
                epochs=epochs,
                device=device,
                score_funcs=score_funcs,
                lr_schedule=scheduler
                )

### Results for AdamW

<img src="./images/E3_test_acc_adamw.png">

<img src="./images/E3_test_loss_adamw.png">

### Results for SGD 

<img src="./images/E3_test_acc_sgd.png">

<img src="./images/E3_test_loss_sgd.png">

### Results grouped by optimizer

<img src="./images/E3_test_acc_optimizer.png">

<img src="./images/E3_test_loss_optimizer.png">

<img src="./images/E3_time_optimizer.png">