In [15]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to them take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Exercise 1

<img src="./images/01.png" width=800>

In [28]:
import time
from tqdm import tqdm
import torch
import numpy as np
from torch.utils.data import DataLoader, Subset
import torch.nn as nn 
from typing import DefaultDict, Any, Callable, Optional
import mlflow
import os
from utils import train_network, accuracy_score_wrapper
import torchvision
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torchinfo import summary

In [None]:
# Keep this exercise's MLflow runs in a local, file-based store. The location
# is exported through the environment and also handed to the client explicitly.
tracking_uri = './mlruns05_1'
os.environ['MLFLOW_TRACKING_URI'] = tracking_uri
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [18]:
# Activate the experiment that groups this notebook's runs (MLflow creates it
# on first use if it does not exist yet).
mlflow.set_experiment(experiment_name='Exercise_1')

2025/06/07 10:04:44 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns_1/454812752021810061', creation_time=1749278084155, experiment_id='454812752021810061', last_update_time=1749278084155, lifecycle_stage='active', name='Exercise_1', tags={}>

In [19]:
# Seed the RNGs used below so that weight initialisation and the shuffled
# batch order are repeatable across "Restart & Run All". The value matches the
# random_state used for the train/validation split.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)  # seeds the default generator on all devices

# Ask cuDNN for deterministic kernels and switch off its benchmark autotuner,
# which may otherwise select different algorithms from one run to the next.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset and Dataloaders

In [20]:
B = 256  # mini-batch size used by every DataLoader in this notebook

# Both FashionMNIST splits share the same root directory and the same
# ToTensor transform; the files are downloaded if they are not present yet.
data_root = "./data"
to_tensor = transforms.ToTensor()
train_data = torchvision.datasets.FashionMNIST(
    root=data_root, train=True, transform=to_tensor, download=True
)
test_data = torchvision.datasets.FashionMNIST(
    root=data_root, train=False, transform=to_tensor, download=True
)

In [22]:
# Hold out 10% of the training indices for validation. The split is stratified
# on the labels and uses a fixed random_state, so it is the same on every run.
all_indices = np.arange(len(train_data))
train_idx, valid_idx = train_test_split(
    all_indices,
    test_size=.1,
    stratify=train_data.targets,
    shuffle=True,
    random_state=42,
)

# Index-based views over the original training set; no data is copied here.
train_sub_set, valid_sub_set = (
    Subset(train_data, indices) for indices in (train_idx, valid_idx)
)

In [23]:
# Only the training loader reshuffles its samples; the validation and test
# loaders rely on DataLoader's default of shuffle=False.
loader_kwargs = {'batch_size': B}
train_loader = DataLoader(train_sub_set, shuffle=True, **loader_kwargs)
valid_loader = DataLoader(valid_sub_set, **loader_kwargs)
test_loader = DataLoader(test_data, **loader_kwargs)

## Model

In [24]:
D = 28*28     # flattened input size: one value per pixel of a 28 x 28 image
n = 128       # width of every hidden layer
C = 1         # number of input channels
classes = 10  # number of output classes

# Fully connected classifier: flatten the image, pass it through three
# Linear + Tanh hidden stages, then map to one raw score (logit) per class.
# Layers are created in network order, so the Sequential indices are 0..7.
layers = [nn.Flatten()]
for in_features in (D, n, n):
    layers += [nn.Linear(in_features, n), nn.Tanh()]
layers.append(nn.Linear(n, classes))

fc_model = nn.Sequential(*layers)

## Training

In [25]:
# Optimisation setup: plain SGD over the model's parameters, starting from
# learning rate eta_0 and minimising cross-entropy.
epochs = 20
eta_0 = 0.001
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(fc_model.parameters(), lr=eta_0)

# Score functions handed to train_network, keyed by the name they go under.
score_funcs = dict(Accuracy=accuracy_score_wrapper)

# Run-level settings that are logged with the MLflow run.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
)

In [35]:
# Single source for the schedule name, so the run label and the argument passed
# to train_network cannot drift apart.
lr_schedule = 'ReduceLROnPlateau'

# Write the architecture summary to disk so it can be attached to the run.
# torchinfo draws its layer tree with non-ASCII characters, so the encoding is
# pinned to UTF-8 instead of the platform default, which may fail to encode them.
with open('model_summary.txt', 'w', encoding='utf-8') as f:
    f.write(str(summary(fc_model)))

with mlflow.start_run(nested=True, run_name=f'lr_schedule="{lr_schedule}"'):
    # Attach the summary file and the run-level settings to this run.
    mlflow.log_artifact('model_summary.txt')
    mlflow.log_params(params)
    # train_network comes from the project's utils module (not visible here);
    # its return value is kept for later inspection.
    fc_results = train_network(
        model=fc_model,
        loss_func=loss_func,
        train_loader=train_loader,
        valid_loader=valid_loader,
        test_loader=test_loader,
        epochs=epochs,
        optimizer=optimizer,
        score_funcs=score_funcs,
        device=device,
        lr_schedule=lr_schedule
        )

Epoch: 100%|██████████| 20/20 [02:49<00:00,  8.49s/it]


<img src="./images/E1_train_valid_test_acc.png">

<img src="./images/E1_train_valid_test_loss.png">