In [None]:
%load_ext autoreload
%autoreload 2

# Exercise 1

<img src="./images/01.png" width=800>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision 
from torchvision import transforms

from torch.utils.data import Dataset, DataLoader

import os
import mlflow
from torchinfo import summary
from utils import train_network, accuracy_score_wrapper



  from tqdm.autonotebook import tqdm


In [None]:
os.environ['MLFLOW_TRACKING_URI'] = './mlruns06_1'
mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI'))

In [None]:
mlflow.set_experiment('Exercise06_1')

2025/06/07 12:02:31 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_4' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns_4/626301946211563652', creation_time=1749285151104, experiment_id='626301946211563652', last_update_time=1749285151104, lifecycle_stage='active', name='Exercise_4', tags={}>

In [2]:
torch.backends.cudnn.deterministic=True
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataset and Dataloader

In [4]:
train_data = torchvision.datasets.FashionMNIST("./", train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.FashionMNIST("./", train=True, transform=transforms.ToTensor(), download=True)

batch_size = 256
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Using downloaded and verified file: ./FashionMNIST/raw/train-images-idx3-ubyte.gz
Extracting ./FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Using downloaded and verified file: ./FashionMNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Using downloaded and verified file: ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz
Extracting ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw/t10k-la

100%|██████████| 5148/5148 [00:00<00:00, 6206460.76it/s]


Extracting ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw



In [23]:
W = train_data.data.shape[1] 
H = train_data.data.shape[2]
D = W * H
C = 1
classes = len(torch.unique(train_data.targets))
n_filters = 32

## Models

In [24]:
def cnn_layer(in_filters, out_filters=None, kernel_size=3, activation=nn.LeakyReLU(.1)):
    if out_filters is None:
        out_filters = in_filters
    padding = kernel_size // 2
    return nn.Sequential(
        nn.Conv2d(in_filters, out_filters, kernel_size, padding=padding),
        activation
    )

In [25]:
def cnn_layer_bn(in_filters, out_filters=None, kernel_size=3, activation=nn.LeakyReLU(.1)):
    if out_filters is None:
        out_filters = in_filters
    padding = kernel_size // 2
    return nn.Sequential(
        nn.Conv2d(in_filters, out_filters, kernel_size, padding=padding),
        nn.BatchNorm2d(out_filters),
        activation
    )

In [26]:
class ResidualBlockE(nn.Module):
    def __init__(self, channels, kernel_size=3, activation=nn.LeakyReLU(.1)):
        super().__init__()
        
        pad = (kernel_size - 1) // 2
        
        self.F = nn.Sequential(
            nn.Conv2d(channels, channels, kernel_size, padding=pad),
            nn.BatchNorm2d(channels),
            activation,
            nn.Conv2d(channels, channels, kernel_size, padding=pad),
            nn.BatchNorm2d(channels),
            activation,
            )
    def forward(self, x):
        return x + self.F(x)

In [27]:
class ResidualBottelneck(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, activation=nn.LeakyReLU(.1)):
        super().__init__()
        pad = (kernel_size -1) // 2
        bottleneck = max(out_channels//4, in_channels)
        self.F = nn.Sequential(
        nn.BatchNorm2d(in_channels),
        activation,
        nn.Conv2d(in_channels, bottleneck, 1, padding=0),
        
        nn.BatchNorm2d(bottleneck),
        activation,
        nn.Conv2d(bottleneck, bottleneck, kernel_size, padding=pad),
        
        nn.BatchNorm2d(bottleneck),
        activation,
        nn.Conv2d(bottleneck, out_channels, 1, padding=0)
        )
    
        self.shortcut = nn.Identity()
        if in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, padding=0),
                nn.BatchNorm2d(out_channels)
            )
    
    def forward(self, x):
        return self.F(x) + self.shortcut(x)

In [28]:
def cnn_res(C, n_filters, classes, activation):
    return nn.Sequential(
    ResidualBottelneck(C, n_filters, activation=activation),
    activation,
    ResidualBlockE(n_filters, activation=activation),
    activation,
    nn.MaxPool2d((2,2)),
    ResidualBottelneck(n_filters, 2*n_filters, activation=activation),
    activation,
    ResidualBlockE(2*n_filters, activation=activation),
    activation,
    nn.MaxPool2d((2,2)),
    ResidualBottelneck(2*n_filters, 4*n_filters, activation=activation),
    activation,
    ResidualBlockE(4*n_filters, activation=activation),
    activation,
    nn.Flatten(),
    nn.Linear(D*n_filters//4, classes),
    )

### ReLU

In [29]:
# Relu
cnn_relu = nn.Sequential(
    cnn_layer(C, n_filters), 
    cnn_layer(n_filters), 
    cnn_layer(n_filters),
    nn.MaxPool2d((2,2)),
    cnn_layer(n_filters, 2*n_filters), 
    cnn_layer(2*n_filters), 
    cnn_layer(2*n_filters),
    nn.MaxPool2d((2,2)),
    cnn_layer(2*n_filters, 4*n_filters), 
    cnn_layer(4*n_filters), 
    nn.Flatten(),
    nn.Linear(D * 4 * n_filters // (4 * 4), classes),
)

In [30]:
summary(cnn_relu, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─Sequential: 1-1                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-1                       [256, 32, 28, 28]         320
├─Sequential: 1-16                       --                        (recursive)
│    └─LeakyReLU: 2-2                    [256, 32, 28, 28]         --
├─Sequential: 1-3                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-3                       [256, 32, 28, 28]         9,248
├─Sequential: 1-16                       --                        (recursive)
│    └─LeakyReLU: 2-4                    [256, 32, 28, 28]         --
├─Sequential: 1-5                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-5                       [256, 32, 28, 28]         9,248
├─Sequential: 1-16                       --                        (recursive)
│    └─LeakyReLU: 2-6                    [256, 32, 

In [31]:
# Relu and BachNormalztion
cnn_relu_bn = nn.Sequential(
    cnn_layer_bn(C, n_filters),
    cnn_layer_bn(n_filters),
    cnn_layer_bn(n_filters),
    nn.MaxPool2d((2,2)),
    cnn_layer_bn(n_filters, 2*n_filters),
    cnn_layer_bn(2*n_filters),
    cnn_layer_bn(2*n_filters),
    nn.MaxPool2d((2,2)),
    cnn_layer_bn(2*n_filters, 4*n_filters),
    cnn_layer_bn(4*n_filters),
    nn.Flatten(),
    nn.Linear(D*n_filters//4, classes),
)

In [32]:
summary(cnn_relu_bn, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─Sequential: 1-1                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-1                       [256, 32, 28, 28]         320
│    └─BatchNorm2d: 2-2                  [256, 32, 28, 28]         64
├─Sequential: 1-16                       --                        (recursive)
│    └─LeakyReLU: 2-3                    [256, 32, 28, 28]         --
├─Sequential: 1-3                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-4                       [256, 32, 28, 28]         9,248
│    └─BatchNorm2d: 2-5                  [256, 32, 28, 28]         64
├─Sequential: 1-16                       --                        (recursive)
│    └─LeakyReLU: 2-6                    [256, 32, 28, 28]         --
├─Sequential: 1-5                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-7                       [256, 32, 28, 28]     

In [33]:
# Relu and BachNormalztion and ResidualConnention
cnn_res_relu = cnn_res(C, n_filters, classes, activation=nn.LeakyReLU(.1))

In [34]:
summary(cnn_res_relu, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─ResidualBottelneck: 1-1                [256, 32, 28, 28]         128
│    └─Sequential: 2-1                   [256, 32, 28, 28]         920
│    │    └─BatchNorm2d: 3-1             [256, 1, 28, 28]          2
├─LeakyReLU: 1-2                         [256, 1, 28, 28]          --
├─ResidualBottelneck: 1-7                --                        (recursive)
│    └─Sequential: 2-4                   --                        (recursive)
│    │    └─Conv2d: 3-2                  [256, 8, 28, 28]          16
│    │    └─BatchNorm2d: 3-3             [256, 8, 28, 28]          16
├─LeakyReLU: 1-4                         [256, 8, 28, 28]          --
├─ResidualBottelneck: 1-7                --                        (recursive)
│    └─Sequential: 2-4                   --                        (recursive)
│    │    └─Conv2d: 3-4                  [256, 8

### PReLU

In [35]:
# prelu

activation = nn.PReLU()
cnn_prelu = nn.Sequential(
    cnn_layer(C, n_filters, activation=activation), 
    cnn_layer(n_filters, activation=activation), 
    cnn_layer(n_filters, activation=activation),
    nn.MaxPool2d((2,2)),
    cnn_layer(n_filters, 2*n_filters, activation=activation),
    cnn_layer(2*n_filters, activation=activation), 
    cnn_layer(2*n_filters, activation=activation),
    nn.MaxPool2d((2,2)),
    cnn_layer(2*n_filters, 4*n_filters, activation=activation), 
    cnn_layer(4*n_filters, activation=activation), 
    nn.Flatten(),
    nn.Linear(D * 4 * n_filters // (4 * 4), classes),
)

In [36]:
summary(cnn_prelu, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─Sequential: 1-1                        [256, 32, 28, 28]         1
│    └─Conv2d: 2-1                       [256, 32, 28, 28]         320
├─Sequential: 1-16                       --                        (recursive)
│    └─PReLU: 2-2                        [256, 32, 28, 28]         1
├─Sequential: 1-3                        [256, 32, 28, 28]         1
│    └─Conv2d: 2-3                       [256, 32, 28, 28]         9,248
├─Sequential: 1-16                       --                        (recursive)
│    └─PReLU: 2-4                        [256, 32, 28, 28]         (recursive)
├─Sequential: 1-5                        [256, 32, 28, 28]         1
│    └─Conv2d: 2-5                       [256, 32, 28, 28]         9,248
├─Sequential: 1-16                       --                        (recursive)
│    └─PReLU: 2-6                        [256,

In [37]:
# PReLU and BachNormalization
activation = nn.PReLU()
cnn_prelu_bn = nn.Sequential(
    cnn_layer_bn(C, n_filters, activation=activation),
    cnn_layer_bn(n_filters, activation=activation), 
    cnn_layer_bn(n_filters, activation=activation),
    nn.MaxPool2d((2,2)),
    cnn_layer_bn(n_filters,2*n_filters, activation=activation),
    cnn_layer_bn(2*n_filters, activation=activation), 
    cnn_layer_bn(2*n_filters, activation=activation),
    nn.MaxPool2d((2,2)),
    cnn_layer_bn(2*n_filters, 4*n_filters, activation=activation), 
    cnn_layer_bn(4*n_filters, activation=activation), 
    nn.Flatten(),
    nn.Linear(D * 4 * n_filters // (4 * 4), classes),
)

In [38]:
summary(cnn_prelu_bn, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─Sequential: 1-1                        [256, 32, 28, 28]         1
│    └─Conv2d: 2-1                       [256, 32, 28, 28]         320
│    └─BatchNorm2d: 2-2                  [256, 32, 28, 28]         64
├─Sequential: 1-16                       --                        (recursive)
│    └─PReLU: 2-3                        [256, 32, 28, 28]         1
├─Sequential: 1-3                        [256, 32, 28, 28]         1
│    └─Conv2d: 2-4                       [256, 32, 28, 28]         9,248
│    └─BatchNorm2d: 2-5                  [256, 32, 28, 28]         64
├─Sequential: 1-16                       --                        (recursive)
│    └─PReLU: 2-6                        [256, 32, 28, 28]         (recursive)
├─Sequential: 1-5                        [256, 32, 28, 28]         1
│    └─Conv2d: 2-7                       [256, 32, 28, 28]

In [39]:
# PRelu and BachNormalztion and ResidualConnention

activation = nn.PReLU()
cnn_res_prelu = cnn_res(C, n_filters, classes, activation=nn.PReLU())

In [40]:
summary(cnn_res_prelu, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─ResidualBottelneck: 1-1                [256, 32, 28, 28]         128
│    └─Sequential: 2-1                   [256, 32, 28, 28]         921
│    │    └─BatchNorm2d: 3-1             [256, 1, 28, 28]          2
├─PReLU: 1-2                             [256, 1, 28, 28]          1
├─ResidualBottelneck: 1-7                --                        (recursive)
│    └─Sequential: 2-4                   --                        (recursive)
│    │    └─Conv2d: 3-2                  [256, 8, 28, 28]          16
│    │    └─BatchNorm2d: 3-3             [256, 8, 28, 28]          16
├─PReLU: 1-4                             [256, 8, 28, 28]          (recursive)
├─ResidualBottelneck: 1-7                --                        (recursive)
│    └─Sequential: 2-4                   --                        (recursive)
│    │    └─Conv2d: 3-4                 

## Training

In [41]:
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}
epochs = 10
params = {
    'device': device,
    'loss_func': loss_func.__class__.__name__,
    'epochs': epochs,
    'batch_size': batch_size
}

In [None]:
models = {
    'cnn_relu': cnn_relu,
    'cnn_relu_bn': cnn_relu_bn,
    'cnn_relu_res': cnn_res_relu,
    'cnn_prelu': cnn_prelu,
    'cnn_prelu_bn': cnn_prelu_bn,
    'cnn_prelu_res': cnn_res_prelu,
    }

In [None]:
for experiment, model in models.items():
    optimizer = torch.optim.AdamW(model.parameters())
    params['optimizer'] = optimizer.defaults
    with open('model_summary.txt', 'w') as f:
        f.write(str(summary(model, input_size=(batch_size, C, W, H))))
    with mlflow.start_run(nested=True, run_name=experiment):
        mlflow.log_artifact('model_summary.txt')
        mlflow.log_params(params)
        fc_results = train_network(
            model=model,
            loss_func=loss_func,
            train_loader=train_loader,
            valid_loader=test_loader,
            # test_loader=test_loader,
            epochs=epochs,
            optimizer=optimizer,
            score_funcs=score_funcs,
            device=device,
            )

<img src="./images/E1_train_acc.png">

<img src="./images/E1_train_loss.png">

<img src="./images/E1_valid_acc.png">

<img src="./images/E1_valid_loss.png">

<img src="./images/E1_time.png">