In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Exercise 5

<img src="./images/05.png" width=800>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision 
from torchvision import transforms

from torch.utils.data import Dataset, DataLoader

import os
import mlflow
from torchinfo import summary
from utils import train_network, accuracy_score_wrapper



  from tqdm.autonotebook import tqdm


In [None]:
# Point MLflow at a local store next to the notebook. The location is also
# published through the MLFLOW_TRACKING_URI environment variable.
os.environ.update(MLFLOW_TRACKING_URI='./mlruns06_5')
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [None]:
# Select the experiment that the training runs in this notebook are recorded under.
mlflow.set_experiment(experiment_name='Exercise06_5')

2025/06/07 12:02:31 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_4' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns_4/626301946211563652', creation_time=1749285151104, experiment_id='626301946211563652', last_update_time=1749285151104, lifecycle_stage='active', name='Exercise_4', tags={}>

In [2]:
# Request deterministic cuDNN kernels, then use the GPU when one is available
# and fall back to the CPU otherwise.
torch.backends.cudnn.deterministic = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset and Dataloader

In [3]:
# FashionMNIST provides separate train and test splits. The evaluation set must
# be requested with train=False; otherwise the "test" loader simply re-serves
# the training images and validation metrics are measured on training data.
train_data = torchvision.datasets.FashionMNIST("./", train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.FashionMNIST("./", train=False, transform=transforms.ToTensor(), download=True)

batch_size = 256
# Only the training loader is shuffled; the evaluation loader keeps dataset order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [4]:
# Input geometry and label count are read from the training set instead of
# being hard-coded.
# NOTE(review): axis 1 is named W and axis 2 is named H here; presumably
# harmless if the images are square — confirm.
W, H = train_data.data.shape[1:3]
D = W * H  # number of pixels per image
C = 1  # input channels passed to the first block of the network
classes = torch.unique(train_data.targets).numel()
n_filters = 32  # output channels of the first block of the network

## Models

In [5]:
class ResidualBlockE(nn.Module):
    """Residual block that keeps the channel count: ``forward(x) = x + F(x)``.

    ``F`` runs convolution -> batch norm -> activation twice. Every convolution
    is padded with ``(kernel_size - 1) // 2``, so for odd kernel sizes the
    spatial size is preserved, which the addition in ``forward`` relies on.

    Args:
        channels: Number of input (and output) channels.
        kernel_size: Side length of the square convolution kernels.
        activation: Module applied after each batch norm. The same instance is
            placed at both positions inside ``F``.
    """

    def __init__(self, channels, kernel_size=3, activation=nn.LeakyReLU(.1)):
        super().__init__()

        same_pad = (kernel_size - 1) // 2

        # Two identical conv -> norm -> activation stages, built in order.
        stages = []
        for _ in range(2):
            stages.append(nn.Conv2d(channels, channels, kernel_size, padding=same_pad))
            stages.append(nn.BatchNorm2d(channels))
            stages.append(activation)
        self.F = nn.Sequential(*stages)

    def forward(self, x):
        """Return the input plus the output of the residual branch ``F``."""
        residual = self.F(x)
        return x + residual

In [6]:
class ResidualBottelneck(nn.Module):
    """Residual block that may change the channel count: ``F(x) + shortcut(x)``.

    ``F`` is three batch norm -> activation -> convolution stages: a 1x1
    convolution into ``bottleneck`` channels, a ``kernel_size`` convolution at
    that width, and a 1x1 convolution out to ``out_channels``. ``bottleneck``
    is ``max(out_channels // 4, in_channels)``.

    The shortcut is the identity when ``in_channels == out_channels``;
    otherwise it is a 1x1 convolution followed by batch norm, so both branches
    have ``out_channels`` channels when they are added.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels of the output tensor.
        kernel_size: Side length of the middle convolution's square kernel.
        activation: Module applied after each batch norm inside ``F``. The same
            instance is reused at all three positions.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, activation=nn.LeakyReLU(.1)):
        super().__init__()
        same_pad = (kernel_size - 1) // 2
        bottleneck = max(out_channels // 4, in_channels)

        # (input channels, output channels, kernel, padding) for each stage of F.
        conv_specs = (
            (in_channels, bottleneck, 1, 0),
            (bottleneck, bottleneck, kernel_size, same_pad),
            (bottleneck, out_channels, 1, 0),
        )
        stages = []
        for c_in, c_out, k, p in conv_specs:
            stages.append(nn.BatchNorm2d(c_in))
            stages.append(activation)
            stages.append(nn.Conv2d(c_in, c_out, k, padding=p))
        self.F = nn.Sequential(*stages)

        if in_channels == out_channels:
            self.shortcut = nn.Identity()
        else:
            # Project the input so its channel count matches F's output.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, padding=0),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        """Return the sum of the residual branch and the shortcut branch."""
        main = self.F(x)
        return main + self.shortcut(x)

In [None]:
def cnn_res(C, n_filters, classes, activation, W, H, num_layers):
    """Build a residual CNN classifier as an ``nn.Sequential``.

    The network starts with a stem (``ResidualBottelneck(C -> n_filters)``
    followed by ``ResidualBlockE(n_filters)``) and then appends ``num_layers``
    stages. Stage ``i`` widens the features from ``2**i * n_filters`` to
    ``2**(i + 1) * n_filters`` with a ``ResidualBottelneck`` followed by a
    ``ResidualBlockE``. At most three stages are preceded by a 2x2 max-pool:
    every stage when ``num_layers < 6``, otherwise every
    ``num_layers // 3``-th stage. A flatten + linear head maps to ``classes``.

    Args:
        C: Number of input channels.
        n_filters: Channel count produced by the stem.
        classes: Number of outputs of the final linear layer.
        activation: Activation module shared by every block and also inserted
            between blocks.
        W, H: Spatial size of the input, used to size the linear layer.
        num_layers: Number of widening stages after the stem (``0`` builds the
            stem and the classifier head only).

    Returns:
        The assembled ``nn.Sequential`` model.

    Raises:
        ValueError: If ``num_layers`` is negative.
    """
    if num_layers < 0:
        raise ValueError(f'num_layers must be >= 0, got {num_layers}')

    layers = [
        ResidualBottelneck(C, n_filters, activation=activation),
        activation,
        ResidualBlockE(n_filters, activation=activation),
        activation,
    ]

    # With no extra stage the stem output feeds the classifier head directly.
    feature_maps = n_filters
    num_pool = 0
    # max(1, ...) keeps the modulo valid for num_layers < 3, where every stage
    # is pooled; the pool count is capped at three.
    pool_every = max(1, num_layers // 3)

    for i in range(num_layers):
        in_ch = (2 ** i) * n_filters
        out_ch = 2 * in_ch
        if num_pool < 3 and i % pool_every == 0:
            layers.append(nn.MaxPool2d((2, 2)))
            num_pool += 1
        layers.extend([
            ResidualBottelneck(in_ch, out_ch, activation=activation),
            activation,
            ResidualBlockE(out_ch, activation=activation),
            activation,
        ])
        feature_maps = out_ch

    # Each 2x2 pool floors the spatial size by two, so after num_pool pools the
    # feature map is (W // 2**num_pool) x (H // 2**num_pool).
    pooled = 2 ** num_pool
    layers.extend([
        nn.Flatten(),
        nn.Linear(feature_maps * (W // pooled) * (H // pooled), classes),
    ])
    return nn.Sequential(*layers)

In [16]:
# Activation module shared by every network built below (and by the training cell).
activation = nn.LeakyReLU(.1)
# D = W // (2**(1))
# Build one network per depth and run torchinfo's `summary` on a batch-shaped
# input. The return value is discarded here — presumably this serves as a
# shape check for each depth; confirm.
for num_layers in range(1, 6):
    print(f'num_layers: {num_layers}')
    model = cnn_res(C, n_filters, classes, activation, W, H, num_layers=num_layers)
    summary(model, input_size=(batch_size, C, W, H))
    # Optional: persist each summary to its own text file.
    # with open(f'./model_summary/num_layers_{num_layers}.txt', 'w') as f:
    #     f.write(str(summary(model, input_size=(batch_size, C, W, H))))

num_layers: 1
64
14 14
num_layers: 2
128
7 7
num_layers: 3
256
3 3
num_layers: 4
512
3 3
num_layers: 5
1024
3 3


In [10]:
# Display the module tree of whichever network is currently bound to `model`.
model

Sequential(
  (0): ResidualBottelneck(
    (F): Sequential(
      (0): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): LeakyReLU(negative_slope=0.1)
      (2): Conv2d(1, 8, kernel_size=(1, 1), stride=(1, 1))
      (3): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (4): LeakyReLU(negative_slope=0.1)
      (5): Conv2d(8, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (7): LeakyReLU(negative_slope=0.1)
      (8): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
    )
    (shortcut): Sequential(
      (0): Conv2d(1, 32, kernel_size=(1, 1), stride=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): LeakyReLU(negative_slope=0.1)
  (2): ResidualBlockE(
    (F): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


## Training

In [None]:
# Loss, metric and epoch count shared by every training run below.
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}
epochs = 10

# Hyper-parameters logged with each MLflow run; the loss is recorded by its
# class name rather than as an object.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
# Train one network per depth and record each run in MLflow.
# The loop index is zero-based; the network is built with `num_layers + 1`
# stages so that the architecture matches the `num_layers` value that is
# logged and used in the run name (and so that a depth of 0 is never requested).
for num_layers in range(7):
    depth = num_layers + 1
    print(f'num_layers: {depth}')
    model = cnn_res(C, n_filters, classes, activation, W, H, num_layers=depth)
    optimizer = torch.optim.AdamW(model.parameters())
    params['optimizer'] = optimizer.defaults
    params['num_layers'] = depth
    # The summary file is overwritten on every iteration and attached to the
    # run started right below.
    with open('model_summary.txt', 'w') as f:
        f.write(str(summary(model, input_size=(batch_size, C, W, H))))
    with mlflow.start_run(nested=True, run_name=f'ResidualBlockE_{depth}'):
        mlflow.log_artifact('model_summary.txt')
        mlflow.log_params(params)
        fc_results = train_network(
            model=model,
            loss_func=loss_func,
            train_loader=train_loader,
            valid_loader=test_loader,
            # test_loader=test_loader,
            epochs=epochs,
            optimizer=optimizer,
            score_funcs=score_funcs,
            device=device,
            )

<img src="./images/E5_train_acc.png">

<img src="./images/E5_train_loss.png">

<img src="./images/E5_valid_acc.png">

<img src="./images/E5_valid_loss.png">

<img src="./images/E5_time.png">