In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (such as the local `utils` helpers) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Exercise 2

<img src="./images/02.png" width=800>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision 
from torchvision import transforms

from torch.utils.data import Dataset, DataLoader

import os
import mlflow
from torchinfo import summary
from utils import train_network, accuracy_score_wrapper



  from tqdm.autonotebook import tqdm


In [44]:
# Store runs in a notebook-specific local directory. The URI is exported through
# the environment and then handed to MLflow explicitly from that same variable.
os.environ['MLFLOW_TRACKING_URI'] = './mlruns06_2'
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [45]:
# Select the experiment that groups every run started in this notebook
# (the captured output below shows MLflow creating it when it does not exist).
mlflow.set_experiment('Exercise06_2')

2025/06/08 15:29:31 INFO mlflow.tracking.fluent: Experiment with name 'Exercise06_2' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_06/mlruns06_2/743342736727177787', creation_time=1749383971503, experiment_id='743342736727177787', last_update_time=1749383971503, lifecycle_stage='active', name='Exercise06_2', tags={}>

In [2]:
# Seed PyTorch's global RNG so that weight initialisation and anything else that
# draws from it is repeatable under "Restart Kernel -> Run All". The cuDNN flag
# alone only selects deterministic kernels; it does not fix the random state.
SEED = 42
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset and Dataloader

In [3]:
# FashionMNIST is downloaded into the current directory on first use.
train_data = torchvision.datasets.FashionMNIST("./", train=True, transform=transforms.ToTensor(), download=True)
# Use the held-out split (train=False) for evaluation; loading the training
# split here would make the reported validation scores a copy of the training scores.
test_data = torchvision.datasets.FashionMNIST("./", train=False, transform=transforms.ToTensor(), download=True)

batch_size = 256
# Only the training loader shuffles; evaluation order does not matter.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [4]:
# Spatial size of one image, read from axes 1 and 2 of the raw data tensor.
# NOTE(review): axis 1 is stored in W and axis 2 in H; presumably the images are
# square so the naming has no effect -- confirm before using non-square data.
W, H = train_data.data.shape[1:3]
D = W * H  # number of pixels per image
C = 1  # number of input channels fed to the first convolution
# Number of distinct labels present in the training targets.
classes = torch.unique(train_data.targets).numel()
n_filters = 32  # channel width used by every convolutional layer

## Models

In [5]:
def cnn_layer(in_filters, out_filters=None, kernel_size=3, activation=nn.LeakyReLU(.1)):
    """Build a convolution followed by an activation.

    Args:
        in_filters: Number of input channels of the convolution.
        out_filters: Number of output channels; when ``None`` it equals
            ``in_filters``.
        kernel_size: Size of the convolution kernel. The padding is
            ``kernel_size // 2``, which keeps the spatial size for odd kernels.
        activation: Module applied after the convolution. The default instance
            is created once at definition time and is shared by every call
            that does not pass its own.

    Returns:
        An ``nn.Sequential`` holding ``[Conv2d, activation]``.
    """
    n_out = in_filters if out_filters is None else out_filters
    conv = nn.Conv2d(in_filters, n_out, kernel_size, padding=kernel_size // 2)
    return nn.Sequential(conv, activation)

In [6]:
def cnn_layer_bn(in_filters, out_filters=None, kernel_size=3, activation=nn.LeakyReLU(.1)):
    """Build a convolution followed by batch normalisation and an activation.

    Args:
        in_filters: Number of input channels of the convolution.
        out_filters: Number of output channels; when ``None`` it equals
            ``in_filters``.
        kernel_size: Size of the convolution kernel. The padding is
            ``kernel_size // 2``, which keeps the spatial size for odd kernels.
        activation: Module applied after the normalisation. The default
            instance is created once at definition time and is shared by every
            call that does not pass its own.

    Returns:
        An ``nn.Sequential`` holding ``[Conv2d, BatchNorm2d, activation]``.
    """
    n_out = in_filters if out_filters is None else out_filters
    conv = nn.Conv2d(in_filters, n_out, kernel_size, padding=kernel_size // 2)
    norm = nn.BatchNorm2d(n_out)
    return nn.Sequential(conv, norm, activation)

## Training

In [51]:
# Loss, metric and epoch budget shared by every training run.
loss_func = nn.CrossEntropyLoss()
score_funcs = dict(Accuracy=accuracy_score_wrapper)
epochs = 10

# Settings recorded alongside each run; the loss is stored by class name only.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch_size,
)

In [7]:
# Experiment label -> factory that builds one convolutional block.
type_layer = dict(cnn=cnn_layer, cnn_bn=cnn_layer_bn)

In [None]:
def _build_cnn(layer_factory, num_layers, in_channels, n_filters, num_classes, adaptive_size=8):
    """Stack ``num_layers`` convolutional blocks into a classifier.

    Networks with more than three blocks receive up to three 2x2 max-pooling
    layers, one after every ``max(1, num_layers // 3)`` blocks. The feature map
    is then adaptively average-pooled to ``adaptive_size x adaptive_size`` so
    the final linear layer always sees ``n_filters * adaptive_size ** 2``
    inputs, however many pooling layers were inserted.

    Args:
        layer_factory: Callable ``(in_channels, out_channels) -> nn.Module``
            that builds one convolutional block.
        num_layers: Number of convolutional blocks to stack.
        in_channels: Number of channels of the input images.
        n_filters: Number of output channels of every block.
        num_classes: Size of the output of the final linear layer.
        adaptive_size: Side length of the adaptive average-pooling output.

    Returns:
        The assembled ``nn.Sequential`` model.
    """
    max_pools = 3
    # Spread the pooling layers out instead of bunching them together.
    pooling_interval = max(1, num_layers // max_pools)

    layers = []
    pools_added = 0
    blocks_since_pool = 0
    for _layer_idx in range(num_layers):
        layers.append(layer_factory(in_channels, n_filters))
        in_channels = n_filters  # every block after the first maps n_filters -> n_filters
        blocks_since_pool += 1

        # Shallow networks (three blocks or fewer) are left without pooling.
        if num_layers > max_pools and pools_added < max_pools and blocks_since_pool >= pooling_interval:
            layers.append(nn.MaxPool2d((2, 2)))
            pools_added += 1
            blocks_since_pool = 0

    layers.append(nn.AdaptiveAvgPool2d((adaptive_size, adaptive_size)))
    layers.append(nn.Flatten())
    layers.append(nn.Linear(n_filters * adaptive_size * adaptive_size, num_classes))
    return nn.Sequential(*layers)


# Train one model per (block type, depth) pair and log each as its own MLflow run.
for experiment, one_layer in type_layer.items():
    for num_layers in range(1, 21):
        print(f"{num_layers} CNN layers ---")

        model = _build_cnn(one_layer, num_layers, C, n_filters, classes)
        optimizer = torch.optim.AdamW(model.parameters())

        # Per-run settings are added to the shared params dict before logging.
        params['optimizer'] = optimizer.defaults
        params['num_layers'] = num_layers
        params['experiment'] = experiment

        # The architecture summary is written to disk so it can be attached to
        # the run as an artifact; the file is overwritten on every iteration.
        with open('model_summary.txt', 'w') as f:
            f.write(str(summary(model, input_size=(batch_size, C, W, H))))
        with mlflow.start_run(nested=True, run_name=experiment+f'{num_layers}'):
            mlflow.log_artifact('model_summary.txt')
            mlflow.log_params(params)
            fc_results = train_network(
                model=model,
                loss_func=loss_func,
                train_loader=train_loader,
                valid_loader=test_loader,
                # test_loader=test_loader,
                epochs=epochs,
                optimizer=optimizer,
                score_funcs=score_funcs,
                device=device,
                )

#### **Selected results and their averages**

<img src="./images/E2_train_acc_selected.png">

<img src="./images/E2_train_loss_selected.png">

<img src="./images/E2_valid_acc_selected.png">

<img src="./images/E2_valid_loss_selected.png">

##### **Averages**

<img src="./images/E2_train_acc_avg.png">

<img src="./images/E2_train_loss_avg.png">

<img src="./images/E2_valid_acc_avg.png">

<img src="./images/E2_valid_loss_avg.png">

<img src="./images/E2_time_avg.png">