In [1]:
%load_ext autoreload
%autoreload 2

# Exercise 2

<img src="./images/02.png" width=800>

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision 
from torchvision import transforms

from torch.utils.data import Dataset, DataLoader

import os
import mlflow
from torchinfo import summary
from utils import train_network, accuracy_score_wrapper



  from tqdm.autonotebook import tqdm


In [44]:
os.environ['MLFLOW_TRACKING_URI'] = './mlruns06_2'
mlflow.set_tracking_uri(os.environ.get('MLFLOW_TRACKING_URI'))

In [45]:
mlflow.set_experiment('Exercise06_2')

2025/06/08 15:29:31 INFO mlflow.tracking.fluent: Experiment with name 'Exercise06_2' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_06/mlruns06_2/743342736727177787', creation_time=1749383971503, experiment_id='743342736727177787', last_update_time=1749383971503, lifecycle_stage='active', name='Exercise06_2', tags={}>

In [2]:
torch.backends.cudnn.deterministic=True
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Dataset and Dataloader

In [3]:
train_data = torchvision.datasets.FashionMNIST("./", train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.FashionMNIST("./", train=True, transform=transforms.ToTensor(), download=True)

batch_size = 256
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size)

In [4]:
W = train_data.data.shape[1] 
H = train_data.data.shape[2]
D = W * H
C = 1
classes = len(torch.unique(train_data.targets))
n_filters = 32

## Models

In [5]:
def cnn_layer(in_filters, out_filters=None, kernel_size=3, activation=nn.LeakyReLU(.1)):
    if out_filters is None:
        out_filters = in_filters
    padding = kernel_size // 2
    return nn.Sequential(
        nn.Conv2d(in_filters, out_filters, kernel_size, padding=padding),
        activation
    )

In [6]:
def cnn_layer_bn(in_filters, out_filters=None, kernel_size=3, activation=nn.LeakyReLU(.1)):
    if out_filters is None:
        out_filters = in_filters
    padding = kernel_size // 2
    return nn.Sequential(
        nn.Conv2d(in_filters, out_filters, kernel_size, padding=padding),
        nn.BatchNorm2d(out_filters),
        activation
    )

## Training

In [51]:
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}
epochs = 10
params = {
    'device': device,
    'loss_func': loss_func.__class__.__name__,
    'epochs': epochs,
    'batch_size': batch_size
}

In [7]:
type_layer = {
    'cnn': cnn_layer,
    'cnn_bn': cnn_layer_bn,
    }

In [None]:
for experiment, one_layer in type_layer.items():
    for num_layers in range(1, 21):  
        print(f"{num_layers} CNN layers ---")

        layers = []
        in_channels = C
        num_maxpool = 0  # Track the number of max pooling layers added
        cnn_layer_count = 0

        # Calculate the interval for adding max pooling layers
        pooling_interval = max(1, num_layers // 3)  # Ensure at least 1

        for i in range(num_layers):
            # Add a CNN layer
            layers.append(one_layer(in_channels, n_filters))
            in_channels = n_filters
            cnn_layer_count += 1

            # Add MaxPool2d if conditions are met
            if num_layers > 3 and num_maxpool < 3 and cnn_layer_count >= pooling_interval:
                layers.append(nn.MaxPool2d((2, 2)))
                num_maxpool += 1
                cnn_layer_count = 0 #reset counter

        # Adaptive Pooling and Flatten
        adaptive_size = 8
        layers.append(nn.AdaptiveAvgPool2d((adaptive_size, adaptive_size)))
        layers.append(nn.Flatten())

        # Calculate the size of the input to the linear layer
        flattened_size = n_filters * adaptive_size * adaptive_size
        layers.append(nn.Linear(flattened_size, classes))

        # Create the model
        model = nn.Sequential(*layers)
        optimizer = torch.optim.AdamW(model.parameters())
        params['optimizer'] = optimizer.defaults
        params['num_layers'] = num_layers
        params['experiment'] = experiment
        with open('model_summary.txt', 'w') as f:
            f.write(str(summary(model, input_size=(batch_size, C, W, H))))
        with mlflow.start_run(nested=True, run_name=experiment+f'{num_layers}'):
            mlflow.log_artifact('model_summary.txt')
            mlflow.log_params(params)
            fc_results = train_network(
                model=model,
                loss_func=loss_func,
                train_loader=train_loader,
                valid_loader=test_loader,
                # test_loader=test_loader,
                epochs=epochs,
                optimizer=optimizer,
                score_funcs=score_funcs,
                device=device,
                )

In [13]:
for experiment, one_layer in type_layer.items():
    for num_layers in range(1, 21):  
        print(f"{num_layers} CNN layers ---")

        layers = []
        in_channels = C
        num_maxpool = 0  # Track the number of max pooling layers added
        cnn_layer_count = 0

        # Calculate the interval for adding max pooling layers
        pooling_interval = max(1, num_layers // 3)  # Ensure at least 1

        for i in range(num_layers):
            # Add a CNN layer
            layers.append(one_layer(in_channels, n_filters))
            in_channels = n_filters
            cnn_layer_count += 1

            # Add MaxPool2d if conditions are met
            if num_layers > 3 and num_maxpool < 3 and cnn_layer_count >= pooling_interval:
                layers.append(nn.MaxPool2d((2, 2)))
                num_maxpool += 1
                cnn_layer_count = 0 #reset counter

        # Adaptive Pooling and Flatten
        adaptive_size = 8
        layers.append(nn.AdaptiveAvgPool2d((adaptive_size, adaptive_size)))
        layers.append(nn.Flatten())

        # Calculate the size of the input to the linear layer
        flattened_size = n_filters * adaptive_size * adaptive_size
        layers.append(nn.Linear(flattened_size, classes))

        # Create the model
        model = nn.Sequential(*layers)
        with open(f'./model_summary/{experiment}_{num_layers}.txt', 'w') as f:
            f.write(str(summary(model, input_size=(batch_size, C, W, H))))

1 CNN layers ---
2 CNN layers ---
3 CNN layers ---
4 CNN layers ---
5 CNN layers ---
6 CNN layers ---
7 CNN layers ---
8 CNN layers ---
9 CNN layers ---
10 CNN layers ---
11 CNN layers ---
12 CNN layers ---
13 CNN layers ---
14 CNN layers ---
15 CNN layers ---
16 CNN layers ---
17 CNN layers ---
18 CNN layers ---
19 CNN layers ---
20 CNN layers ---
1 CNN layers ---
2 CNN layers ---
3 CNN layers ---
4 CNN layers ---
5 CNN layers ---
6 CNN layers ---
7 CNN layers ---
8 CNN layers ---
9 CNN layers ---
10 CNN layers ---
11 CNN layers ---
12 CNN layers ---
13 CNN layers ---
14 CNN layers ---
15 CNN layers ---
16 CNN layers ---
17 CNN layers ---
18 CNN layers ---
19 CNN layers ---
20 CNN layers ---


In [None]:
import torch
import torch.nn as nn
# from torchsummary import summary

# --- Configuration (Example Values) ---
C = 3  # Input channels (e.g., RGB)
n_filters = 32  # Number of filters in the first layer
classes = 10  # Number of output classes
D = 28  # Input image dimension (assuming square images)
W = D
H = D
batch_size = 32

# --- Helper Function for CNN Layer ---
def conv_layer(in_channels, out_channels=None):
    if out_channels is None:
        out_channels = in_channels
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)

experiment = 'cnn'
# --- Main Loop to Define and Summarize CNN Models ---
if __name__ == '__main__':
    for num_layers in range(1, 21):  # Generate models with 1 to 20 CNN layers
        print(f"\n--- Model with {num_layers} CNN layers ---")

        layers = []
        in_channels = C
        num_maxpool = 0  # Track the number of max pooling layers added
        cnn_since_pool = 0 #Track layers since last pool

        for i in range(num_layers):
            # Add a CNN layer
            layers.append(conv_layer(in_channels, n_filters))
            in_channels = n_filters #next layer will use n_filters as input
            cnn_since_pool += 1

            # Add MaxPool2d if conditions are met
            if num_layers > 3 and num_maxpool < 3 and cnn_since_pool >= 3:
                layers.append(nn.MaxPool2d((2, 2)))
                num_maxpool += 1
                cnn_since_pool = 0 #reset counter

        # Adaptive Pooling and Flatten
        adaptive_size = 8
        layers.append(nn.AdaptiveAvgPool2d((adaptive_size, adaptive_size)))
        layers.append(nn.Flatten())

        # Calculate the size of the input to the linear layer
        flattened_size = n_filters * adaptive_size * adaptive_size
        layers.append(nn.Linear(flattened_size, classes))

        # Create the model
        model = nn.Sequential(*layers)

        # Print the model summary
        try:
            with open(f'./model_summary/{experiment}_{num_layers}.txt', 'w') as f:
                f.write(str(summary(model, input_size=(batch_size, C, W, H))))        
        except Exception as e:
            print(f"Error during summary: {e}")
            continue  # Skip to the next model if there's an error

In [12]:
import torch
import torch.nn as nn
# from torchsummary import summary

# --- Configuration (Example Values) ---
C = 3  # Input channels (e.g., RGB)
n_filters = 32  # Number of filters in the first layer
classes = 10  # Number of output classes
D = 28  # Input image dimension (assuming square images)
W = D
H = D
batch_size = 32
experiment = 'cnn'
# --- Helper Function for CNN Layer ---
def conv_layer(in_channels, out_channels=None):
    if out_channels is None:
        out_channels = in_channels
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)

# --- Main Loop to Define and Summarize CNN Models ---
if __name__ == '__main__':
    for num_layers in range(1, 21):  # Generate models with 1 to 20 CNN layers
        print(f"\n--- Model with {num_layers} CNN layers ---")

        layers = []
        in_channels = C
        num_maxpool = 0  # Track the number of max pooling layers added
        cnn_layer_count = 0

        # Calculate the interval for adding max pooling layers
        pooling_interval = max(1, num_layers // 3)  # Ensure at least 1

        for i in range(num_layers):
            # Add a CNN layer
            layers.append(conv_layer(in_channels, n_filters))
            in_channels = n_filters
            cnn_layer_count += 1

            # Add MaxPool2d if conditions are met
            if num_layers > 3 and num_maxpool < 3 and cnn_layer_count >= pooling_interval:
                layers.append(nn.MaxPool2d((2, 2)))
                num_maxpool += 1
                cnn_layer_count = 0 #reset counter

        # Adaptive Pooling and Flatten
        adaptive_size = 8
        layers.append(nn.AdaptiveAvgPool2d((adaptive_size, adaptive_size)))
        layers.append(nn.Flatten())

        # Calculate the size of the input to the linear layer
        flattened_size = n_filters * adaptive_size * adaptive_size
        layers.append(nn.Linear(flattened_size, classes))

        # Create the model
        model = nn.Sequential(*layers)

        # Print the model summary
        try:
            with open(f'./model_summary/{experiment}_{num_layers}.txt', 'w') as f:
                f.write(str(summary(model, input_size=(batch_size, C, W, H))))        
        except Exception as e:
            print(f"Error during summary: {e}")
            continue  # Skip to the next model if there's an error


--- Model with 1 CNN layers ---

--- Model with 2 CNN layers ---

--- Model with 3 CNN layers ---

--- Model with 4 CNN layers ---

--- Model with 5 CNN layers ---

--- Model with 6 CNN layers ---

--- Model with 7 CNN layers ---

--- Model with 8 CNN layers ---

--- Model with 9 CNN layers ---

--- Model with 10 CNN layers ---

--- Model with 11 CNN layers ---

--- Model with 12 CNN layers ---

--- Model with 13 CNN layers ---

--- Model with 14 CNN layers ---

--- Model with 15 CNN layers ---

--- Model with 16 CNN layers ---

--- Model with 17 CNN layers ---

--- Model with 18 CNN layers ---

--- Model with 19 CNN layers ---

--- Model with 20 CNN layers ---


In [None]:
for experiment, one_layer in type_layer.items():
    print(experiment)
    for num_layers in range(15, 20):
        print(num_layers)
        # if num_layers <= 3:
        #     num_maxpool = 1
        # else:
        #     num_maxpool = 2
        layers = [one_layer(C, n_filters)]
        in_channels = n_filters
        # out_channels = n_filters
        for layer in range(1, num_layers):
            if layer % 3:
                layers.append(one_layer(in_channels))
            else:
                num_maxpool **= 2 
                layers.append(nn.MaxPool2d((2,2)))
                layers.append(one_layer(in_channels, 2*in_channels))
                in_channels *= 2
        adaptive_size = 8 # Choose an appropriate size (e.g., 8x8)
        layers.append(nn.AdaptiveAvgPool2d((adaptive_size, adaptive_size)))
        layers.extend([nn.Flatten(), nn.Linear(in_channels*adaptive_size*adaptive_size, classes)])
        model = nn.Sequential(*layers)
        # summary(model, input_size=(batch_size, C, W, H))
        with open(f'./model_summary/{experiment}_{num_layers}.txt', 'w') as f:
            f.write(str(summary(model, input_size=(batch_size, C, W, H))))

cnn
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15


: 

In [14]:
D * in_channels // num_maxpool

784

In [13]:
num_maxpool

256

In [20]:
num_maxpool = 2
num_maxpool **= 2
num_maxpool

4

In [11]:
model

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
  )
  (1): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
  )
  (2): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
  )
  (3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
  )
  (5): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
  )
  (6): Sequential(
    (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1)
  )
  (7): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, 

In [16]:
in_channels

256

In [17]:
D *2* in_channels // num_maxpool

1568

In [12]:
D//(4*4*4)

12

In [56]:
summary(model)

Layer (type:depth-idx)                   Param #
Sequential                               --
├─Sequential: 1-1                        --
│    └─Conv2d: 2-1                       320
│    └─LeakyReLU: 2-2                    --
├─Flatten: 1-2                           --
├─Linear: 1-3                            125,450
Total params: 125,770
Trainable params: 125,770
Non-trainable params: 0

In [79]:
summary(model, input_size=(batch_size, C, W, H))

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [256, 10]                 --
├─Sequential: 1-1                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-1                       [256, 32, 28, 28]         320
├─Sequential: 1-6                        --                        (recursive)
│    └─LeakyReLU: 2-2                    [256, 32, 28, 28]         --
├─Sequential: 1-3                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-3                       [256, 32, 28, 28]         9,248
├─Sequential: 1-6                        --                        (recursive)
│    └─LeakyReLU: 2-4                    [256, 32, 28, 28]         --
├─Sequential: 1-5                        [256, 32, 28, 28]         --
│    └─Conv2d: 2-5                       [256, 32, 28, 28]         9,248
├─Sequential: 1-6                        --                        (recursive)
│    └─LeakyReLU: 2-6                    [256, 32, 