In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell executes so edits to imported .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Exercise 5

<img src='./images/05.png' width=800>

In [None]:
import os
import mlflow
# Keep MLflow runs in a local directory (relative to the working directory).
# The URI is exported through the environment and also set on the client.
os.environ.update(MLFLOW_TRACKING_URI='./mlruns03_5')
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [None]:
# Make 'Exercise_5' the active MLflow experiment for the runs started later.
mlflow.set_experiment('Exercise_5')

2025/04/18 15:07:16 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_5' does not exist. Creating a new experiment.


<Experiment: artifact_location='/kaggle/working/mlruns/360360039686392042', creation_time=1744988836419, experiment_id='360360039686392042', last_update_time=1744988836419, lifecycle_stage='active', name='Exercise_5', tags={}>

In [None]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
import seaborn as sns
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
import torch.optim as optim
from torchinfo import summary
from utils import train_network, accuracy_score_wrapper


In [None]:
# Mini-batch size passed to the MNIST and CIFAR-10 DataLoaders and recorded
# in the logged run parameters.
batch_size = 256

In [None]:
# MNIST train and test splits, stored under ./data (downloaded when missing)
# and converted to tensors by ToTensor().
mnist_data_train, mnist_data_test = (
    torchvision.datasets.MNIST(
        "./data",
        train=is_train,
        download=True,
        transform=transforms.ToTensor(),
    )
    for is_train in (True, False)
)

# Only the training loader shuffles; the test loader keeps dataset order.
mnist_train_loader = DataLoader(mnist_data_train, batch_size=batch_size, shuffle=True)
mnist_test_loader = DataLoader(mnist_data_test, batch_size=batch_size)

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}
epochs = 10

# Settings logged with every MLflow run; the loss is recorded by class name.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch_size,
)

## MNIST

In [None]:
def model_cnn_pool(D, C, filters, K, classes):
    """Build a Tanh CNN with six convolutions and two 2x2 max-pooling stages.

    The network is three `filters`-wide convolutions, a max-pool, three
    `2*filters`-wide convolutions, a second max-pool, then a flatten and a
    single linear layer producing `classes` outputs.

    Args:
        D: Size used to derive the linear layer's input width
            (`2*filters*D // 16`); the two poolings are what the division
            by 16 accounts for. Presumably height * width of one input
            channel -- confirm against the caller.
        C: Number of input channels of the first convolution.
        filters: Output channels of the first three convolutions.
        K: Kernel size of every convolution; each uses padding `K // 2`.
        classes: Number of output units of the final linear layer.

    Returns:
        nn.Sequential: the assembled model.
    """
    wide = 2 * filters
    pad = K // 2

    # (in_channels, out_channels) per convolution; None marks a pooling stage.
    plan = [
        (C, filters), (filters, filters), (filters, filters), None,
        (filters, wide), (wide, wide), (wide, wide), None,
    ]

    stack = []
    for step in plan:
        if step is None:
            stack.append(nn.MaxPool2d(2))
            continue
        n_in, n_out = step
        stack.append(nn.Conv2d(n_in, n_out, K, padding=pad))
        stack.append(nn.Tanh())

    stack.append(nn.Flatten())
    stack.append(nn.Linear(wide * D // 4**2, classes))
    return nn.Sequential(*stack)

In [None]:
# Sweep the base filter count over the powers of two 4, 8, ..., 512. Each
# setting gets a fresh model and optimizer and is trained in its own MLflow run.
for i in (1 << j for j in range(2, 10)):
    print(i)
    model = model_cnn_pool(D=28*28, C=1, filters=i, K=3, classes=10)
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    params.update(filters=i, optimizer=optimizer.defaults)

    # Write the torchinfo summary to disk so it can be attached as an artifact.
    with open('model_summary.txt', 'w') as f:
        f.write(str(summary(model)))

    with mlflow.start_run(nested=True, run_name=f'cnn_pool_mnistfilter={i}'):
        mlflow.log_params(params)
        mlflow.log_artifact('model_summary.txt')

        results = train_network(
            model=model,
            optimizer=optimizer,
            loss_func=loss_func,
            train_loader=mnist_train_loader,  # MNIST training split
            valid_loader=mnist_test_loader,   # MNIST test split used for validation
            epochs=epochs,
            device=device,
            score_funcs=score_funcs,
        )

<img src="./images/valid_acc_filters_mnist.png">

<img src="./images/valid_loss_filters_mnist.png">

## CIFAR-10

In [None]:
# CIFAR-10 train and test splits, stored under ./data_cifar (downloaded when
# missing) and converted to tensors by ToTensor().
cifar_train, cifar_test = (
    torchvision.datasets.CIFAR10(
        './data_cifar',
        train=is_train,
        download=True,
        transform=transforms.ToTensor(),
    )
    for is_train in (True, False)
)

# Both loaders use 4 worker processes; only the training loader shuffles.
cifar_train_loader = DataLoader(
    cifar_train, batch_size=batch_size, shuffle=True, num_workers=4)

cifar_test_loader = DataLoader(
    cifar_test, batch_size=batch_size, num_workers=4)

In [None]:
# Input geometry read by build_model: 3 input channels, 32x32 images and
# 10 output classes (the values used for the CIFAR-10 runs).
C = 3
# NOTE(review): `filter` and `K` are not read by build_model (it takes
# `out_channels` and hard-codes a 3x3 kernel). They are kept in case other
# cells reference them; `filter` also shadows the Python builtin.
filter = 16
K = 3
w , h = 32, 32
classes = 10
def build_model(num_conv_layers,
                num_pool_layers,
                num_hidden_layer=2, 
                init_hidden_size=512, 
                decay_factor=2,
                activation=nn.ReLU(),
                out_channels=32):
    """Build a configurable CNN followed by a fully connected classifier.

    The convolutional part is `num_conv_layers` 3x3 convolutions (padding 1),
    each followed by `activation`. A 2x2 max-pool is inserted after every
    `pool_interval`-th convolution until `num_pool_layers` pools have been
    placed, and the channel width doubles after each pool. The classifier
    flattens the feature map and applies `num_hidden_layer` hidden linear
    layers (each followed by ReLU) before the final `classes`-way layer.

    The input channel count, image size and class count come from the
    module-level `C`, `w`, `h` and `classes`.

    Args:
        num_conv_layers: Number of convolution + activation pairs.
        num_pool_layers: Maximum number of max-pool layers. Fewer are
            inserted when there are not enough convolutions to host them.
        num_hidden_layer: Number of hidden linear layers; 0 connects the
            flattened features directly to the output layer.
        init_hidden_size: Width of the first hidden linear layer.
        decay_factor: Each further hidden layer is `decay_factor` times
            narrower than the previous one, but never narrower than 10.
        activation: Module applied after every convolution. The same
            instance is reused at every position. The hidden linear layers
            always use `nn.ReLU(inplace=True)` regardless of this argument.
        out_channels: Output channels of the first convolution.

    Returns:
        nn.Sequential: conv/activation/pool layers followed by the classifier
        (itself an nn.Sequential) as the last child.
    """
    layers = []
    in_channels = C
    if num_pool_layers:
        pool_interval = max(1, num_conv_layers // (num_pool_layers + 1))
    else:
        # An interval larger than the conv count means no pool is ever placed.
        pool_interval = num_conv_layers + 1

    pools_added = 0
    for i in range(num_conv_layers):
        layers.append(nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            padding=3//2))
        layers.append(activation)
        in_channels = out_channels
        if (i+1) % pool_interval == 0 and pools_added < num_pool_layers:
            layers.append(nn.MaxPool2d(kernel_size=2))
            pools_added += 1
            out_channels *= 2

    # Size the classifier from the pools that were actually inserted. Using
    # the requested count gives the wrong in_features whenever fewer pools
    # fit than were asked for (e.g. more pools than conv layers).
    final_w = w // (2 ** pools_added)
    final_h = h // (2 ** pools_added)
    in_features = in_channels * final_w * final_h

    fc_layers = [nn.Flatten()]

    if num_hidden_layer == 0:
        # Classify directly from the flattened features.
        fc_layers.append(nn.Linear(in_features, classes))
    else:
        # First hidden layer: flattened features -> init_hidden_size.
        fc_layers.append(nn.Linear(in_features, init_hidden_size))
        fc_layers.append(nn.ReLU(inplace=True))

        current_hidden_size = init_hidden_size

        # Remaining hidden layers shrink by decay_factor, floored at 10 units.
        for _ in range(1, num_hidden_layer):
            new_hidden_size = max(10, current_hidden_size // decay_factor)
            fc_layers.append(nn.Linear(current_hidden_size, new_hidden_size))
            fc_layers.append(nn.ReLU(inplace=True))
            current_hidden_size = new_hidden_size

        # Output layer: last hidden width -> number of classes.
        fc_layers.append(nn.Linear(current_hidden_size, classes))

    classifier = nn.Sequential(*fc_layers)
    model = nn.Sequential(*layers, classifier)
    return model

In [None]:
# Sweep the first-layer filter count over the powers of two 4, 8, ..., 512.
# Each setting gets a fresh model and optimizer and is trained on CIFAR-10 in
# its own MLflow run.
for i in (1 << j for j in range(2, 10)):
    print(i)
    params['filters'] = i
    model = build_model(
        num_conv_layers=4,
        num_pool_layers=2,
        num_hidden_layer=2,
        init_hidden_size=512,
        decay_factor=2,
        activation=nn.ReLU(),
        out_channels=i,
    )
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    params['optimizer'] = optimizer.defaults

    # Write the torchinfo summary to disk so it can be attached as an artifact.
    with open('model_summary.txt', 'w') as f:
        f.write(str(summary(model)))

    with mlflow.start_run(nested=True, run_name=f'build_model_cifarfilter={i}'):
        mlflow.log_params(params)
        mlflow.log_artifact('model_summary.txt')

        results = train_network(
            model=model,
            optimizer=optimizer,
            loss_func=loss_func,
            train_loader=cifar_train_loader,  # CIFAR-10 training split
            valid_loader=cifar_test_loader,   # CIFAR-10 test split used for validation
            epochs=epochs,
            device=device,
            score_funcs=score_funcs,
        )

<img src="./images/valid_acc_filters_cifar.png">

<img src="./images/valid_loss_filters_cifar.png">

## Results Grouped by Number of Filters

<img src="./images/valid_acc_filters.png">

<img src="./images/valid_loss_filters.png">