In [None]:
%load_ext autoreload
%autoreload 2

# Exercise 2

<img src='./images/02.png' width=800>

In [None]:
import os
import mlflow
# Local directory used as the MLflow tracking store for this notebook.
# The value is exported through the environment and also handed to the
# mlflow client directly.
tracking_uri = './mlruns03_2'
os.environ['MLFLOW_TRACKING_URI'] = tracking_uri
mlflow.set_tracking_uri(tracking_uri)

In [None]:
# Select the MLflow experiment that the runs started below are logged under
# (MLflow reports creating it when it does not exist yet).
mlflow.set_experiment('Exercise_2')

2025/04/09 08:42:38 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_1' does not exist. Creating a new experiment.


<Experiment: artifact_location='/kaggle/working/mlruns/674194671375701503', creation_time=1744188158264, experiment_id='674194671375701503', last_update_time=1744188158264, lifecycle_stage='active', name='Exercise_1', tags={}>

In [None]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
import seaborn as sns
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
import torch.optim as optim
from torchinfo import summary
from utils import train_network, accuracy_score_wrapper


In [None]:
# Convert images to tensors, then normalise each of the three channels.
# NOTE(review): the mean/std constants are presumably CIFAR-10 training-set
# statistics -- confirm their origin.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.4914, 0.4822, 0.4465),
            std=(0.2023, 0.1994, 0.2010),
        ),
    ]
)

# Build the training split first, then the held-out split; both share the
# same root directory and preprocessing and are downloaded on demand.
cifar_train, cifar_test = (
    torchvision.datasets.CIFAR10(
        root='./data_cifar',
        train=is_train,
        transform=transform,
        download=True,
    )
    for is_train in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data_cifar/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:04<00:00, 34.1MB/s] 


Extracting ./data_cifar/cifar-10-python.tar.gz to ./data_cifar
Files already downloaded and verified


In [None]:
batch_size = 256

# Training batches are shuffled; the evaluation loader keeps the default
# (unshuffled) order.
cifar_train_loader = DataLoader(cifar_train, batch_size=batch_size, shuffle=True)
cifar_test_loader = DataLoader(cifar_test, batch_size=batch_size)

In [None]:
# Input geometry and label count used by `build_model`.
# They are hard-coded; the commented expressions show how they could be read
# from the dataset instead.
# C = cifar_train[0][0].shape[0]
C = 3
# NOTE(review): `filter` and `K` are not referenced anywhere in the visible
# notebook, and `filter` shadows the Python builtin. Kept so that any cell
# relying on them keeps working.
filter = 16
K = 3
# w = cifar_train[0][0].shape[1]
# h = cifar_train[0][0].shape[2]
w , h = 32, 32
# classes = cifar_train[0][1].shape[0]
classes = 10
def build_model(num_conv_layers,
                num_pool_layers,
                num_hidden_layer=2, 
                init_hidden_size=512, 
                decay_factor=2,
                activation=nn.ReLU()):
    """Build a CNN: a conv stack followed by a fully connected classifier.

    The conv stack consists of ``num_conv_layers`` 3x3, padding-1 convolutions
    (so the spatial size is unchanged by a convolution), each followed by
    ``activation``. The first convolution maps ``C`` input channels to 32.
    A 2x2 max-pool is inserted after every ``pool_interval``-th convolution
    until ``num_pool_layers`` pools have been placed or the convolutions run
    out; every convolution that comes after a pool uses twice as many output
    channels as the ones before it.

    The classifier flattens the feature map and applies ``num_hidden_layer``
    hidden ``Linear`` + ``ReLU`` layers followed by a final ``Linear`` to
    ``classes`` outputs. Note that the hidden layers always use ``nn.ReLU``;
    ``activation`` is only used in the conv stack.

    Args:
        num_conv_layers: Number of convolution layers.
        num_pool_layers: Requested number of max-pool layers. Fewer are
            inserted when there are not enough convolutions to host them.
        num_hidden_layer: Number of hidden fully connected layers; ``0``
            connects the flattened features directly to the output layer.
        init_hidden_size: Width of the first hidden layer.
        decay_factor: Each further hidden layer has width
            ``max(10, previous_width // decay_factor)``.
        activation: Module appended after every convolution. The same
            instance is reused at every position.

    Returns:
        ``nn.Sequential`` whose leading entries are the conv-stack modules and
        whose last entry is the classifier (itself an ``nn.Sequential``).

    Relies on the module-level globals ``C``, ``w``, ``h`` and ``classes``.
    """
    conv_stack = []
    in_channels = C
    out_channels = 32
    if num_pool_layers:
        pool_interval = max(1, num_conv_layers // (num_pool_layers + 1))
    else:
        # An interval larger than the layer count means no pool is ever hit.
        pool_interval = num_conv_layers + 1

    pools_applied = 0
    for i in range(num_conv_layers):
        conv_stack.append(nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            padding=3 // 2))
        conv_stack.append(activation)
        in_channels = out_channels
        if (i + 1) % pool_interval == 0 and pools_applied < num_pool_layers:
            conv_stack.append(nn.MaxPool2d(kernel_size=2))
            pools_applied += 1
            # Only convolutions after this pool see the doubled width.
            out_channels *= 2

    # Size the classifier from the pools that were actually inserted, not the
    # number requested: with too few convolutions (e.g. 1 conv, 2 pools) the
    # loop places fewer pools, and using the requested count would make the
    # first Linear layer expect a smaller feature map than the stack emits.
    final_w = w // (2 ** pools_applied)
    final_h = h // (2 ** pools_applied)
    in_features = in_channels * final_w * final_h

    fc_layers = [nn.Flatten()]

    if num_hidden_layer == 0:
        # Directly classify without extra hidden layers.
        fc_layers.append(nn.Linear(in_features, classes))
    else:
        # First FC layer: from flattened output to initial hidden size.
        fc_layers.append(nn.Linear(in_features, init_hidden_size))
        fc_layers.append(nn.ReLU(inplace=True))

        # Width of the most recently added hidden layer; shrinks per layer.
        current_hidden_size = init_hidden_size

        # Remaining hidden layers, each narrower than the one before but
        # never below 10 units.
        for _ in range(1, num_hidden_layer):
            new_hidden_size = max(10, current_hidden_size // decay_factor)
            fc_layers.append(nn.Linear(current_hidden_size, new_hidden_size))
            fc_layers.append(nn.ReLU(inplace=True))
            current_hidden_size = new_hidden_size

        # Final classification layer from the last hidden width to `classes`.
        fc_layers.append(nn.Linear(current_hidden_size, classes))

    classifier = nn.Sequential(*fc_layers)
    model = nn.Sequential(*conv_stack, classifier)
    return model

In [None]:
# Prefer the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}
epochs = 20

# Settings shared by every run; logged to MLflow as run parameters.
# The loss is recorded by class name rather than as an object.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch_size,
)

## Without Optuna

In [None]:
# Exhaustive sweep: 2-10 convolution layers x 0-2 pooling layers, one MLflow
# run per combination. The fully connected head is kept fixed.
for num_conv in range(2, 11):
    for num_pool in range(3):
        print(f'num_conv:{num_conv}...num_pool:{num_pool}')
        model = build_model(
            num_conv,
            num_pool,
            num_hidden_layer=2,
            init_hidden_size=512,
            decay_factor=2,
            activation=nn.ReLU(),
        )
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

        # Record the per-run settings alongside the shared ones.
        params.update(
            optimizer=optimizer.defaults,
            num_conv=num_conv,
            num_pool=num_pool,
        )

        # Dump the architecture summary to disk so it can be attached to the run.
        with open('model_summary.txt', 'w') as summary_file:
            summary_file.write(
                str(summary(model, input_size=(batch_size, 3, 32, 32)))
            )

        run_name = f'model{num_conv}_{num_pool}'
        with mlflow.start_run(nested=True, run_name=run_name):
            mlflow.log_params(params)
            mlflow.log_artifact('model_summary.txt')
            results = train_network(
                model=model,
                optimizer=optimizer,
                loss_func=loss_func,
                train_loader=cifar_train_loader,
                valid_loader=cifar_test_loader,
                epochs=epochs,
                device=device,
                score_funcs=score_funcs,
            )
    

### Results grouped by `num_pool`

<img src="./images/valid_acc_num_pool.png">

<img src="./images/valid_loss_num_pool.png">

### Results grouped by `num_conv`

<img src="./images/valid_acc_num_conv.png">

<img src="./images/valid_loss_num_conv.png">

## With Optuna

In [None]:
import optuna
from mlflow.types import Schema, TensorSpec
from mlflow.models import ModelSignature

# Prefer the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
loss_func = nn.CrossEntropyLoss()
score_funcs = {"Accuracy": accuracy_score_wrapper}

# Activation choices offered to the search, keyed by the name that is
# suggested and logged for each trial.
activation_functions = dict([
    ('ReLU', nn.ReLU()),
    ('Tanh', nn.Tanh()),
    ('LeakyReLU', nn.LeakyReLU()),
    ('Sigmoid', nn.Sigmoid()),
])

# Only surface Optuna errors; progress is reported by the study callback.
optuna.logging.set_verbosity(optuna.logging.ERROR)
def champion_callback(study, frozen_trial):
    """Study callback that reports whenever the best value changes.

    The best value seen so far is remembered in the study's ``'winner'`` user
    attribute. When that attribute is not set yet, the trial is announced as
    the initial one. Otherwise a message is printed only when the study's
    best value differs from the remembered one, with the relative change
    expressed as a percentage of the new best value. The attribute is
    refreshed on every call.
    """
    previous_best = study.user_attrs.get('winner', None)

    if previous_best is None:
        print(f'Initial trial {frozen_trial.number} achived value: {frozen_trial.value}')
    else:
        current_best = study.best_value
        # A falsy best value is skipped to avoid dividing by zero below.
        if current_best and previous_best != current_best:
            improvment_percent = (abs(previous_best - current_best) / abs(current_best)) * 100
            print(f'Trial {frozen_trial.number} achived value: {frozen_trial.value} with {improvment_percent:.4f}% improvment')

    study.set_user_attr('winner', study.best_value)


def objective(trial):
    """Optuna objective: train one sampled CNN and return its final accuracy.

    Samples batch size, learning rate, conv/pool layer counts, the conv
    activation, and the depth and initial width of the fully connected head.
    It builds the model with ``build_model``, trains it with plain SGD inside
    a nested MLflow run, and logs the parameters plus a model summary.

    Reads the globals ``device``, ``epochs``, ``loss_func``,
    ``activation_functions``, ``cifar_train`` and ``cifar_test``.

    Returns:
        The last entry of the ``'valid Acc'`` column of the object returned
        by ``train_network`` (presumably the validation accuracy after the
        final epoch -- confirm against ``utils.train_network``).
    """
    # Filled key by key so hyper-parameters are requested from the trial in a
    # fixed order and logged in that same order.
    params = {}
    params['batch_size'] = trial.suggest_categorical(
        'batch_size', [16, 32, 64, 128, 256, 512])
    params['device'] = device
    params['epochs'] = epochs
    params['loss_function'] = type(loss_func).__name__
    params['learning_rate'] = trial.suggest_float('lr', 1e-4, 1, log=True)
    params['num_conv'] = trial.suggest_int("num_conv", 2, 10)
    params['num_pool'] = trial.suggest_int("num_pool", 0, 2)
    params['activation'] = trial.suggest_categorical(
        "activation", list(activation_functions))
    params['fc_hidden_layer'] = trial.suggest_int('fc_hidden_layer', 0, 5)
    params['init_hidden_size'] = trial.suggest_categorical(
        'init_hidden_size', [64, 128, 256, 512])

    model = build_model(
        num_conv_layers=params['num_conv'],
        num_pool_layers=params['num_pool'],
        num_hidden_layer=params['fc_hidden_layer'],
        init_hidden_size=params['init_hidden_size'],
        activation=activation_functions[params['activation']],
    )

    with mlflow.start_run(nested=True, run_name=f'trial: {trial.number}'):
        optimizer = torch.optim.SGD(model.parameters(), lr=params['learning_rate'])
        params['optimizer'] = optimizer.defaults
        mlflow.log_params(params)

        # Loaders are rebuilt per trial because the batch size is tuned.
        sampled_batch_size = params['batch_size']
        train_dataloader = DataLoader(
            cifar_train, batch_size=sampled_batch_size, shuffle=True)
        valid_dataloader = DataLoader(cifar_test, batch_size=sampled_batch_size)

        # Attach a textual architecture summary to the run.
        with open("model_summary.txt", "w") as summary_file:
            summary_file.write(str(summary(model)))
        mlflow.log_artifact("model_summary.txt")

        cnn_results = train_network(
            model=model,
            loss_func=loss_func,
            train_loader=train_dataloader,
            valid_loader=valid_dataloader,
            epochs=epochs,
            optimizer=optimizer,
            score_funcs={'Acc': accuracy_score_wrapper},
            device=device,
        )

    final_valid_accuracy = cnn_results['valid Acc'].iloc[-1]
    return final_valid_accuracy

# Number of epochs each trial trains for; `objective` reads this global.
epochs = 20

# Search for the configuration with the highest objective value, reporting
# progress through `champion_callback` after each trial.
study = optuna.create_study(direction='maximize')
study.optimize(
    objective,
    n_trials=20,
    callbacks=[champion_callback],
)

champion_trial = study.best_trial
print(f"Champion trial: {champion_trial.number} with value {champion_trial.value}")