In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the local `utils` helpers) are re-imported before each cell runs
# and edits to them are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('png', 'pdf')

  set_matplotlib_formats('png', 'pdf')


# Exercise 7

<img src='./images/07.png' width=800>

In [None]:
import os
import mlflow

# Point MLflow at a local ./mlruns directory. The URI is exported through the
# environment as well as set on the client, so both agree on the location.
os.environ.update(MLFLOW_TRACKING_URI='./mlruns')
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [None]:
import mlflow

# Activate the experiment that groups every run of this exercise
# (MLflow creates it on first use). Left as the last expression so the
# resulting Experiment object is displayed.
mlflow.set_experiment(experiment_name='Exercise_7')

2025/04/07 17:42:03 INFO mlflow.tracking.fluent: Experiment with name 'Exercise_7' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_02/mlruns/904989942481923738', creation_time=1744035123832, experiment_id='904989942481923738', last_update_time=1744035123832, lifecycle_stage='active', name='Exercise_7', tags={}>

In [None]:
from sklearn.datasets import make_moons
import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader
import optuna
import torch.nn as nn
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from utils import (train_network, accuracy_score_wrapper, 
                f1_score_wrapper, roc_auc_score_micro_wrapper, 
                weight_reset, set_seed)
from torchinfo import summary
import optuna
from mlflow.types import Schema, TensorSpec
from mlflow.models import ModelSignature

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from sklearn.datasets import load_breast_cancer
# Load scikit-learn's breast-cancer dataset. Its `.data` (features) and
# `.target` (labels) arrays are wrapped into a TensorDataset further down.
data = load_breast_cancer()

In [None]:
# Seed before anything random happens. `set_seed` comes from the local utils
# module -- presumably it seeds torch's global RNG, which `random_split`
# below draws from (TODO confirm).
random_state = 42
set_seed(random_state)

# Wrap features (float32) and class labels (long) into a single dataset.
dataset = TensorDataset(
    torch.tensor(data.data, dtype=torch.float32),
    torch.tensor(data.target, dtype=torch.long),
)

# 80 % of the samples for training, the remainder for validation.
train_size = int(0.8 * len(dataset))
valid_size = len(dataset) - train_size
train_dataset, valid_dataset = random_split(dataset, [train_size, valid_size])

# Network input/output widths are derived from the data itself.
in_features = dataset.tensors[0].size(1)
out_features = torch.unique(dataset.tensors[1]).numel()

loss_func = nn.CrossEntropyLoss()

# Candidate activations for the hyperparameter search, keyed by class name.
activation_functions = {
    activation_cls.__name__: activation_cls()
    for activation_cls in (nn.ReLU, nn.Tanh, nn.LeakyReLU, nn.Sigmoid)
}

In [None]:
def plot_results(data_df, close=True):
    """Plot the validation F1 score per epoch and return the figure.

    Args:
        data_df: Table with at least the columns ``'epoch'`` and
            ``'valid F1'`` (the training history).
        close: When True (default) the figure is closed after drawing so it
            is not also rendered inline by the notebook; the returned figure
            object can still be saved or logged.

    Returns:
        The matplotlib ``Figure`` containing the line plot.
    """
    # Draw on a dedicated figure/axes instead of relying on pyplot's implicit
    # "current figure", so a figure left open elsewhere is never drawn on.
    fig, ax = plt.subplots()
    # `data=` is passed by keyword: positional data is only accepted by
    # newer seaborn releases.
    sns.lineplot(data=data_df, x='epoch', y='valid F1', label='valid F1', ax=ax)
    ax.set_xlabel('epoch')
    ax.set_ylabel('valid F1')
    ax.set_title('valid F1')
    if close:
        # Close exactly this figure rather than whichever one is current.
        plt.close(fig)
    return fig
    
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Restrict Optuna's own logging to ERROR level.
optuna.logging.set_verbosity(optuna.logging.ERROR)

def champion_callback(study, frozen_trial):
    """Optuna callback that announces every new best ("champion") value.

    The best value seen so far is remembered in the study's user attribute
    ``'winner'``. After each trial the study's current best value is compared
    with it, and a message is printed when the best value has changed.

    The improvement percentage is measured relative to the *previous*
    champion (the baseline being improved upon). If that baseline is zero the
    percentage is undefined, so the new value is reported without one.

    Args:
        study: The running study; must expose ``user_attrs``, ``best_value``
            and ``set_user_attr``.
        frozen_trial: The trial that just finished; its ``number`` and
            ``value`` are reported.
    """
    previous_best = study.user_attrs.get('winner', None)
    current_best = study.best_value

    if previous_best is None:
        print(f'Initial trial {frozen_trial.number} achived value: {frozen_trial.value}')
    elif previous_best != current_best:
        if previous_best:
            # Relative change with respect to the value that was beaten.
            improvment_percent = (abs(previous_best - current_best) / abs(previous_best)) * 100
            print(f'Trial {frozen_trial.number} achived value: {frozen_trial.value} with {improvment_percent:.4f}% improvment')
        else:
            # Previous champion was 0: avoid dividing by zero, still report.
            print(f'Trial {frozen_trial.number} achived value: {frozen_trial.value}')

    study.set_user_attr('winner', current_best)


def objective(trial):
    """Optuna objective: build, train and score one fully connected network.

    Samples batch size, learning rate, hidden width, number of extra hidden
    layers and activation from ``trial``, trains the resulting ``nn.Sequential``
    with SGD inside an MLflow run, and logs the parameters, a model summary
    and the validation-F1 curve to that run.

    Relies on module-level names defined elsewhere in the notebook: ``epochs``,
    ``device``, ``loss_func``, ``in_features``, ``out_features``,
    ``activation_functions``, ``train_dataset`` and ``valid_dataset``.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        The ``'valid F1'`` value of the last row of the training results
        (``train_network`` is expected to return a DataFrame-like history).
    """
    params = {
        'batch_size': trial.suggest_int('batch_size', 16, 256),
        'device': device,
        'epochs': epochs,
        'loss_function': loss_func.__class__.__name__,
        'learning_rate': trial.suggest_float('lr', 1e-8, 1, log=True),
        'hidden_neurons': trial.suggest_int("neuron_per_layer", in_features, 500),
        'layers': trial.suggest_int("hidden_layers", 1, 20),
        'activation': trial.suggest_categorical("activation", list(activation_functions.keys()))
    }

    hidden_neurons = params['hidden_neurons']
    activation = activation_functions[params['activation']]

    # Input projection, then `layers` hidden->hidden blocks, then the output
    # head; i.e. the network has params['layers'] + 1 hidden Linear layers.
    sequential_layer = [nn.Linear(in_features, hidden_neurons), activation]
    for _ in range(params['layers']):
        sequential_layer.append(nn.Linear(hidden_neurons, hidden_neurons))
        sequential_layer.append(activation)
    sequential_layer.append(nn.Linear(hidden_neurons, out_features))

    model = nn.Sequential(*sequential_layer)

    run_name = f'trial: {trial.number}'
    with mlflow.start_run(nested=True, run_name=run_name):

        optimizer = torch.optim.SGD(model.parameters(), lr=params['learning_rate'])
        params['optimizer'] = optimizer.defaults
        mlflow.log_params(params)

        train_dataloader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)
        valid_dataloader = DataLoader(valid_dataset, batch_size=params['batch_size'])

        # Log the summary straight to the run. Going through a fixed file in
        # the working directory left a stray file behind and used the platform
        # default encoding, which can fail on the summary's non-ASCII glyphs.
        mlflow.log_text(str(summary(model)), "model_summary.txt")

        model.apply(weight_reset)
        fc_results = train_network(
            model=model,
            loss_func=loss_func,
            train_loader=train_dataloader,
            valid_loader=valid_dataloader,
            epochs=epochs,
            optimizer=optimizer,
            score_funcs={'Acc': accuracy_score_wrapper, 'F1': f1_score_wrapper, 'AUC': roc_auc_score_micro_wrapper},
            device=device,
        )

        # Optional: also log the trained model together with its signature.
        # input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, in_features))])
        # output_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, out_features))])
        # signature = ModelSignature(inputs=input_schema, outputs=output_schema)
        # mlflow.pytorch.log_model(model, "model", signature=signature)
        mlflow.log_figure(plot_results(fc_results), "valid_F1.png")

    # The study maximizes the validation F1 reached after the final epoch.
    return fc_results['valid F1'].iloc[-1]

# Number of training epochs per trial; `objective` reads this module-level value.
epochs = 20

# TPE is Optuna's default sampler, but unseeded it proposes different
# hyperparameters on every run. Seeding it with the notebook's `random_state`
# makes the sequence of suggested configurations repeatable.
sampler = optuna.samplers.TPESampler(seed=random_state)
study = optuna.create_study(direction='maximize', sampler=sampler)
study.optimize(objective, n_trials=20, callbacks=[champion_callback])

champion_trial = study.best_trial
print(f"Champion trial: {champion_trial.number} with value {champion_trial.value}")

Epoch: 100%|██████████| 20/20 [00:03<00:00,  6.58it/s]


Initial trial 0 achived value: 0.7472527472527473


Epoch: 100%|██████████| 20/20 [00:02<00:00,  6.90it/s]
Epoch: 100%|██████████| 20/20 [00:04<00:00,  4.67it/s]
Epoch: 100%|██████████| 20/20 [00:03<00:00,  6.26it/s]
Epoch: 100%|██████████| 20/20 [00:05<00:00,  3.62it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 12.07it/s]
Epoch: 100%|██████████| 20/20 [00:03<00:00,  6.25it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 16.39it/s]


Trial 7 achived value: 0.9154929577464789 with 18.3770% improvment


Epoch: 100%|██████████| 20/20 [00:02<00:00,  7.23it/s]
Epoch: 100%|██████████| 20/20 [00:03<00:00,  6.06it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 13.50it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 15.94it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 14.28it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 11.05it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 15.34it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 15.42it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 10.57it/s]
Epoch: 100%|██████████| 20/20 [00:02<00:00,  8.34it/s]
Epoch: 100%|██████████| 20/20 [00:01<00:00, 10.20it/s]
Epoch: 100%|██████████| 20/20 [00:02<00:00,  8.50it/s]


Champion trial: 7 with value 0.9154929577464789


<img src='./images/exercise07.png' width=800>

<img src='./images/train_valid_f1.png' width=800>