In [1]:
# Reload imported modules before each cell execution so that edits to local
# helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Exercise 6

<img src="./images/06.png" width=800>

In [2]:
import time
import torch
import numpy as np
from torch.utils.data import DataLoader, random_split, Subset
import torch.nn as nn 
from typing import DefaultDict, Any, Callable, Optional
import mlflow
import os
from utils import train_network, accuracy_score_wrapper
import torchvision
from torchvision import transforms
from torchinfo import summary
import mlflow
from  sklearn.model_selection import train_test_split
import optuna
from tqdm.autonotebook import tqdm

  from tqdm.autonotebook import tqdm


In [3]:
# Publish the local MLflow store location through the environment, then point
# the MLflow client at that same value.
os.environ.update(MLFLOW_TRACKING_URI='./mlruns05_6')
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [4]:
# Select the experiment that the runs started in this notebook are logged under.
# As the last expression of the cell, the returned Experiment is displayed below.
mlflow.set_experiment('Exercise05_6')

<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_05/mlruns05_6/679868600860297786', creation_time=1749579304858, experiment_id='679868600860297786', last_update_time=1749579304858, lifecycle_stage='active', name='Exercise05_6', tags={}>

In [5]:
# Seed the random number generators used for weight initialisation and
# DataLoader shuffling so a fresh "Restart & Run All" starts from the same state.
# The value matches the random_state used for the train/validation split.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Ask cuDNN for deterministic kernels; this only helps once the RNGs are seeded.
torch.backends.cudnn.deterministic = True

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataset and DataLoader

In [6]:
# Fashion-MNIST, converted to tensors on access and downloaded into ./data when requested.
train_data = torchvision.datasets.FashionMNIST(
    "./data",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_data = torchvision.datasets.FashionMNIST(
    "./data",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Split the *indices* of the training set 90/10, stratified on the labels,
# with a fixed random_state so the split is the same on every run.
train_sub_set, valid_sub_set = train_test_split(
    np.arange(len(train_data)),
    test_size=0.1,
    stratify=train_data.targets,
    shuffle=True,
    random_state=42,
)

# Both subsets are views onto train_data selected by the index arrays above.
train_dataset = Subset(train_data, train_sub_set)
valid_dataset = Subset(train_data, valid_sub_set)

# Only the training loader shuffles; validation and test keep dataset order.
batch = 256
train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch)
test_loader = DataLoader(test_data, batch_size=batch)

In [7]:
# Loss shared by the hyperparameter search, the final training run and the test evaluation.
loss_func = nn.CrossEntropyLoss()
# Metric functions keyed by display name; passed to train_network as score_funcs.
score_funcs = {"Accuracy": accuracy_score_wrapper}

In [8]:
# Number of training epochs used by every run in this notebook.
epochs = 20

# Input geometry: one channel, 28 x 28 pixels, flattened to D input features.
C, W, H = 1, 28, 28
D = C * W * H
# Size of the network's output layer.
classes = 10

# Run-level settings collected in a single dict.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch,
)

## Hyperparameter Tuning with Optuna

In [None]:
# Lookup from the activation name sampled by Optuna to the module instance to use.
# Insertion order is kept because the key list defines the categorical search space.
activation_functions = dict(
    ReLU=nn.ReLU(),
    Tanh=nn.Tanh(),
    LeakyReLU=nn.LeakyReLU(),
    Sigmoid=nn.Sigmoid(),
)

# Only let Optuna report errors; progress is printed by champion_callback instead.
optuna.logging.set_verbosity(optuna.logging.ERROR)
def champion_callback(study, frozen_trial):
    """Optuna callback that reports when the study's best value changes.

    The best value seen after the previous trial is kept in the study's user
    attribute 'winner'. The first invocation prints the initial trial's value;
    later invocations print a line only when the best value differs from the
    stored one (and is non-zero, which avoids a division by zero).

    Args:
        study: the running study; must expose `user_attrs`, `best_value` and
            `set_user_attr`.
        frozen_trial: the trial that just finished; `number` and `value` are read.
    """
    previous_best = study.user_attrs.get('winner')

    if previous_best is None:
        print(f'Initial trial {frozen_trial.number} achived value: {frozen_trial.value}')
    else:
        current_best = study.best_value
        if current_best and previous_best != current_best:
            # Change relative to the new best value, expressed in percent.
            change_pct = abs(previous_best - current_best) / abs(current_best) * 100
            print(f'Trial {frozen_trial.number} achived value: {frozen_trial.value} with {change_pct:.4f}% improvment')

    # Always remember the current best for the next invocation.
    study.set_user_attr('winner', study.best_value)

def objective(trial):
    """Optuna objective: build, train and score one fully connected network.

    Samples the number of hidden layers, their width, the activation function
    and the SGD learning rate, trains the resulting model inside an MLflow
    run, and records that run's id on the trial under 'mlflow_run_id'.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        The last entry of the 'valid Acc' column returned by train_network.
    """
    # Search space; the suggest calls are kept in this fixed order.
    params = {
        'hidden_layers': trial.suggest_int('hidden_layers', 1, 5),
        'hidden_neurons': trial.suggest_categorical('hidden_neurons', [64, 128, 256]),
        'activation': trial.suggest_categorical("activation", list(activation_functions.keys())),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1, log=True),
    }
    act = activation_functions[params['activation']]
    width = params['hidden_neurons']
    depth = params['hidden_layers']

    # Input block, then (depth - 1) further hidden blocks, then the classifier head.
    modules = [nn.Flatten(), nn.Linear(D, width), act]
    for _ in range(depth - 1):
        modules += [nn.Linear(width, width), act]
    modules.append(nn.Linear(width, classes))
    fc_model = nn.Sequential(*modules)

    with mlflow.start_run(nested=True, run_name=f'trial: {trial.number}') as run:
        # Remember which MLflow run belongs to this trial so it can be found later.
        trial.set_user_attr('mlflow_run_id', run.info.run_id)

        optimizer = torch.optim.SGD(fc_model.parameters(), lr=params['learning_rate'])
        params['optimizer'] = optimizer.defaults
        mlflow.log_params(params)

        # Store a textual summary of the architecture alongside the run.
        with open("model_summary.txt", "w") as f:
            f.write(str(summary(fc_model, input_size=(batch, C, W, H))))
        mlflow.log_artifact("model_summary.txt")

        # Trials are scored on the validation split only; no test loader and
        # no checkpoint file are passed.
        cnn_results = train_network(
            model=fc_model,
            loss_func=loss_func,
            train_loader=train_loader,
            valid_loader=valid_loader,
            epochs=epochs,
            optimizer=optimizer,
            score_funcs=score_funcs,
            device=device,
            disable_tqdm=True,
        )

    return cnn_results['valid Acc'].iloc[-1]

# Run the search: 20 trials, maximising the value returned by objective().
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20, callbacks=[champion_callback])

# Tag the MLflow run of the best trial so it can be found in the tracking UI.
champion_trial = study.best_trial
# user_attrs is a dict (not a callable), and objective() stores the run id
# under the key 'mlflow_run_id'.
champion_run_id = champion_trial.user_attrs.get('mlflow_run_id')
if champion_run_id:
    mlflow_client = mlflow.tracking.MlflowClient()
    mlflow_client.set_tag(champion_run_id, "is_champion", "true")
    mlflow_client.set_tag(champion_run_id, "champion_metric_value", str(champion_trial.value))
    mlflow_client.set_tag(champion_run_id, "optuna_trial_number", str(champion_trial.number))
    print(f"Champion trial: {champion_trial.number} with value {champion_trial.value}")
else:
    print("Error: Could not retrieve mlflow_run_id from champion_trial.user_attrs")


## Training with Selected Hyperparameters

In [None]:
import ast


# Hyperparameters are read back from the MLflow run of the selected Optuna trial.
run_id = 'fd795a161d2e43aea9747b534d262842' #trial: 3
run = mlflow.get_run(run_id)
params = run.data.params

# Activation name -> module lookup, with the same entries as in the tuning cell.
activation_functions = dict(
    ReLU=nn.ReLU(),
    Tanh=nn.Tanh(),
    LeakyReLU=nn.LeakyReLU(),
    Sigmoid=nn.Sigmoid(),
)

# Parse the logged values back into Python types; the optimizer settings were
# logged as the text form of a dict, hence literal_eval.
hidden_layers = int(params['hidden_layers'])
hidden_neurons = int(params['hidden_neurons'])
activation = activation_functions[params['activation']]
optimizer_dict = ast.literal_eval(params['optimizer'])
learning_rate = float(optimizer_dict['lr'])

# NOTE(review): objective() adds `hidden_layers - 1` blocks after the input
# block, whereas this loop adds `hidden_layers`, so this network has one more
# hidden block than the tuned trial. The test cell rebuilds the model with the
# same loop in order to load the checkpoint, so the two must change together.
layers = [nn.Flatten(), nn.Linear(D, hidden_neurons), activation]
for _ in range(hidden_layers):
    layers += [nn.Linear(hidden_neurons, hidden_neurons), activation]
layers.append(nn.Linear(hidden_neurons, classes))
fc_model = nn.Sequential(*layers)

with mlflow.start_run(nested=True, run_name='final_run'):
    # The tag is what the test cell filters on to find this run again.
    mlflow.set_tag("final_run", "True")

    optimizer = torch.optim.SGD(fc_model.parameters(), lr=learning_rate)
    params['optimizer'] = optimizer.defaults
    mlflow.log_params(params)

    # Store a textual summary of the architecture alongside the run.
    with open("model_summary.txt", "w") as f:
        f.write(str(summary(fc_model, input_size=(batch, C, W, H))))
    mlflow.log_artifact("model_summary.txt")

    # Unlike the search trials, this run asks train_network for a checkpoint file.
    results = train_network(
        model=fc_model,
        loss_func=loss_func,
        train_loader=train_loader,
        valid_loader=valid_loader,
        epochs=epochs,
        optimizer=optimizer,
        score_funcs=score_funcs,
        device=device,
        checkpoint_file_save='final_model.pth',
    )

<img src="./images/E6_train_acc.png">

<img src="./images/E6_train_loss.png">

<img src="./images/E6_valid_acc.png">

<img src="./images/E6_valid_loss.png">

## Testing

In [11]:
def load_model_from_mlflow(
    run_id, artifact_path, model, device
):
    """Restore model weights from a checkpoint stored as an MLflow run artifact.

    Args:
        run_id: id of the MLflow run that holds the checkpoint artifact.
        artifact_path: path of the checkpoint file inside the run's artifacts.
        model: a module whose architecture matches the checkpoint's
            'model_state_dict'; it is updated in place.
        device: device the checkpoint tensors are mapped to and the model is moved to.

    Returns:
        A tuple `(model, results, epoch)`: the model in eval mode on `device`,
        plus the 'results' and 'epoch' entries stored in the checkpoint.

    Raises:
        KeyError: if the checkpoint lacks 'model_state_dict', 'results' or 'epoch'.
    """
    artifact_uri = f'runs:/{run_id}/{artifact_path}'
    checkpoint_path = mlflow.artifacts.download_artifacts(artifact_uri=artifact_uri)

    # The checkpoint carries more than tensors ('results', 'epoch'), so it is
    # unpickled in full. Passing weights_only=False makes that explicit instead
    # of depending on the installed PyTorch's default.
    # SECURITY: unpickling can execute arbitrary code; only load checkpoints
    # produced by runs you trust.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model, checkpoint['results'], checkpoint['epoch']

In [12]:
import ast


client = mlflow.tracking.MlflowClient()

# Resolve the experiment by name rather than by a hard-coded id: ids are
# specific to one tracking store, the name is what this notebook sets.
experiment = mlflow.get_experiment_by_name('Exercise05_6')
if experiment is None:
    raise RuntimeError("MLflow experiment 'Exercise05_6' was not found in the current tracking store.")

# Find the run tagged by the final-training cell.
filter_string = "tags.final_run = 'True'"
runs_list = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string=filter_string,
    max_results=1,
)
if not runs_list:
    raise RuntimeError("No run tagged final_run = 'True' was found; run the final training cell first.")

run_id = runs_list[0].info.run_id
run = mlflow.get_run(run_id)
params = run.data.params

# Activation name -> module lookup, with the same entries as in the tuning cell.
activation_functions = {
    'ReLU': nn.ReLU(),
    'Tanh': nn.Tanh(),
    'LeakyReLU': nn.LeakyReLU(),
    'Sigmoid': nn.Sigmoid(),
}

# Parse the logged values back into Python types.
hidden_layers = int(params['hidden_layers'])
hidden_neurons = int(params['hidden_neurons'])
activation = activation_functions[params['activation']]
optimizer_dict = ast.literal_eval(params['optimizer'])
learning_rate = float(optimizer_dict['lr'])

# The architecture must mirror the one built in the final-training cell,
# otherwise the checkpoint's state_dict keys will not match.
# NOTE(review): both cells add `hidden_layers` blocks after the input block,
# one more than objective() builds for the same parameter value.
layers = [nn.Flatten(), nn.Linear(D, hidden_neurons), activation]
for _ in range(hidden_layers):
    layers.extend([nn.Linear(hidden_neurons, hidden_neurons), activation])
layers.append(nn.Linear(hidden_neurons, classes))
fc_model = nn.Sequential(*layers)

artifact_path = 'final_model.pth'
model, results, epoch = load_model_from_mlflow(
    run_id=run_id,
    artifact_path=artifact_path,
    model=fc_model,
    device=device,
)

  checkpoint = torch.load(checkpoint_path, map_location=device)


In [19]:
score_funcs = {"Accuracy": accuracy_score_wrapper}
model.to(device)
model.eval()

# Inference only: no gradients are tracked while the test set is scored.
with torch.no_grad():
    # running_loss collects the per-batch loss values; it is not aggregated
    # or reported in this cell.
    running_loss, y_true, y_pred = [], [], []

    for inputs, labels in tqdm(test_loader, desc='tetsing', leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        y_hat = model(inputs)
        loss = loss_func(y_hat, labels)
        running_loss.append(loss.item())

        # Bring the batch back to host memory as numpy arrays and accumulate
        # labels and raw model outputs row by row.
        labels = labels.detach().cpu().numpy()
        y_hat = y_hat.detach().cpu().numpy()
        y_true.extend(labels)
        y_pred.extend(y_hat)

    y_pred = np.asarray(y_pred)
    y_true = np.asanyarray(y_true)

    # Print every metric and attach it to the final run as a tag.
    for score_name, score_func in score_funcs.items():
        score_value = score_func(y_pred, y_true)
        print(f'{score_name} = {score_value}')
        client.set_tag(run_id, f'test_{score_name}', score_value)

                                                        

Accuracy = 0.8743




In [18]:
# Call the metric function by name instead of through `score_func`, the loop
# variable left behind by the evaluation cell; y_pred and y_true come from that cell.
accuracy_score_wrapper(y_pred, y_true)

0.8743