This part is run after training, when a Data Scientist would like to manually review which models are the best.

### Parse mlflow to compute scores and get the best models
- extract fold-level metadata (fold, architecture and hyperparameter combination) and filter out runs with too large a difference between train and validation loss (don't take the overfitting ones)
- compute averages across folds
- compute scores
- select the best score 

The same scripts are in crossval_score.py for the automated approach.

In [None]:
from pathlib import Path
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import yaml
import sys
from collections import defaultdict
import numpy as np

project_root = Path.cwd().parent 
sys.path.append(str(project_root))
from src.data.dataloader import get_dataloaders
from src.evaluation.plots import plot_roc_ovr

### Define parameters

In [2]:
Dataset_name = "food-101_30%_tr70_va15_te15"

experiment_name = f"{Dataset_name}_2025-08-07_19-10-53"  # change to the experiment you would like to analyze

Data_dir = Path.cwd().parent / f"Data/{Dataset_name}"

classes_names_file = Data_dir / "classes.txt"

# One class name per line. Blank lines (e.g. a trailing newline at the end of
# the file) are skipped so they do not turn into empty class names.
with open(classes_names_file, "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

print(class_names)

# Training configuration (image_size and batch_size are read from it below)
config_path = Path.cwd().parent / "src" / "config.yaml"
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# For mlflow: use the local file-based tracking store
tracking_path = Path.cwd().parent / "experiments/mlruns"
mlflow.set_tracking_uri(uri=tracking_path.as_uri())

['apple_pie', 'beef_tartare', 'caesar_salad', 'cannoli']


In [3]:
# Only the third value returned by get_dataloaders is kept (as the test loader);
# the other three returned values are discarded.
_, _, test_loader, _ = get_dataloaders(
    image_size=config["image_size"],
    batch_size=config["batch_size"],
    fold="fold0"  # consistent fold for testing. Any fold, or a dedicated held-out test set, could be used instead
)

In [4]:
# Point MLflow at the local file-based tracking store under experiments/mlruns
# (the same location as tracking_path in the parameters cell).
mlflow_path_dir = Path.cwd().parent / "experiments/mlruns"
mlflow.set_tracking_uri(mlflow_path_dir.as_uri())

### Manual model picking

In [5]:
def get_all_runs(experiment_name: str) -> pd.DataFrame:
    """
    Fetch the finished runs of an MLflow experiment.

    Args:
        experiment_name: name of the experiment to look up.

    Returns:
        DataFrame with one row per run whose status is FINISHED.

    Raises:
        ValueError: if no experiment with that name exists.
    """
    found = mlflow.get_experiment_by_name(experiment_name)
    if found is None:
        raise ValueError(f"Experiment '{experiment_name}' not found.")

    # Only FINISHED runs are requested, so failed/running ones never show up here
    return mlflow.search_runs(
        experiment_ids=[found.experiment_id],
        filter_string="attributes.status = 'FINISHED'",
        output_format="pandas",
    )


In [6]:
# Fetch all FINISHED runs of the chosen experiment and display them
runs = get_all_runs(experiment_name)
runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.train_acc,metrics.val_loss,metrics.train_loss,metrics.train_time_sec,...,params.optimizer,tags.mlflow.user,tags.training_time_readable,tags.config,tags.fold,tags.mlflow.runName,tags.mlflow.source.git.commit,tags.mlflow.source.type,tags.architecture,tags.mlflow.source.name
0,caadda6e2bab45d58b1102a4097c814a,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:17:06.204000+00:00,2025-08-07 17:17:15.483000+00:00,0.479167,1.338085,1.164298,6.579795,...,Adam,kamil-solski,6.58 sec,"hu=16,lr=0.001",fold4,Food101_0_hu16_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py
1,b2fb9e7daad54cd493e19d23450b8d2a,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:57.203000+00:00,2025-08-07 17:17:06.201000+00:00,0.458333,1.208772,1.238802,6.344281,...,Adam,kamil-solski,6.34 sec,"hu=16,lr=0.0001",fold4,Food101_0_hu16_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py
2,c9877749a9d34db6b0856e06968c92a0,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:48.061000+00:00,2025-08-07 17:16:57.200000+00:00,0.535417,1.132147,1.076523,6.423157,...,Adam,kamil-solski,6.42 sec,"hu=8,lr=0.001",fold4,Food101_0_hu8_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py
3,1c5ede19ee0348fa86b0d016830ec961,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:38.914000+00:00,2025-08-07 17:16:48.058000+00:00,0.3625,1.336356,1.350479,6.445387,...,Adam,kamil-solski,6.45 sec,"hu=8,lr=0.0001",fold4,Food101_0_hu8_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py
4,2f6f64e1f9584e0b8c40e5d6b133d40c,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:29.561000+00:00,2025-08-07 17:16:38.911000+00:00,0.428125,1.296844,1.258376,6.647668,...,Adam,kamil-solski,6.65 sec,"hu=16,lr=0.001",fold4,Food101_hu16_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py
5,4d3f690256a1404a8f5306c3bae6e59b,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:20.076000+00:00,2025-08-07 17:16:29.557000+00:00,0.380208,1.297415,1.313604,6.745456,...,Adam,kamil-solski,6.75 sec,"hu=16,lr=0.0001",fold4,Food101_hu16_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py
6,acdce4713f29494fb41a74bacfb0cf51,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:10.703000+00:00,2025-08-07 17:16:20.073000+00:00,0.423958,1.231633,1.256739,6.682466,...,Adam,kamil-solski,6.68 sec,"hu=8,lr=0.001",fold4,Food101_hu8_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py
7,ee607fd4d5eb46e1b6030e813efbe723,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:15:58.609000+00:00,2025-08-07 17:16:10.700000+00:00,0.26875,1.382221,1.383013,9.355141,...,Adam,kamil-solski,9.36 sec,"hu=8,lr=0.0001",fold4,Food101_hu8_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py
8,5c27007ca1c84d67ba403c4e9fff9e0f,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:15:48.884000+00:00,2025-08-07 17:15:58.603000+00:00,0.528125,1.143807,1.108587,6.815546,...,Adam,kamil-solski,6.82 sec,"hu=16,lr=0.001",fold3,Food101_0_hu16_lr0.001_fold3,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py
9,4653a923eda041468990fb4cafcf2d85,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:15:39.672000+00:00,2025-08-07 17:15:48.881000+00:00,0.430208,1.254495,1.257044,6.517787,...,Adam,kamil-solski,6.52 sec,"hu=16,lr=0.0001",fold3,Food101_0_hu16_lr0.0001_fold3,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py


In [7]:
def filter_by_loss_discrepancy(runs: pd.DataFrame, threshold: float = 0.25) -> pd.DataFrame:
    """
    Keep only runs whose train and validation losses are close to each other.

    Args:
        runs: frame with "metrics.train_loss" and "metrics.val_loss" columns.
        threshold: largest allowed absolute difference between the two losses.

    Returns:
        A new DataFrame with the rows where |train_loss - val_loss| <= threshold
        and an extra "loss_diff" column. The input frame is left untouched.
    """
    loss_diff = (runs["metrics.train_loss"] - runs["metrics.val_loss"]).abs()
    keep = loss_diff <= threshold  # rows with a NaN loss compare False and are dropped

    # Work on a copy so the caller's DataFrame does not silently gain a column
    filtered = runs[keep].copy()
    filtered["loss_diff"] = loss_diff[keep].to_numpy()
    return filtered


In [8]:
# Keep only runs where |train_loss - val_loss| <= 0.25 and display them
filterd_runs = filter_by_loss_discrepancy(runs, threshold=0.25)
filterd_runs

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.train_acc,metrics.val_loss,metrics.train_loss,metrics.train_time_sec,...,tags.mlflow.user,tags.training_time_readable,tags.config,tags.fold,tags.mlflow.runName,tags.mlflow.source.git.commit,tags.mlflow.source.type,tags.architecture,tags.mlflow.source.name,loss_diff
0,caadda6e2bab45d58b1102a4097c814a,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:17:06.204000+00:00,2025-08-07 17:17:15.483000+00:00,0.479167,1.338085,1.164298,6.579795,...,kamil-solski,6.58 sec,"hu=16,lr=0.001",fold4,Food101_0_hu16_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py,0.173787
1,b2fb9e7daad54cd493e19d23450b8d2a,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:57.203000+00:00,2025-08-07 17:17:06.201000+00:00,0.458333,1.208772,1.238802,6.344281,...,kamil-solski,6.34 sec,"hu=16,lr=0.0001",fold4,Food101_0_hu16_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py,0.03003
2,c9877749a9d34db6b0856e06968c92a0,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:48.061000+00:00,2025-08-07 17:16:57.200000+00:00,0.535417,1.132147,1.076523,6.423157,...,kamil-solski,6.42 sec,"hu=8,lr=0.001",fold4,Food101_0_hu8_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py,0.055623
3,1c5ede19ee0348fa86b0d016830ec961,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:38.914000+00:00,2025-08-07 17:16:48.058000+00:00,0.3625,1.336356,1.350479,6.445387,...,kamil-solski,6.45 sec,"hu=8,lr=0.0001",fold4,Food101_0_hu8_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py,0.014123
4,2f6f64e1f9584e0b8c40e5d6b133d40c,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:29.561000+00:00,2025-08-07 17:16:38.911000+00:00,0.428125,1.296844,1.258376,6.647668,...,kamil-solski,6.65 sec,"hu=16,lr=0.001",fold4,Food101_hu16_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py,0.038468
5,4d3f690256a1404a8f5306c3bae6e59b,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:20.076000+00:00,2025-08-07 17:16:29.557000+00:00,0.380208,1.297415,1.313604,6.745456,...,kamil-solski,6.75 sec,"hu=16,lr=0.0001",fold4,Food101_hu16_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py,0.016188
6,acdce4713f29494fb41a74bacfb0cf51,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:16:10.703000+00:00,2025-08-07 17:16:20.073000+00:00,0.423958,1.231633,1.256739,6.682466,...,kamil-solski,6.68 sec,"hu=8,lr=0.001",fold4,Food101_hu8_lr0.001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py,0.025105
7,ee607fd4d5eb46e1b6030e813efbe723,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:15:58.609000+00:00,2025-08-07 17:16:10.700000+00:00,0.26875,1.382221,1.383013,9.355141,...,kamil-solski,9.36 sec,"hu=8,lr=0.0001",fold4,Food101_hu8_lr0.0001_fold4,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101,src/cli.py,0.000793
8,5c27007ca1c84d67ba403c4e9fff9e0f,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:15:48.884000+00:00,2025-08-07 17:15:58.603000+00:00,0.528125,1.143807,1.108587,6.815546,...,kamil-solski,6.82 sec,"hu=16,lr=0.001",fold3,Food101_0_hu16_lr0.001_fold3,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py,0.03522
9,4653a923eda041468990fb4cafcf2d85,600923659547953467,FINISHED,file:///home/kamil-solski/Documents/Python/Pro...,2025-08-07 17:15:39.672000+00:00,2025-08-07 17:15:48.881000+00:00,0.430208,1.254495,1.257044,6.517787,...,kamil-solski,6.52 sec,"hu=16,lr=0.0001",fold3,Food101_0_hu16_lr0.0001_fold3,27d5d6dee353391931932da4c3110f7ebc3e5065,LOCAL,Food101_0,src/cli.py,0.002549


In [9]:
def group_by_arch_and_config(runs: pd.DataFrame, val_metric: str = "val_acc") -> pd.DataFrame:
    """
    Average fold-level results per (architecture, config) combination.

    Args:
        runs: frame with "tags.architecture", "tags.config",
            "metrics.val_loss" and "metrics.<val_metric>" columns.
        val_metric: metric name (without the "metrics." prefix) to average.

    Returns:
        One row per combination with the number of runs in the group ("folds"),
        the mean validation loss and the mean of the chosen metric.
    """
    metric_column = f"metrics.{val_metric}"
    per_combination = runs.groupby(["tags.architecture", "tags.config"])

    rows = [
        {
            "architecture": key[0],
            "config": key[1],
            "folds": len(fold_runs),
            "avg_val_loss": fold_runs["metrics.val_loss"].mean(),
            f"avg_{val_metric}": fold_runs[metric_column].mean(),
        }
        for key, fold_runs in per_combination
    ]
    return pd.DataFrame(rows)


In [10]:
# Average the filtered runs across folds for every (architecture, config) pair
grouped_runs = group_by_arch_and_config(filterd_runs, val_metric="val_acc")
grouped_runs  # some hyperparameter combinations may be missing (or have fewer folds) if runs with too big a loss discrepancy were filtered out

Unnamed: 0,architecture,config,folds,avg_val_loss,avg_val_acc
0,Food101,"hu=16,lr=0.0001",5,1.321466,0.366146
1,Food101,"hu=16,lr=0.001",5,1.183307,0.479687
2,Food101,"hu=8,lr=0.0001",5,1.385394,0.265625
3,Food101,"hu=8,lr=0.001",5,1.264498,0.385417
4,Food101_0,"hu=16,lr=0.0001",5,1.282962,0.408854
5,Food101_0,"hu=16,lr=0.001",4,1.178161,0.489583
6,Food101_0,"hu=8,lr=0.0001",5,1.35863,0.344792
7,Food101_0,"hu=8,lr=0.001",5,1.156671,0.495312


Now with averages we can normalize:
$$\text{loss}_{\text{score}} = 1 - \frac{\text{avg}_{\text{val-loss}} - \text{min}_{\text{val-loss}}}{\text{max}_{\text{val-loss}} - \text{min}_{\text{val-loss}}}$$

$$\text{acc}_{\text{score}} = \frac{\text{avg}_{\text{val-acc}} - \text{min}_{\text{val-acc}}}{\text{max}_{\text{val-acc}} - \text{min}_{\text{val-acc}}}$$

And compute scores (to choose the best):
$$\text{score} = \text{acc}_{\text{score}} \cdot \text{acc}_{\text{weight}} + \text{loss}_{\text{score}} \cdot \text{loss}_{\text{weight}}$$

At the end, take the best hyperparameter configuration of each architecture and compare those models between architectures on test data.

In [11]:
def _normalized_score(values: pd.Series, higher_is_better: bool) -> pd.Series:
    """Min-max scale ``values`` to [0, 1] so that 1 is always the best candidate."""
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # All candidates tie (e.g. a single row): min-max would divide by zero
        # and yield NaN, so every non-missing candidate gets the top score.
        return pd.Series(1.0, index=values.index).where(values.notna())
    scaled = (values - lowest) / span
    return scaled if higher_is_better else 1 - scaled


def score_models(df: pd.DataFrame, val_metric: str = "val_acc",
                 acc_weight: float = 0.7, loss_weight: float = 0.3) -> pd.DataFrame:
    """
    Rank (architecture, config) combinations by a weighted, normalized score.

    Args:
        df: frame with "avg_val_loss" and "avg_<val_metric>" columns.
        val_metric: name of the averaged metric column (without "avg_").
        acc_weight: weight of the normalized metric score.
        loss_weight: weight of the normalized loss score.

    Returns:
        A copy of ``df`` with "loss_score", "metric_score" and "score" columns,
        sorted by "score" in descending order. The input frame is not modified.
    """
    df = df.copy()

    # Normalize: the lowest loss and the highest metric both map to 1.0
    df["loss_score"] = _normalized_score(df["avg_val_loss"], higher_is_better=False)
    df["metric_score"] = _normalized_score(df[f"avg_{val_metric}"], higher_is_better=True)

    # Weighted score
    df["score"] = acc_weight * df["metric_score"] + loss_weight * df["loss_score"]
    return df.sort_values("score", ascending=False)


In [12]:
# To check which configuration the scoring algorithm ranks best (the automated pipeline uses the same algorithm), run this cell:
scored = score_models(grouped_runs, val_metric="val_acc", acc_weight=0.7, loss_weight=0.3)  # you can modify the weights based on how important you think each metric should be
scored  # look at scores

Unnamed: 0,architecture,config,folds,avg_val_loss,avg_val_acc,loss_score,metric_score,score
7,Food101_0,"hu=8,lr=0.001",5,1.156671,0.495312,1.0,1.0,1.0
5,Food101_0,"hu=16,lr=0.001",4,1.178161,0.489583,0.906044,0.975057,0.954353
1,Food101,"hu=16,lr=0.001",5,1.183307,0.479687,0.883544,0.931973,0.917444
4,Food101_0,"hu=16,lr=0.0001",5,1.282962,0.408854,0.447843,0.623583,0.570861
3,Food101,"hu=8,lr=0.001",5,1.264498,0.385417,0.528568,0.521542,0.52365
0,Food101,"hu=16,lr=0.0001",5,1.321466,0.366146,0.279498,0.437642,0.390198
6,Food101_0,"hu=8,lr=0.0001",5,1.35863,0.344792,0.117014,0.344671,0.276374
2,Food101,"hu=8,lr=0.0001",5,1.385394,0.265625,0.0,0.0,0.0


In [13]:
def select_best_configs(df_scored: pd.DataFrame) -> pd.DataFrame:
    """
    Pick the highest-scoring row of every architecture.

    Args:
        df_scored: frame with "architecture" and "score" columns.

    Returns:
        One row per architecture (the one with the maximum "score"),
        re-indexed from 0.
    """
    # Index label of the top-scoring row within each architecture group
    winner_labels = df_scored.groupby("architecture")["score"].idxmax()
    winners = df_scored.loc[winner_labels]
    return winners.reset_index(drop=True)

In [14]:
# Best-scoring hyperparameter configuration of each architecture
best_config = select_best_configs(scored)
best_config

Unnamed: 0,architecture,config,folds,avg_val_loss,avg_val_acc,loss_score,metric_score,score
0,Food101,"hu=16,lr=0.001",5,1.183307,0.479687,0.883544,0.931973,0.917444
1,Food101_0,"hu=8,lr=0.001",5,1.156671,0.495312,1.0,1.0,1.0


In [15]:
def load_best_models(best_config: pd.DataFrame, experiment_name: str) -> dict:
    """
    Loads best models from MLflow using best_config dataframe.

    For every row of ``best_config`` the finished runs tagged with the same
    architecture and config are searched, and the model of the run with the
    highest ``metrics.val_acc`` is loaded and switched to eval mode.

    Args:
        best_config: frame with "architecture" and "config" columns; config
            strings look like "hu=16,lr=0.001".
        experiment_name: name of the MLflow experiment holding the runs.

    Returns: dict of {architecture_name: {"model", "run_id", "hu", "lr"}}

    Raises:
        ValueError: if the experiment does not exist, a config string cannot
            be parsed, or no matching run is found.
    """
    # The experiment is the same for every row, so look it up only once
    experiment = mlflow.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise ValueError(f"Experiment '{experiment_name}' not found.")

    model_dict = {}

    for _, row in best_config.iterrows():
        arch = row["architecture"]
        config_str = row["config"]

        # Extract hyperparameters from "key=value,key=value" regardless of key order
        try:
            pairs = (item.split("=", 1) for item in config_str.split(","))
            hyperparams = {key.strip(): value.strip() for key, value in pairs}
            hu = int(hyperparams["hu"])
            lr = float(hyperparams["lr"])
        except (KeyError, ValueError) as err:
            raise ValueError(
                f"Cannot parse hyperparameters from config '{config_str}'"
            ) from err

        # Search for the corresponding runs; restricted to FINISHED runs to stay
        # consistent with get_all_runs, which the scores were computed from
        runs = mlflow.search_runs(
            experiment_ids=[experiment.experiment_id],
            filter_string=(
                "attributes.status = 'FINISHED' "
                f"and tags.architecture = '{arch}' and tags.config = '{config_str}'"
            ),
            output_format="pandas"
        )
        if runs.empty:
            raise ValueError(f"No run found for arch={arch}, config={config_str}")

        # Among the folds of this configuration use the run with the highest val_acc
        best_run = runs.sort_values("metrics.val_acc", ascending=False).iloc[0]
        run_id = best_run.run_id

        # Load model from MLflow artifact
        model = mlflow.pytorch.load_model(f"runs:/{run_id}/model")
        model.eval()
        model_dict[arch] = {
            "model": model,
            "run_id": run_id,
            "hu": hu,
            "lr": lr
        }

    return model_dict

In [16]:
# Load the model of the best run for each architecture's winning configuration
model_dict = load_best_models(best_config=best_config, experiment_name=experiment_name)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

In [17]:
# Inspect the loaded models and their run_id / hyperparameters
model_dict

{'Food101': {'model': Food101(
    (conv_block_1): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
      (1): ReLU()
      (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (conv_block_2): Sequential(
      (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
      (1): ReLU()
      (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (conv_block_3): Sequential(
      (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
      (1): ReLU()
      (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1))
      (3): ReLU()
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (classifier): Sequential(
      (0): Flatten(start_dim=1, end_dim=-1)
      (1): Linear(in_features=256, out_features=4,

### Plot and find the best auc

In [19]:
# Plot ROC curves for the loaded models on the test loader.
# The call returns the figure and the AUC scores (per class and per model, see the output below).
fig, auc_scores = plot_roc_ovr(
    model_dict=model_dict,
    test_dataloader=test_loader,
    class_names=class_names
)

In [None]:
# Save the plot manually: the plotting script has no save option because it is non-interactive
path_to_plot_roc = Path.cwd().parent / "outputs/figures/roc_overlay_ovr.png"
# savefig does not create missing directories, so make sure the target folder exists
path_to_plot_roc.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(path_to_plot_roc)

In [21]:
# Inspect the AUC scores: {class_name: {model_name: {"auc", "run_id"}}}
auc_scores

{'apple_pie': {'Food101_hu16_lr0.001': {'auc': 0.7618518518518519,
   'run_id': '03b7188fc24c4d68a0f2581c44297072'},
  'Food101_0_hu8_lr0.001': {'auc': 0.8188888888888889,
   'run_id': '3e0b7710f6ed466298632b12beb22f7d'}},
 'beef_tartare': {'Food101_hu16_lr0.001': {'auc': 0.731111111111111,
   'run_id': '03b7188fc24c4d68a0f2581c44297072'},
  'Food101_0_hu8_lr0.001': {'auc': 0.8911111111111112,
   'run_id': '3e0b7710f6ed466298632b12beb22f7d'}},
 'caesar_salad': {'Food101_hu16_lr0.001': {'auc': 0.9314814814814815,
   'run_id': '03b7188fc24c4d68a0f2581c44297072'},
  'Food101_0_hu8_lr0.001': {'auc': 0.9159259259259259,
   'run_id': '3e0b7710f6ed466298632b12beb22f7d'}},
 'cannoli': {'Food101_hu16_lr0.001': {'auc': 0.7192592592592593,
   'run_id': '03b7188fc24c4d68a0f2581c44297072'},
  'Food101_0_hu8_lr0.001': {'auc': 0.8714814814814815,
   'run_id': '3e0b7710f6ed466298632b12beb22f7d'}}}

In [22]:
def select_best_model_from_auc(auc_scores: dict) -> tuple[dict, dict]:
    """
    Pick the model with the highest AUC averaged over all classes.

    auc_scores: dict[class_name][model_name] = {"auc": float, "run_id": str}

    Returns:
        best_model_dict: {model_full_name: run_id}
        model_avg_auc: {model_full_name: average_auc}

    Raises:
        ValueError: if auc_scores contains no model at all.
    """
    model_totals = defaultdict(list)
    model_run_ids = {}

    # Collect each model's per-class AUC values; the run_id is the same in every class
    for class_aucs in auc_scores.values():
        for model_name, data in class_aucs.items():
            model_totals[model_name].append(data["auc"])
            model_run_ids[model_name] = data["run_id"]

    if not model_totals:
        # max() on an empty dict would raise an opaque "empty sequence" error
        raise ValueError("auc_scores contains no models to choose from.")

    model_avg_auc = {
        model: np.mean(scores) for model, scores in model_totals.items()
    }

    best_model_name = max(model_avg_auc, key=model_avg_auc.get)
    best_model_dict = {best_model_name: model_run_ids[best_model_name]}

    return best_model_dict, model_avg_auc

In [24]:
# Pick the model with the highest average AUC and print it together with all averages
best_model, model_avg_auc = select_best_model_from_auc(auc_scores=auc_scores)
print(f"{best_model}\n\n{model_avg_auc}")

{'Food101_0_hu8_lr0.001': '3e0b7710f6ed466298632b12beb22f7d'}

{'Food101_hu16_lr0.001': np.float64(0.7859259259259259), 'Food101_0_hu8_lr0.001': np.float64(0.8743518518518518)}


### Model registry
WARNING!!! We can't register a model with onnx, but we can log it with onnx. So we need to pick the best model, convert it to onnx and then register it with the challenger alias (and this is exactly what we do in onnx_register.py in the automated pipeline).

In [None]:
client = MlflowClient()

# Manually register the model (optional)
# best_model maps the selected model's name to its run_id
for model_name, run_id in best_model.items():
    model_registry_name = "manual_best_model"
    
    # Register the model logged under the run's "model" artifact path
    result = mlflow.register_model(
        model_uri=f"runs:/{run_id}/model",
        name=model_registry_name
    )
    # Assign challenger alias to the newest registered version
    client.set_registered_model_alias(
        name=model_registry_name,
        version=result.version,
        alias="challenger"
    )
    
# To promote the model manually, just open the MLflow UI and click "Promote model"