In [None]:
import glob
import io
import json
import os
import sys
import types

import numpy as np
import pandas as pd
from tqdm import tqdm

In [None]:
# Glob pattern (note the `Run*` wildcard) matching the `models/` directory of
# each run; the selection cells below search it for `*.json` result files.
HP_folder = '../image_model_training/HP_scan_image_model/Run*/models/'

In [None]:

def load_results(filename, two_heads=False):
    """Loads conf and results from a file
    Args:
        filename    name of the json/npy file
        two_heads   set up class_output_size if missing
    """
    if filename.endswith(".npy"):
        return np.load(filename, allow_pickle=True).item()

    with open(filename, "r") as f:
        data = json.load(f)

    for key in ["model_type"]:
        if key not in data["conf"]:
            data["conf"][key] = None
    if two_heads and ("class_output_size" not in data["conf"]):
        data["conf"]["class_output_size"] = data["conf"]["output_size"]
        data["conf"]["regr_output_size"]  = 0

    data["conf"] = types.SimpleNamespace(**data["conf"])


    if "results" in data:
        for key in data["results"]:
            data["results"][key] = pd.read_json(data["results"][key])

    if "results_agg" in data:
        for key in data["results_agg"]:
            data["results_agg"][key] = pd.read_json(data["results_agg"][key], typ="series")

    for key in ["training", "validation"]:
        if key not in data:
            continue
        for dfkey in ["classification", "regression"]:
            data[key][dfkey] = pd.read_json(data[key][dfkey])
        for skey in ["classification_agg", "regression_agg"]:
            data[key][skey]  = pd.read_json(data[key][skey], typ="series")

    return data



In [None]:
# Pick the run whose validation score, averaged over the classification tasks
# that have more than `min_samples` positives AND negatives, is highest.
eval_metric = 'roc_auc_score'
min_samples = 5

# Reset the selection state on every execution of this cell so the result
# never depends on a previous run; `best_model` stays None when no result
# file matches or no score exceeds 0 (instead of being undefined or stale).
best_model = None
avg_perf = 0

# sorted() makes the visiting order (and therefore tie-breaking) reproducible;
# glob returns files in arbitrary filesystem order.
for file in tqdm(sorted(glob.glob(os.path.join(HP_folder, '*.json')))):
    res = load_results(file)
    valid_tasks = res['validation']['classification'].query(
        'num_pos>@min_samples and num_neg>@min_samples'
    )
    # An empty selection yields NaN, which never compares greater and is skipped.
    new_avg_perf = np.mean(valid_tasks[eval_metric])
    if avg_perf < new_avg_perf:
        avg_perf = new_avg_perf
        # Name of the directory two levels above the file (the `Run*` folder),
        # extracted without assuming '/' as the path separator.
        best_model = os.path.basename(os.path.dirname(os.path.dirname(file)))

In [None]:
# Display the selected run directory name and its mean per-task validation score.
best_model, avg_perf

In [None]:
## Model selection by aggregated validation AUC

In [None]:
# Pick the run with the highest aggregated validation score
# (`classification_agg`), i.e. without any per-task filtering.
eval_metric = 'roc_auc_score'
min_samples = 5  # not used by this cell; kept so the notebook's globals are unchanged

# Reset the selection state on every execution of this cell so a value left
# over from an earlier cell is never reported; `best_model` stays None when no
# result file matches or no score exceeds 0.
best_model = None
avg_perf = 0

# sorted() makes the visiting order (and therefore tie-breaking) reproducible;
# glob returns files in arbitrary filesystem order.
for file in tqdm(sorted(glob.glob(os.path.join(HP_folder, '*.json')))):
    res = load_results(file)
    new_avg_perf = np.mean(res['validation']['classification_agg'][eval_metric])
    if avg_perf < new_avg_perf:
        avg_perf = new_avg_perf
        # Name of the directory two levels above the file (the `Run*` folder),
        # extracted without assuming '/' as the path separator.
        best_model = os.path.basename(os.path.dirname(os.path.dirname(file)))

In [None]:
# Display the run selected by the aggregated validation score, together with that score.
best_model,avg_perf