In [6]:
import json
import os
from collections import Counter
from pathlib import Path

import pandas as pd

# Gather metrics

In [7]:
# Build a nested dict of benchmark metrics:
#   model -> "tasks*" directory -> split directory -> results file stem -> metrics
path_benchmarks = Path("../benchmarks")

# 1. List the model directories found directly under the benchmarks directory.
#    os.walk yields nothing when the directory cannot be listed, so the fallback
#    tuple must hold an empty *list* in the dirnames slot (index 1). The previous
#    fallback held None there, which crashed with
#    "'NoneType' object is not iterable" whenever the path was missing.
model_names = next(os.walk(path_benchmarks), (None, [], []))[1]

# 2. Create a dict with the model names as keys.
dict_results = {m: {} for m in model_names}

# 3. For each model, add one entry per sub-directory whose name contains "tasks"
#    (e.g. "tasks", "tasks_dflt", "tasks_opti").
for m in dict_results:
    path_tasks = path_benchmarks / m
    for d in next(os.walk(path_tasks), (None, [], []))[1]:
        if "tasks" in d:
            dict_results[m][d] = {}

print(dict_results)

# 4. Under each tasks directory, add one entry per split directory, i.e. any
#    sub-directory whose name contains "distribution" or "random"
#    (e.g. "random_125").
for m in dict_results:
    path_tasks = path_benchmarks / m
    for t in dict_results[m]:
        path_split = path_tasks / t
        for d in next(os.walk(path_split), (None, [], []))[1]:
            if "distribution" in d or "random" in d:
                dict_results[m][t][d] = {}

print(dict_results)

# 5. Under each split, load every file whose name contains "json" and store its
#    "metrics" entry under the file name truncated at the first ".json".
for m in dict_results:
    path_tasks = path_benchmarks / m
    for t in dict_results[m]:
        path_split = path_tasks / t
        for s in dict_results[m][t]:
            path_feat = path_split / s

            for fname in next(os.walk(path_feat), (None, [], []))[2]:
                if "json" not in fname:
                    continue
                with open(path_feat / fname, "r") as file:
                    results = json.load(file)
                dict_results[m][t][s][fname.split(".json")[0]] = results["metrics"]

{'modnet_nan': {'tasks': {}}, 'coGN': {'tasks': {}}, 'claude-3.5-sonnet': {'tasks': {}}, 'matten': {'tasks': {}}, 'modnet': {'tasks': {}}, 'coNGN': {'tasks': {}}, 'openai-gpt-4o': {'tasks': {}}, 'mean_value': {'tasks': {}}, 'megnet': {}, 'et': {'tasks_dflt': {}, 'tasks_opti': {}, 'tasks_An': {}}, 'median_value': {'tasks': {}}, 'deepseek-chat': {}, 'darwin-1.5': {'tasks': {}}, 'lgbm': {'tasks_dflt': {}, 'tasks_opti': {}, 'tasks_An': {}}}
{'modnet_nan': {'tasks': {'distribution_125': {}, 'distribution_250': {}, 'random_125': {}, 'random_250': {}}}, 'coGN': {'tasks': {'distribution_125': {}, 'distribution_250': {}, 'random_125': {}, 'random_250': {}}}, 'claude-3.5-sonnet': {'tasks': {'distribution_125': {}, 'random_125': {}, 'random_250': {}}}, 'matten': {'tasks': {'distribution_125': {}, 'distribution_250': {}, 'random_125': {}, 'random_250': {}}}, 'modnet': {'tasks': {'distribution_125': {}, 'distribution_250': {}, 'random_125': {}, 'random_250': {}}}, 'coNGN': {'tasks': {'distribution_

In [8]:
# Flatten the nested results dict: the keys met along each path are joined with
# "_" and mapped to the first non-dict value reached. Empty sub-dicts add nothing.
f = {}
pending = [(dict_results, "")]  # (sub-dict, key prefix) pairs, consumed last-in first-out

while len(pending) > 0:
    node, prefix = pending.pop()

    for key in node:
        value = node[key]
        flat_key = key if not prefix else f"{prefix}_{key}"

        if not isinstance(value, dict):
            f[flat_key] = value  # leaf: record it under the joined key
            continue
        pending.append((value, flat_key))  # nested dict: visit it later

# print(f)

In [9]:
# Regroup the flattened entries into {row label: {metric name: value}}.
# Each flat key has "_tasks" removed and is then split around "results_": the
# left part is the row label, the right part the metric name.
dict_results_ravel = {}
for k, v in f.items():
    new_k, metric = k.replace("_tasks", "").split("results_")
    # Check the exclusions *before* creating the entry. Creating it first left
    # an empty dict behind for every excluded label, which shows up as an
    # all-NaN row once the dict is turned into a DataFrame.
    if "modnet_nan" in new_k or "_An_" in new_k:
        continue
    dict_results_ravel.setdefault(new_k, {})[metric] = v

In [10]:
def df_style(val):
    """Return the CSS declaration that colours a cell red; `val` is not inspected."""
    css = "color: red"
    return css


def df_style_bold(val):
    """Return the CSS declaration that renders a cell in bold; `val` is not inspected."""
    css = "font-weight: bold"
    return css


import numpy as np


def highlight_max(s, props="color:red"):
    """Return an array with `props` where `s` equals its maximum and "" elsewhere.

    The maximum is taken with `np.nanmax` over `s.values`, so NaN entries are
    ignored when looking for it and never compare equal to it.
    """
    peak = np.nanmax(s.values)
    is_peak = s == peak
    return np.where(is_peak, props, "")


def _style_cells(styler, func, subset):
    """Apply `func` element-wise to `subset` of `styler` and return the styler.

    `Styler.applymap` was renamed to `Styler.map` in pandas 2.1 and the old name
    is deprecated, so `map` is preferred and `applymap` is only used when the
    styler has no `map` attribute (older pandas).
    """
    method = getattr(styler, "map", None) or styler.applymap
    return method(func, subset=subset)


# Columns that receive the highlighting.
METRIC_COLUMNS = ["mae", "rmse", "spearman", "r2_score"]

# One row per label of dict_results_ravel, one column per metric.
df_results = pd.DataFrame.from_dict(dict_results_ravel, orient="index")

# Build, store and display one styled table per holdout split.
dict_df_results_splits = {}
for split in ["random_125", "random_250", "distribution_125", "distribution_250"]:
    # Keep only the rows whose label mentions this split.
    indexes = [i for i in df_results.index if split in i]
    df_tmp = df_results.filter(indexes, axis=0)

    # Shorten the row labels: drop the split name and collapse the "__" it leaves.
    new_index = []
    for i in df_tmp.index:
        tmp = i.replace(split, "").replace("__", "_")
        splitted_tmp = tmp.split("_")
        # Substring test: when the first "_"-separated token is contained in the
        # second one (noted by the author as the case for claude and darwin),
        # keep only the piece that follows the first "<token>_".
        if splitted_tmp[0] in splitted_tmp[1]:
            tmp = tmp.split(splitted_tmp[0] + "_")[1]

        new_index.append(tmp)
    df_tmp.index = new_index
    df_tmp = df_tmp.sort_values(by="spearman", ascending=False)

    # Rows holding the lowest mae, the lowest rmse and the highest spearman;
    # all four metric cells of those rows are coloured red.
    idx_best = []
    idx_best.extend(df_tmp[["mae", "rmse"]].idxmin().tolist())
    idx_best.extend(df_tmp[["spearman"]].idxmax().tolist())
    styled = _style_cells(df_tmp.style, df_style, (idx_best, METRIC_COLUMNS))

    # If one row is best for more than one of those metrics, also make it bold.
    idx_best_max, idx_best_max_count = Counter(idx_best).most_common(1)[0]
    if idx_best_max_count > 1:
        styled = _style_cells(styled, df_style_bold, ([idx_best_max], METRIC_COLUMNS))

    dict_df_results_splits[split] = styled

    print(split)
    display(styled)

random_125


Unnamed: 0,mae,rmse,spearman,r2_score
coNGN_,7.178967,16.071557,0.934316,0.726453
modnet_mmf_pgnn_,7.304253,19.423989,0.916129,0.600429
modnet_mmf_,8.803178,23.035541,0.907785,0.438029
modnet_pgnn_,8.262533,19.751812,0.901475,0.586828
et_dflt_mmf_pgnn_,8.307911,19.589507,0.894224,0.593591
et_dflt_mmf_,8.192585,19.784299,0.892774,0.585468
lgbm_opti_mmf_pgnn_,8.57264,19.129844,0.890022,0.612439
et_dflt_pgnn_,8.341509,19.831881,0.889002,0.583472
lgbm_dflt_mmf_pgnn_,7.689738,18.333103,0.880614,0.64405
lgbm_dflt_mmf_,7.743431,18.366044,0.87922,0.64277


random_250


Unnamed: 0,mae,rmse,spearman,r2_score
modnet_mmf_pgnn_,7.669056,19.340598,0.911464,0.525602
modnet_mmf_,8.18014,19.811614,0.905217,0.502214
coNGN_,7.648139,19.712461,0.904744,0.507185
modnet_pgnn_,7.761428,18.943342,0.8917,0.544891
et_dflt_mmf_,7.545741,18.001918,0.888101,0.589002
coGN_,8.064375,19.08108,0.882304,0.538248
et_dflt_mmf_pgnn_,8.35804,18.637842,0.881947,0.559451
lgbm_dflt_mmf_pgnn_,9.191596,19.964367,0.865527,0.494509
et_dflt_pgnn_,8.775236,19.223841,0.863953,0.531313
lgbm_opti_mmf_pgnn_,9.213405,19.764144,0.854525,0.504597


distribution_125


Unnamed: 0,mae,rmse,spearman,r2_score
modnet_mmf_,6.492044,13.945333,0.864332,0.743069
modnet_mmf_pgnn_,5.567543,12.074188,0.860172,0.807392
modnet_pgnn_,6.542703,14.96214,0.855152,0.704235
lgbm_dflt_mmf_pgnn_,7.301577,14.945985,0.843207,0.704873
matten_,7.317151,19.053818,0.821155,0.520351
lgbm_opti_mmf_,7.514003,15.45016,0.820043,0.684626
et_dflt_mmf_pgnn_,7.202488,14.867961,0.815736,0.707947
coNGN_,6.960003,19.004716,0.814673,0.52282
et_dflt_mmf_,6.839354,14.120979,0.813573,0.736556
coGN_,6.707882,15.604909,0.812836,0.678277


distribution_250


Unnamed: 0,mae,rmse,spearman,r2_score
modnet_mmf_pgnn_,5.765881,15.279833,0.883553,0.635569
modnet_pgnn_,5.817681,14.48358,0.869175,0.672562
modnet_mmf_,5.609491,14.656513,0.869088,0.664696
coNGN_,6.015887,15.535816,0.86087,0.623256
coGN_,5.964593,15.13759,0.854948,0.642323
et_dflt_pgnn_,6.675432,15.839602,0.849602,0.608379
et_dflt_mmf_pgnn_,6.189507,15.023034,0.844422,0.647716
et_dflt_mmf_,6.144405,15.278746,0.83796,0.635621
lgbm_dflt_mmf_pgnn_,6.410858,14.737723,0.835139,0.66097
lgbm_opti_mmf_pgnn_,7.191935,16.425163,0.832785,0.578888


# Investigate prediction errors

In [11]:
# Build a nested dict of benchmark predictions:
#   model -> "tasks*" directory -> split directory -> results file stem -> predictions
path_benchmarks = Path("../benchmarks")

# 1. List the model directories found directly under the benchmarks directory.
#    os.walk yields nothing when the directory cannot be listed, so the fallback
#    tuple must hold an empty *list* in the dirnames slot (index 1). The previous
#    fallback held None there, which crashed with
#    "'NoneType' object is not iterable" whenever the path was missing.
model_names = next(os.walk(path_benchmarks), (None, [], []))[1]

# 2. Create a dict with the model names as keys.
dict_results = {m: {} for m in model_names}

# 3. For each model, add one entry per sub-directory whose name contains "tasks".
for m in dict_results:
    path_tasks = path_benchmarks / m
    for d in next(os.walk(path_tasks), (None, [], []))[1]:
        if "tasks" in d:
            dict_results[m][d] = {}

# 4. Under each tasks directory, add one entry per split directory, i.e. any
#    sub-directory whose name contains "distribution" or "random".
for m in dict_results:
    path_tasks = path_benchmarks / m
    for t in dict_results[m]:
        path_split = path_tasks / t
        for d in next(os.walk(path_split), (None, [], []))[1]:
            if "distribution" in d or "random" in d:
                dict_results[m][t][d] = {}

# 5. Under each split, load every file whose name contains "json" and store its
#    "predictions" entry under the file name truncated at the first ".json".
for m in dict_results:
    path_tasks = path_benchmarks / m
    for t in dict_results[m]:
        path_split = path_tasks / t
        for s in dict_results[m][t]:
            path_feat = path_split / s

            for fname in next(os.walk(path_feat), (None, [], []))[2]:
                if "json" not in fname:
                    continue
                with open(path_feat / fname, "r") as file:
                    results = json.load(file)
                dict_results[m][t][s][fname.split(".json")[0]] = results["predictions"]

# Flatten the nested predictions dict: the keys met along each path are joined
# with "_" and mapped to the first non-dict value reached.
f = {}
todo = [(dict_results, "")]  # (sub-dict, key prefix) pairs, consumed last-in first-out

while len(todo) > 0:
    current, key_prefix = todo.pop()

    for name in current:
        child = current[name]
        joined = name if not key_prefix else f"{key_prefix}_{name}"

        if not isinstance(child, dict):
            f[joined] = child  # leaf: record it under the joined key
            continue
        todo.append((child, joined))  # nested dict: visit it later

# print(f)

# Regroup the flattened entries into {row label: {entry name: value}}: each flat
# key has "_tasks" removed and is split around "results_" into label and name.
dict_results_ravel = {}
for flat_key, value in f.items():
    label, entry = flat_key.replace("_tasks", "").split("results_")
    dict_results_ravel.setdefault(label, {})[entry] = value

# display(dict_results_ravel)

In [12]:
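# NOTE: this cell is disabled. It uses `SHG_BENCHMARK_SPLITS`, `load_holdout` and
# `go`, none of which is imported or defined in the cells visible here.
# for split in SHG_BENCHMARK_SPLITS: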
#     for k, v in dict_results_ravel.items():
#         if split not in k:  # to sort by holdout set
#             continue
#         if "_An" in k:  # too many figures otherwise
#             continue
#         if (
#             "et_" in k or "lgbm" in k or "modnet" in k
#         ) and "mmf_pgnn" not in k:  # too many figures otherwise
#             continue
#         if "claude" in k or "darwin" in k:
#             continue
#         df_pred = pd.DataFrame.from_dict(v, orient="index")
#         holdout_df = load_holdout(split)
#         df_pred = df_pred.filter(holdout_df.index, axis=0)

#         holdout_df_vt = holdout_df[holdout_df["origin"] != "Naccarato"]
#         df_pred_vt = df_pred.filter(holdout_df_vt.index, axis=0)
#         holdout_df_nac = holdout_df[holdout_df["origin"] == "Naccarato"]
#         df_pred_nac = df_pred.filter(holdout_df_nac.index, axis=0)

#         true_values_vt = holdout_df_vt["dKP_full_neum"].tolist()
#         pred_values_vt = df_pred_vt[0].tolist()

#         true_values_nac = holdout_df_nac["dKP_full_neum"].tolist()
#         pred_values_nac = df_pred_nac[0].tolist()

#         # Scatter plot of the holdout entries whose origin is not "Naccarato".
#         scatter_plot_vt = go.Scatter(
#             x=np.log(true_values_vt),
#             y=np.log(pred_values_vt),
#             mode="markers",
#             name="",
#             showlegend=False,
#             text=[mpid for mpid in holdout_df_vt.index.tolist()],
#         )

#         # Scatter plot of the "Naccarato" holdout entries, drawn in red.
#         scatter_plot_nac = go.Scatter(
#             x=np.log(true_values_nac),
#             y=np.log(pred_values_nac),
#             mode="markers",
#             marker={"color": "red"},
#             name="",
#             showlegend=False,
#             text=[mpid for mpid in holdout_df_nac.index.tolist()],
#         )

#         ideal = go.Scatter(
#             x=[-10, 8],
#             y=[-10, 8],
#             mode="lines",
#             line=dict(color="gray", dash="dot"),
#             showlegend=False,
#         )

#         # Layout
#         layout = go.Layout(
#             title=dict(text=f"{k}"),
#             xaxis=dict(title="ln(<i>d</i><sub>KP</sub>) (pm/V)", range=[-6.2, 6.2]),
#             yaxis=dict(
#                 title="ln(<i>d&#770;</i><sub>KP</sub>) (pm/V)", range=[-6.2, 6.2]
#             ),
#             # legend=dict(font=dict(size=12)),
#         )

#         # Create figure
#         fig = go.Figure(data=[scatter_plot_vt, scatter_plot_nac, ideal], layout=layout)

#         fig.update_layout(
#             autosize=False,
#             font_size=20,
#             width=600,
#             height=600,
#             # plot_bgcolor="white",
#             template="simple_white",
#         )
#         fig.update_layout(
#             xaxis=dict(
#                 tickmode="linear",
#                 tick0=0,
#                 dtick=2,
#                 showgrid=False,
#             ),
#             yaxis=dict(
#                 tickmode="linear",
#                 tick0=0,
#                 dtick=2,
#                 showgrid=False,
#             ),
#         )

#         fig.show()