In [1]:
import pandas as pd
import numpy as np
import os
from os.path import join
import optuna
from omegaconf import OmegaConf

from proT.euler_optuna.optuna_opt import OptunaStudy

def generate_summary_if_missing(folder_path: str):
    """Create the best-trial summary of an experiment folder when it is absent.

    Nothing is done when ``<folder_path>/best_trial.yaml`` already exists or
    when there is no ``<folder_path>/optuna/study.db`` to read from. Otherwise
    the first study stored in that database is wrapped in an ``OptunaStudy``
    and its ``summary()`` method is invoked.

    Args:
        folder_path: Path of one experiment folder.

    Returns:
        None. Any exception raised while building the summary is caught and
        reported with ``print`` instead of being propagated.
    """
    summary_file = join(folder_path, "best_trial.yaml")
    optuna_dir = join(folder_path, "optuna")
    database_file = join(optuna_dir, "study.db")

    # Work is only needed when the summary is missing AND a study database exists.
    summary_missing = not os.path.exists(summary_file)
    if not (summary_missing and os.path.exists(database_file)):
        return

    try:
        # The study name is not known in advance: take the first one in the database.
        storage_url = f"sqlite:///{database_file}?timeout=60"
        stored_studies = optuna.get_all_study_summaries(storage=storage_url)
        first_study_name = stored_studies[0].study_name

        study_wrapper = OptunaStudy(
            exp_dir=folder_path,
            data_dir="../data/input",
            cluster=False,
            study_name=first_study_name,
            manifest_tag="NA",
            study_path=optuna_dir,
        )
        study_wrapper.summary()
    except Exception as err:
        # Best effort: a broken study must not stop the processing of other folders.
        print(f"Failed to generate summary for {os.path.basename(folder_path)}: {err}")


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Folder whose entries are the individual baseline experiment folders.
baseline_result_path = "../experiments/baseline_optuna/euler/"

# Tags searched for (as "_<tag>_") in each experiment folder name.
models = [
    "GRU",
    "LSTM",
    "proT",
    "proTNoNAIM",
    "proTPhySim",
    "proTCVRand",
    "TCN",
    "MLP",
]
fusions = ["cat", "sum"]
datasets = ["ishigami", "dyconex"]

# Parallel accumulators filled while looping over the experiment folders;
# they become columns of the results table afterwards.
model_list, fusion_list, dataset_list = [], [], []
trial_list = []
trainable_params = []
av_epoch_training_time = []
best_test_mae, best_test_r2 = [], []

# Results table, assembled from the per-experiment metrics.
df = None

def get_info_from_folder(folder: str, options: list, option_list: list):
    """Append to ``option_list`` the option encoded in a folder name.

    An option matches when it appears in ``folder`` surrounded by underscores
    (``"_<option>_"``), so ``"proT"`` does not match ``"..._proTNoNAIM_..."``.

    Exactly one entry is appended per call, which keeps ``option_list`` aligned
    with the other per-folder lists: the first matching option (in the order
    given by ``options``), or ``None`` when no option matches.

    Args:
        folder: Experiment folder name to inspect.
        options: Candidate tags to look for.
        option_list: List that receives the result; modified in place.
    """
    matched = next((opt for opt in options if f"_{opt}_" in folder), None)
    option_list.append(matched)
            

# Collect, for every experiment folder, the metrics of its best Optuna trial.
metric_frames = []  # one single-column frame of aggregated metrics per kept folder

for folder in os.listdir(baseline_result_path):
    folder_path = join(baseline_result_path, folder)

    # Auto-generate summary if missing
    generate_summary_if_missing(folder_path=folder_path)

    config_path = join(folder_path, "best_trial.yaml")
    if not os.path.exists(config_path):
        print(f"Skipping {folder}: best_trial.yaml not available")
        continue

    # Tags are recorded only for folders that contribute a row. Doing this
    # before the skip above would leave these lists longer than the table.
    get_info_from_folder(folder, models, model_list)
    get_info_from_folder(folder, fusions, fusion_list)
    get_info_from_folder(folder, datasets, dataset_list)

    config = OmegaConf.load(config_path)

    best_trial = config.trial_number
    best_trial_path = join(folder_path, "optuna", f"run_{best_trial}", "kfold_summary.json")

    k_fold_summary = OmegaConf.load(best_trial_path)
    best_fold_metrics = k_fold_summary.best_fold.metrics
    av_epoch_training_time.append(best_fold_metrics.avg_time_per_epoch)
    trainable_params.append(best_fold_metrics.trainable_params)
    # Keep only the text between the parentheses of the stored string
    # (presumably something like "name(0.0758)" -- confirm against kfold_summary.json).
    best_test_mae.append(best_fold_metrics.test_mae.split('(')[1].rstrip(')'))
    best_test_r2.append(best_fold_metrics.test_r2.split('(')[1].rstrip(')'))
    trial_list.append(best_trial_path)

    # Metric names become the index; the single column is this experiment.
    metric_frames.append(pd.DataFrame.from_dict(config.metrics, orient="index"))

if not metric_frames:
    raise RuntimeError(
        f"No best_trial.yaml could be loaded from {baseline_result_path}; nothing to summarize"
    )

# Concatenate once (instead of growing the frame inside the loop), then flip
# so that each experiment is a row and each metric a column.
df = pd.concat(metric_frames, ignore_index=True, axis=1).transpose()
print(model_list)
df["model"] = model_list
df["fusion"] = fusion_list
df["dataset"] = dataset_list
df["average_epoch_training_time"] = av_epoch_training_time
df["number_trainable_params"] = trainable_params
df["best fold test MAE"] = best_test_mae
df["best fold test R2"] = best_test_r2
df["best_trial"] = trial_list

# Human-readable "mean+/-std" columns for the cross-validated test metrics.
round_digit = 3
df["test MAE"] = df["test_mae_mean"].round(round_digit).astype(str) + r"+/-" + df["test_mae_std"].round(round_digit).astype(str)
df["test R2"] = df["test_r2_mean"].round(round_digit).astype(str) + r"+/-" + df["test_r2_std"].round(round_digit).astype(str)

# Keep only the columns of the final report, in presentation order.
df = df[["model", "dataset", "fusion", "test MAE", "test R2", "best fold test MAE", "best fold test R2", "average_epoch_training_time", "number_trainable_params", "best_trial"]]

filepath = "./output"
os.makedirs(filepath, exist_ok=True)
df.to_csv(join(filepath, "baseline_results.csv"))

df


Best Trial Summary
Trial number: 49
Optimization metric (val_loss): 0.059312

Best Parameters:
  d_model_set: 180
  e_layers: 3
  d_layers: 3
  n_heads: 3
  d_ff: 600
  d_qk: 200
  dropout: 0.0
  lr: 0.0007000000000000001
  gamma: 0.00444751688107705

Summary saved to: ..\experiments\baseline_optuna\euler\baseline_proTCVRand_dyconex_sum_51972551\best_trial.yaml
Config path: /cluster/scratch/fscipion/baseline_proTCVRand_dyconex_sum_51972551/optuna/run_49/config.yaml

['GRU', 'GRU', 'LSTM', 'LSTM', 'MLP', 'MLP', 'proTCVRand', 'proTNoNAIM', 'proTNoNAIM', 'proTPhySim', 'proT', 'proT', 'proT', 'TCN', 'TCN']


Unnamed: 0,model,dataset,fusion,test MAE,test R2,best fold test MAE,best fold test R2,average_epoch_training_time,number_trainable_params,best_trial
0,GRU,dyconex,sum,0.081+/-0.004,0.505+/-0.044,0.0758,0.5596,4.528631,654793,../experiments/baseline_optuna/euler/baseline_...
1,GRU,ishigami,sum,0.037+/-0.002,0.426+/-0.006,0.0348,0.428,2.628296,35137,../experiments/baseline_optuna/euler/baseline_...
2,LSTM,dyconex,sum,0.084+/-0.005,0.4+/-0.234,0.077,0.5436,6.145012,1542793,../experiments/baseline_optuna/euler/baseline_...
3,LSTM,ishigami,sum,0.037+/-0.001,0.426+/-0.003,0.036,0.4216,2.977574,370049,../experiments/baseline_optuna/euler/baseline_...
4,MLP,dyconex,sum,0.085+/-0.015,0.484+/-0.14,0.0727,0.5823,0.827373,617737,../experiments/baseline_optuna/euler/baseline_...
5,MLP,ishigami,sum,0.041+/-0.001,0.252+/-0.008,0.0396,0.2478,2.898114,580737,../experiments/baseline_optuna/euler/baseline_...
6,proTCVRand,dyconex,sum,0.061+/-0.006,0.719+/-0.047,0.0548,0.7802,10.214549,6391621,../experiments/baseline_optuna/euler/baseline_...
7,proTNoNAIM,dyconex,sum,0.068+/-0.003,0.62+/-0.021,0.0634,0.6455,5.466464,1856501,../experiments/baseline_optuna/euler/baseline_...
8,proTNoNAIM,ishigami,sum,0.003+/-0.0,0.998+/-0.0,0.0027,0.9983,8.617724,1516801,../experiments/baseline_optuna/euler/baseline_...
9,proTPhySim,dyconex,sum,0.076+/-0.002,0.531+/-0.018,0.0749,0.5413,6.485203,1373401,../experiments/baseline_optuna/euler/baseline_...
