In [1]:
# Optional one-time setup: uncomment to install papermill into this kernel's environment.
# %pip install papermill

In [2]:
# Parameters
# Scenarios handed, one at a time, to the Step 1 and Step 2 notebooks below.
# The special "all" entry is the one whose results are consolidated as Step 3.
scenarios = [
    "binary",
    "multiclass",
    "multilabel_native",
    "multilabel_powerset",
    "all",
]
# Forwarded unchanged as the `alpha` parameter of the Step 1 and Step 2 notebooks
# (presumably the significance level of their statistical tests -- TODO confirm).
alpha = 0.05

In [3]:
import pandas as pd
import papermill as pm

# Step 0: Results Gatherer
print("Running Step 0 (Results Gatherer)...")
try:
    # output_path=None: the executed copy of the notebook is not saved to disk.
    _ = pm.execute_notebook(
        input_path="stats_step_0_results_gatherer.ipynb",
        output_path=None,
    )
except Exception as err:
    # Best-effort: report the failure and let the remaining cells run.
    print("Error in Step 0 (Results Gatherer):", err)

Running Step 0 (Results Gatherer)...


Executing:   0%|          | 0/6 [00:00<?, ?cell/s]

In [4]:
# Accumulators for the result tables read back after each notebook run:
# one list per (step, metric) pair, filled in scenario order.
step_1_dfs_f1, step_1_dfs_time = [], []
step_2_dfs_f1, step_2_dfs_time = [], []

# One row per analysis step:
# (step number, label, notebook stem, CSV stem, step id used for 'all',
#  F1 accumulator, time accumulator)
analysis_steps = (
    (1, "Per-Dataset Analysis", "analysis_per_dataset", "per_dataset", "3a",
     step_1_dfs_f1, step_1_dfs_time),
    (2, "Across-Datasets Analysis", "analysis_across_datasets", "across_datasets", "3b",
     step_2_dfs_f1, step_2_dfs_time),
)

for scenario in scenarios:
    for step_no, label, nb_stem, csv_stem, all_step_id, f1_frames, time_frames in analysis_steps:
        print(f"Running Step {step_no} ({label}) for '{scenario}'...")
        try:
            # The 'all' scenario's CSVs are named with step id 3a/3b instead of 1/2.
            step_id = all_step_id if scenario == 'all' else str(step_no)
            _ = pm.execute_notebook(
                f"stats_step_{step_no}_{nb_stem}.ipynb",
                f"stats/{scenario}/{scenario}_step_{step_no}_{nb_stem}_output.ipynb",
                parameters=dict(scenario=scenario, alpha=alpha),
            )
            csv_prefix = f"stats/{scenario}/{scenario}_step_{step_id}_{csv_stem}_stat_results"
            f1_frames.append(pd.read_csv(f"{csv_prefix}_f1.csv"))
            time_frames.append(pd.read_csv(f"{csv_prefix}_time.csv"))
        except Exception as err:
            # Best-effort: report the failure and carry on with the next step/scenario.
            print(f"Error in Step {step_no} ({label}):", err)

Running Step 1 (Per-Dataset Analysis) for 'binary'...


Executing:   0%|          | 0/37 [00:00<?, ?cell/s]

Running Step 2 (Across-Datasets Analysis) for 'binary'...


Executing:   0%|          | 0/36 [00:00<?, ?cell/s]

Running Step 1 (Per-Dataset Analysis) for 'multiclass'...


Executing:   0%|          | 0/37 [00:00<?, ?cell/s]

Running Step 2 (Across-Datasets Analysis) for 'multiclass'...


Executing:   0%|          | 0/36 [00:00<?, ?cell/s]

Running Step 1 (Per-Dataset Analysis) for 'multilabel_native'...


Executing:   0%|          | 0/37 [00:00<?, ?cell/s]

Running Step 2 (Across-Datasets Analysis) for 'multilabel_native'...


Executing:   0%|          | 0/36 [00:00<?, ?cell/s]

Running Step 1 (Per-Dataset Analysis) for 'multilabel_powerset'...


Executing:   0%|          | 0/37 [00:00<?, ?cell/s]

Running Step 2 (Across-Datasets Analysis) for 'multilabel_powerset'...


Executing:   0%|          | 0/36 [00:00<?, ?cell/s]

Running Step 1 (Per-Dataset Analysis) for 'all'...


Executing:   0%|          | 0/37 [00:00<?, ?cell/s]

Running Step 2 (Across-Datasets Analysis) for 'all'...


Executing:   0%|          | 0/36 [00:00<?, ?cell/s]

In [5]:
def _consolidate_results(frames, csv_path, keep_all=False):
    """Concatenate result tables, filter on the `scenario` column, and save as CSV.

    Parameters
    ----------
    frames : list of pandas.DataFrame
        Tables collected by the per-scenario loop; each must have a
        `scenario` column.
    csv_path : str
        Destination of the consolidated CSV (written without the index).
    keep_all : bool, default False
        If False, keep the rows whose `scenario` is not "all";
        if True, keep only the rows whose `scenario` is "all".

    Returns
    -------
    pandas.DataFrame
        The filtered table, or an empty DataFrame when `frames` is empty.
    """
    if not frames:
        # pd.concat raises ValueError on an empty list, which would abort the
        # whole cell and block the remaining consolidations. Skip instead, in
        # line with the best-effort error handling of the loop that fills `frames`.
        print(f"No results available for '{csv_path}'; skipping.")
        return pd.DataFrame()
    combined = pd.concat(frames, axis=0)
    if keep_all:
        row_mask = combined["scenario"] == "all"
    else:
        row_mask = combined["scenario"] != "all"
    selected = combined[row_mask]
    selected.to_csv(csv_path, index=False)
    return selected


# Step 1: Per-Dataset Analysis (consolidate stats for scenario != 'all')
df_step_1_f1 = _consolidate_results(
    step_1_dfs_f1, "stats/step_1_per_dataset_stat_results_f1_condensed.csv")
df_step_1_time = _consolidate_results(
    step_1_dfs_time, "stats/step_1_per_dataset_stat_results_time_condensed.csv")

# Step 2: Across-Datasets Analysis (consolidate stats for scenario != 'all')
df_step_2_f1 = _consolidate_results(
    step_2_dfs_f1, "stats/step_2_across_datasets_stat_results_f1_condensed.csv")
df_step_2_time = _consolidate_results(
    step_2_dfs_time, "stats/step_2_across_datasets_stat_results_time_condensed.csv")

# Step 3: All-Datasets Analysis (consolidate stats for scenario == 'all')
# NOTE(review): only the across-datasets tables feed Step 3; the per-dataset
# tables of the 'all' scenario are dropped by the Step 1 filter above and are
# never written to a consolidated file -- confirm this is intended.
df_step_3_f1 = _consolidate_results(
    step_2_dfs_f1, "stats/step_3_all_datasets_stat_results_f1.csv", keep_all=True)
df_step_3_time = _consolidate_results(
    step_2_dfs_time, "stats/step_3_all_datasets_stat_results_time.csv", keep_all=True)