In [12]:
import wandb
import re
from pprint import pprint
import pandas as pd

wandb.login()
api = wandb.Api()
experiments = api.runs("rom42pla_team/noisy_eeg")

In [49]:
table_rows = []
for experiment in experiments:
    date, hour, dataset, validation, signal_type, model = experiment.name.split(
        "_")
    df_experiment = experiment.history()
    runs = {col.split("-")[0] for col in df_experiment.columns if re.fullmatch(r"run_[0-9]+-.*", col)}
    avg_rows = []
    for run in runs:
        # prints all columns with the metrics of the run
        metrics = {col for col in df_experiment.columns if re.fullmatch(f"{run}-.*/.*", col)}
        # get the results for the run
        metrics_val = {col for col in df_experiment.columns if re.fullmatch(f"{run}-.*/val", col)}
        # gets the subset of dataframe for the run
        df_run = df_experiment[sorted(metrics_val)].dropna().reset_index(drop=True)
        df_run = df_run.rename(columns={col: col.split("-")[-1].split("/")[0] for col in df_run})
        # drop columns that end with "loss"
        df_run = df_run.loc[:, ~df_run.columns.str.endswith("loss")]
        # gets the best run based on the cls loss
        df_run = df_run.replace("NaN", None)
        best_row_i = df_run["cls_acc"].idxmin()
        best_row = df_run.iloc[best_row_i]
        avg_rows.append(best_row)
    df_rows = pd.DataFrame(avg_rows)
    rows_mean, rows_std = df_rows.mean(), df_rows.std()
    for df in [rows_mean, rows_std]:
        df["dataset"] = dataset
        df["validation"] = validation
        df["signal_type"] = (
            "$<$\\SI{100}{\\hertz}" if signal_type == "eeg" else "$>$\\SI{100}{\\hertz}"
        )
        df["model"] = model
    rows = pd.DataFrame({
        key: f"{mean * 100:.3f} ± {std*100:.3f}" if isinstance(mean, float) else mean
        for key, mean, std in zip(rows_mean.index, rows_mean, rows_std)
    }, index=[0]).iloc[0]
    table_rows.append(rows)
# merge each run's series
table = pd.DataFrame(table_rows, dtype="object")
# reorder the columns
first_cols = ["dataset", "signal_type", "validation", "model"]
table = table.sort_values(first_cols)
table = table[first_cols +
              [col for col in table.columns if col not in first_cols]]
# rename the columns
table = table.rename(columns={
    "dataset": "Dataset", 
    "signal_type": "Frequencies", 
    "validation": "Validation", 
    "model": "Model",
    "cls_acc": "cls Accuracy (\\%) $\\uparrow$",
    "cls_f1": "cls $F_1$ (\\%) $\\uparrow$",
    "ids_acc": "ids Accuracy (\\%) $\\uparrow$",
    "ids_f1": "ids $F_1$ (\\%) $\\uparrow$",
    })

In [50]:
for dataset in table["Dataset"].unique():
    dataset_table = table[table["Dataset"] == dataset]
    dataset_table = dataset_table.drop(columns=["Dataset"])
    print(f"TABLE FOR {dataset}:")
    print(dataset_table.to_latex(na_rep="-", index=False, multirow=True, multicolumn=True))

TABLE FOR deap:
\begin{tabular}{lllllll}
\toprule
Frequencies & Validation & Model & cls Accuracy (\%) $\uparrow$ & cls $F_1$ (\%) $\uparrow$ & ids Accuracy (\%) $\uparrow$ & ids $F_1$ (\%) $\uparrow$ \\
\midrule
$<$\SI{100}{\hertz} & kfold & dino & 72.270 ± 7.693 & 78.027 ± 5.150 & 98.467 ± 1.552 & 98.467 ± 1.552 \\
$<$\SI{100}{\hertz} & kfold & edpnet & 51.584 ± 9.413 & 41.740 ± 35.932 & 42.525 ± 34.240 & 42.525 ± 34.240 \\
$<$\SI{100}{\hertz} & kfold & eegnet & 57.741 ± 1.480 & 68.097 ± 1.902 & 32.109 ± 10.357 & 32.109 ± 10.357 \\
$<$\SI{100}{\hertz} & kfold & linear & 61.591 ± 1.400 & 72.892 ± 1.485 & 84.895 ± 8.352 & 84.895 ± 8.352 \\
$<$\SI{100}{\hertz} & kfold & mlp & 62.451 ± 1.069 & 71.847 ± 1.623 & 94.471 ± 4.616 & 94.471 ± 4.616 \\
$<$\SI{100}{\hertz} & loso & dino & 50.006 ± 4.422 & 53.962 ± 8.883 & - & - \\
$<$\SI{100}{\hertz} & loso & edpnet & 51.958 ± 5.071 & 57.351 ± 10.995 & - & - \\
$<$\SI{100}{\hertz} & loso & eegnet & 52.655 ± 4.687 & 60.981 ± 8.529 & - & - \\
$<$\S