# GEMSS tier results analysis: comparison of hyperparameters

This notebook loads and analyzes the aggregated results from the tiered experiments. 
It reads the `tier_summary_metrics.csv` file generated by the experiment runner and analyzes algorithm performance across different hyperparameter configurations.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
from IPython.display import display, Markdown
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed

from gemss.diagnostics.experiment_results_visualizations import (
    THRESHOLDS_FOR_METRIC,
    plot_solution_grouped,
    plot_solution_comparison,
    plot_si_asi_scatter,
    analyze_metric_results,
    plot_heatmap,
    plot_metric_vs_hyperparam,
)

## Select tiers and load data

Specify the Tier IDs you want to analyze. The code assumes your results are stored in `../../scripts/results/tier{ID}/tier_summary_metrics.csv` (relative to this notebook's working directory).

In [None]:
# Tiers to include in the analysis (all seven here).
# Use a shorter list, e.g. [3], [7] or [1, 2, 3, 4], to restrict the analysis.
tier_id_list = list(range(1, 8))

In [None]:
# Base names of the coverage metrics. A results column is treated as a metric
# column when its name contains one of these base names.
# The list is synchronized with the keys returned by calculate_coverage_metrics
# in run_experiment.py; all coverage metrics are numeric (possibly None).
COVERAGE_METRICS = (
    ["Recall", "Precision", "F1_Score", "Jaccard"]
    + ["Miss_Rate", "FDR", "Global_Miss_Rate", "Global_FDR"]
    + ["Success_Index", "Adjusted_Success_Index"]
)

# Names of the solution types whose metrics appear in the results:
# "full", "top" and the three "outlier_STD_*" variants.
SOLUTION_OPTIONS = ["full", "top"] + [
    f"outlier_STD_{std_level}" for std_level in ("2.0", "2.5", "3.0")
]

# Candidate parameter columns. Later cells check which of these are present in
# the loaded data and take more than one value.
potential_params = [
    # data dimensions
    "N_SAMPLES", "N_FEATURES", "SAMPLE_VS_FEATURE_RATIO", "SPARSITY",
    # number of solutions
    "N_GENERATING_SOLUTIONS", "N_CANDIDATE_SOLUTIONS",
    # noise and missingness
    "NOISE_STD", "NAN_RATIO", "[NOISE_STD, NAN_RATIO] COMBINATION",
    # remaining settings
    "LAMBDA_JACCARD", "BINARY_RESPONSE_RATIO", "BATCH_SIZE", "BINARIZE",
]

In [None]:
# Load the summary CSV of every selected tier and stack the tiers into a single
# DataFrame `df`. Tiers whose file is missing are reported and skipped.
tier_frames = []
for tier_id in tier_id_list:

    results_path = f"../../scripts/results/tier{tier_id}/tier_summary_metrics.csv"

    if not os.path.exists(results_path):
        print(f"ERROR: File not found at {results_path}")
        print("Please run the experiments for this tier first, or check the path.")
        continue

    df_tier = pd.read_csv(results_path)
    print(
        f"Successfully loaded {len(df_tier)} experiment records from Tier {tier_id}."
    )

    # Add TIER_ID column
    df_tier["TIER_ID"] = int(tier_id)

    # Add EXPERIMENT_ID column: {tier_id}.{experiment_number_in_tier},
    # where the experiment number is the 1-based row position within the tier file
    df_tier["EXPERIMENT_ID"] = [
        f"{tier_id}.{row_number}" for row_number in range(1, len(df_tier) + 1)
    ]

    tier_frames.append(df_tier)

# Fail early with an explicit message instead of a KeyError on a missing column below
if not tier_frames:
    raise FileNotFoundError(
        f"No tier_summary_metrics.csv file could be loaded for tiers {tier_id_list}."
    )

# Concatenate once, after the loop, rather than growing the frame per tier
df = pd.concat(tier_frames, ignore_index=True)

# Metric columns are those whose name contains a coverage-metric base name.
# They are derived from the combined frame so that `metric_cols` is always
# defined and does not depend on which tier happened to be loaded last.
metric_cols = [c for c in df.columns if any(x in c for x in COVERAGE_METRICS)]
# Ensure numeric columns are actually numeric (unparseable values become NaN)
for col in metric_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Add the "SAMPLE_VS_FEATURE_RATIO" column
df["SAMPLE_VS_FEATURE_RATIO"] = df["N_SAMPLES"] / df["N_FEATURES"]
# Add a feature that combines information about noise and missingness
df["[NOISE_STD, NAN_RATIO] COMBINATION"] = (
    "[" + df["NOISE_STD"].astype(str) + ", " + df["NAN_RATIO"].astype(str) + "]"
)

## Data Preview

In [None]:
# Summarize the loaded data as a short list of Markdown lines.
# `metric_cols` is the list of metric columns produced by the data-loading cell.

# Identify which parameters actually vary in this dataset
varied_params = [p for p in potential_params if p in df.columns and df[p].nunique() > 1]
unvaried_params = [
    p for p in potential_params if p in df.columns and p not in varied_params
]

# Quotes nested inside the f-strings differ from the enclosing quotes: reusing
# the same quote character inside an f-string is only valid from Python 3.12 on.
overview_lines = [
    f"### All results for tiers: {tier_id_list}",
    f"- **Total experiments:** {len(df)}",
    f"- **{len(SOLUTION_OPTIONS)} solution types:** {', '.join(SOLUTION_OPTIONS)}",
    f"- **{len(COVERAGE_METRICS)} available metrics:** {', '.join(COVERAGE_METRICS)}",
    f"- **Total metrics columns:** {len(metric_cols)}",
    f"- **Varied Parameters:** {', '.join(varied_params)}",
]
for overview_line in overview_lines:
    display(Markdown(overview_line))

In [None]:
# Reshape the wide results into a long table: one block of rows per solution
# type, with that solution's columns renamed by dropping the "{solution}_" part
# and a "solution_type" column recording which solution the rows belong to.
solution_frames = []
for solution in SOLUTION_OPTIONS:
    # Columns belonging to this solution type (matched by substring)
    solution_cols = [col for col in df.columns if solution in col]
    df_solution = (
        df[["TIER_ID"] + varied_params + solution_cols]
        .assign(TIER_ID=lambda frame: frame["TIER_ID"].astype(str))
        .rename(columns={col: col.replace(f"{solution}_", "") for col in solution_cols})
        .assign(solution_type=solution)
    )
    solution_frames.append(df_solution)

# Concatenate once instead of growing the frame inside the loop
df_pivot_solution = (
    pd.concat(solution_frames, ignore_index=True) if solution_frames else pd.DataFrame()
)

## Effect of hyperparameters

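Compare how the selected metrics (Recall, Precision, F1 score) change when a parameter is varied within a tier. The parameters considered are `LAMBDA_JACCARD`, `BATCH_SIZE`, `NOISE_STD`, `NAN_RATIO`, the combination of the latter two, and the sample-to-feature ratio. Only tiers in which the parameter takes more than one value are shown.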

In [None]:
# For every parameter of interest and every tier in which that parameter takes
# more than one value, show the mean of the selected metric columns per
# parameter value, as a table and as a plot.
hyperparam_list = [
    "LAMBDA_JACCARD",
    "BATCH_SIZE",
    "[NOISE_STD, NAN_RATIO] COMBINATION",
    "NOISE_STD",
    "NAN_RATIO",
    "SAMPLE_VS_FEATURE_RATIO",
]
select_metrics = [
    "Recall",
    "Precision",
    "F1_Score",
]
# select only those columns that contain one of the select_metrics
select_metric_cols = [
    col for col in metric_cols if any(m in col for m in select_metrics)
]
for hyperparam in hyperparam_list:
    # A parameter that was not recorded in any loaded tier has no column;
    # skip it instead of failing with a KeyError.
    if hyperparam not in df.columns:
        display(
            Markdown(
                f"*Skipping **{hyperparam}**: column not present in the loaded results.*"
            )
        )
        continue

    for tier in tier_id_list:
        # Filter the tier's rows once and reuse them for the check and the aggregation
        df_tier_rows = df[df["TIER_ID"] == tier]
        if df_tier_rows[hyperparam].nunique() <= 1:
            continue

        df_grouped = df_tier_rows.groupby(hyperparam)[select_metric_cols].mean()
        display(Markdown(f"## **Tier {tier}**"))
        display(Markdown(f"### Effect of **{hyperparam}**"))
        display(df_grouped)

        plot_metric_vs_hyperparam(
            df_grouped=df_grouped,
            hyperparam=hyperparam,
            solution_options=SOLUTION_OPTIONS,
        )

    # draw a horizontal line
    display(Markdown("---"))

## Performance summary

In [None]:
# Collect the thresholds of every metric that has them into one table,
# one column per metric; metrics whose entry is None are left out.
df_thresholds = pd.DataFrame()
for metric in THRESHOLDS_FOR_METRIC:
    thresholds = THRESHOLDS_FOR_METRIC[metric]
    if thresholds is None:
        continue
    df_thresholds[metric] = pd.Series(thresholds)

display(Markdown("#### Performance thresholds for selected metrics"))
display(df_thresholds)

In [None]:
# Interactive view of analyze_metric_results: the tier(s), the solution type and
# the metric are chosen with widgets, while the data frame and the thresholds
# argument are held fixed.
tier_selector = widgets.SelectMultiple(
    options=tier_id_list,
    value=tier_id_list,  # all tiers selected initially
    description="Tier:",
)
solution_selector = widgets.Dropdown(
    options=sorted(SOLUTION_OPTIONS),
    value="outlier_STD_2.0",
    description="Solution:",
)
metric_selector = widgets.Dropdown(
    options=sorted(["Recall", "Precision", "F1_Score"]),
    value="Recall",
    description="Metric:",
)

interact(
    analyze_metric_results,
    df=fixed(df),
    tier=tier_selector,
    solution_type=solution_selector,
    metric_name=metric_selector,
    thresholds=fixed(None),
)