# GEMSS tier results analysis: comparison of hyperparameters

This notebook loads and analyzes the aggregated results from the tiered experiments. 
It reads the `tier_summary_metrics.csv` file generated by the experiment runner and analyzes algorithm performance across different hyperparameter configurations.

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os
from IPython.display import display, Markdown
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed

from gemss.experiment_assessment.experiment_results_visualizations import (
    THRESHOLDS_FOR_METRIC,
    analyze_metric_results,
    plot_metric_vs_hyperparam,
)
from gemss.experiment_assessment.experiment_results_analysis import (
    COVERAGE_METRICS,
    SOLUTION_OPTIONS,
    ALL_PARAMETERS,
    load_experiment_results,
    print_dataframe_overview,
    pivot_df_by_solution_type,
)

## Select tiers and load data

Specify the Tier IDs you want to analyze. The code assumes your results are stored in `../../scripts/results/tier{ID}/tier_summary_metrics.csv`.

In [None]:
# Tiers whose aggregated results are loaded: 1 through 7 inclusive.
tier_id_list = list(range(1, 8))

# The loader returns the combined results together with the metric columns;
# `metric_cols` is iterated as a collection of column names further below.
df, metric_cols = load_experiment_results(tier_id_list)

# Show an overview of what was loaded before any analysis.
print_dataframe_overview(df)

In [None]:
# Pivot the loaded results by solution type using the project helper.
# NOTE(review): `df_pivot` is not referenced by the cells visible in this
# section — confirm whether it is still needed.
df_pivot = pivot_df_by_solution_type(df)

## Effect of hyperparameters

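Compare how the selected metrics (recall, precision, F1 score) change as individual hyperparameters vary — e.g. `LAMBDA_JACCARD`, `BATCH_SIZE`, `NOISE_STD`, `NAN_RATIO`, `SAMPLE_VS_FEATURE_RATIO` and `BINARIZE`. A hyperparameter is reported for a tier only if it takes more than one distinct value within that tier.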

In [None]:
# Hyperparameters whose effect on the metrics is inspected, tier by tier.
hyperparam_list = [
    "LAMBDA_JACCARD",
    "BATCH_SIZE",
    "[NOISE_STD, NAN_RATIO] COMBINATION",
    "NOISE_STD",
    "NAN_RATIO",
    "SAMPLE_VS_FEATURE_RATIO",
    "BINARIZE",
]
# Metric name fragments; any metric column containing one of them is kept.
select_metrics = [
    "Recall",
    "Precision",
    "F1_Score",
]
select_metric_cols = [
    col for col in metric_cols if any(m in col for m in select_metrics)
]

for hyperparam in hyperparam_list:
    # A name absent from the loaded results would raise KeyError and abort the
    # whole cell; report it and carry on with the remaining hyperparameters.
    if hyperparam not in df.columns:
        display(
            Markdown(
                f"*Column `{hyperparam}` not found in the loaded results; skipping.*"
            )
        )
        continue

    shown_any = False
    for tier in tier_id_list:
        # Filter the tier once and reuse the subset for the check and the groupby.
        df_tier = df[df["TIER_ID"] == tier]

        # Only tiers in which the hyperparameter actually varies are informative.
        if df_tier[hyperparam].nunique() <= 1:
            continue

        # Mean of each selected metric per hyperparameter value.
        df_grouped = df_tier.groupby(hyperparam)[select_metric_cols].mean()

        display(Markdown(f"## **Tier {tier}**"))
        display(Markdown(f"### Effect of **{hyperparam}**"))
        display(df_grouped)

        plot_metric_vs_hyperparam(
            df_grouped=df_grouped,
            hyperparam=hyperparam,
            solution_options=SOLUTION_OPTIONS,
        )
        shown_any = True

    # Draw a horizontal rule only after a hyperparameter that produced output,
    # so hyperparameters without variation do not leave a run of empty rules.
    if shown_any:
        display(Markdown("---"))

## Performance summary

In [None]:
# Collect the thresholds of every metric that defines them, one Series per metric.
threshold_series = {
    metric: pd.Series(thresholds)
    for metric, thresholds in THRESHOLDS_FOR_METRIC.items()
    if thresholds is not None
}

# Build the table in one step. Assigning columns one at a time to an initially
# empty DataFrame aligns every later Series to the first metric's index, which
# silently drops entries the first metric does not have; concatenating along
# the columns keeps the union of all entries instead.
# pd.concat raises on an empty mapping, hence the explicit empty-frame fallback.
df_thresholds = (
    pd.concat(threshold_series, axis=1) if threshold_series else pd.DataFrame()
)

display(Markdown("#### Performance thresholds for selected metrics"))
display(df_thresholds)

In [None]:
# Interactive view: pick tiers, a solution type and a metric, and
# `analyze_metric_results` is re-run with the current selection.
solution_choices = sorted(SOLUTION_OPTIONS)

# Preferred initial selection. A Dropdown is expected to reject a value that is
# not among its options, so fall back to the first available option (or to no
# selection at all) when the preferred one is missing.
preferred_solution = "outlier_STD_2.0"
if preferred_solution in solution_choices:
    default_solution = preferred_solution
else:
    default_solution = solution_choices[0] if solution_choices else None

# The trailing semicolon keeps the notebook from echoing the function object
# that `interact` returns below the widgets.
interact(
    analyze_metric_results,
    df=fixed(df),
    tier=widgets.SelectMultiple(
        options=tier_id_list,
        value=tier_id_list,
        description="Tier:",
    ),
    solution_type=widgets.Dropdown(
        options=solution_choices,
        value=default_solution,
        description="Solution:",
    ),
    metric_name=widgets.Dropdown(
        options=sorted(["Recall", "Precision", "F1_Score"]),
        value="Recall",
        description="Metric:",
    ),
    thresholds=fixed(None),
);