# Import libraries

In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
# pio.templates.default = "plotly_white"

# Load the CSV file with the sweep results into a pandas DataFrame

In [None]:
# Location of the sweep results, given relative to the current working directory.
results_csv = "../results/regularizing_sweep.csv"

# One row per entry in the results file; preview the first rows as the cell output.
df = pd.read_csv(results_csv)
df.head()

# Draw boxplots to compare hyperparameter settings

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

hyperparameters = ["architecture", "gnn_layers", "hidden_channels", "jk", "pool", "activation", "norm", "transform", "dropout"]
metrics_to_plot = ["good_within.99", "good_within.95"]

# Label missing hyperparameter values as "none" so they show up as their own
# category. Only the hyperparameter columns are filled: filling the whole
# frame would also turn NaNs in the metric/duration columns into strings,
# which breaks numeric operations on those columns later on.
df = df.fillna(value={name: "none" for name in hyperparameters})

# Colour palette shared by all traces; indexed modulo its length so that
# adding more metrics can never run past the end of the list.
palette = px.colors.qualitative.Plotly


def _category_sort_key(value):
    """Return a sort key that orders non-string values before strings.

    A numeric hyperparameter whose missing entries were filled with "none"
    holds both numbers and strings, which cannot be compared directly.
    Grouping by "is it a string" first keeps numbers in numeric order and
    places the string labels after them.
    """
    return (isinstance(value, str), value)


for hyperparameter in hyperparameters:
    # Data used for this figure. By default every run is plotted; uncomment
    # one of the filters below to restrict the comparison to a subset.
    plot_df = df

    # Filter out some values
    # plot_df = plot_df[~plot_df["pool"].isin(["median"])]
    # # plot_df = plot_df[plot_df["gnn_layers"] == 5]
    # plot_df = plot_df[plot_df["hidden_channels"] == 64]

    # Categories actually present in the data, in a stable order that is
    # reused for both subplots.
    actual_categories = sorted(plot_df[hyperparameter].unique(), key=_category_sort_key)

    # One row, two columns: metrics on the left, duration on the right.
    fig = make_subplots(
        rows=1, cols=2,
        subplot_titles=(f"Metrics vs {hyperparameter}", f"Duration vs {hyperparameter}"),
        horizontal_spacing=0.1
    )

    # Left subplot: one box trace per metric, grouped side by side per category.
    for i, metric in enumerate(metrics_to_plot):
        fig.add_trace(
            go.Box(x=plot_df[hyperparameter], y=plot_df[metric],
                   name=metric, boxmean=True,
                   marker_color=palette[i % len(palette)],
                   offsetgroup=str(i)),
            row=1, col=1
        )

    # Right subplot: duration, coloured with the first palette entry that is
    # not already used by a metric trace.
    fig.add_trace(
        go.Box(x=plot_df[hyperparameter], y=plot_df['duration'],
               name='duration', boxmean=True,
               marker_color=palette[len(metrics_to_plot) % len(palette)],
               showlegend=False,
               offsetgroup='duration',
               width=0.6),
        row=1, col=2
    )

    # Without a row/col selector this applies to every x-axis in the figure,
    # i.e. both subplots get the same categorical ordering.
    fig.update_xaxes(
        type='category',
        categoryorder='array',
        categoryarray=actual_categories
    )

    fig.update_layout(
        title=f"Analysis of {hyperparameter}",
        height=400,
        boxmode='group',
        boxgap=0.2,
        boxgroupgap=0.1
    )
    fig.show()

# Best run for each value of the specified hyperparameter

In [None]:
# Column to group the runs by, and the metric used to rank runs within a group.
hyperparameter = "architecture"
metric = "good_within.99"

# For every distinct value of the hyperparameter, find the index label of the
# row with the highest metric, then pull those rows out of the full table.
metric_by_group = df.groupby(hyperparameter)[metric]
best_row_labels = metric_by_group.idxmax()
best_runs = df.loc[best_row_labels]
best_runs.head()