# Hyperparameter Optimization Visualization

In [None]:
import os
import pickle
import pandas as pd
import optuna
import plotly.io as pio
import plotly.graph_objects as go
from optuna.trial import create_trial, TrialState
from optuna.distributions import (
    CategoricalDistribution,
    FloatDistribution,
    IntDistribution,
)

In [None]:
# Colour palette (hex strings) used by the "esslingen_cd" Plotly template below
# and by the plotting helpers further down in this notebook.
HE_RED, HE_BLUE = "#E2001A", "#002D72"
HE_GRAY, HE_LIGHT_GRAY = "#5A5A5A", "#D1D1D1"

# Register the template: white backgrounds, dark-blue text and axis lines,
# light-gray grid lines.
pio.templates["esslingen_cd"] = go.layout.Template(
    layout=go.Layout(
        colorway=[HE_BLUE, HE_RED, HE_GRAY, "#2C74B3", "#A1C9F4"],
        # Dark blue is used for all text.
        font={"family": "Arial, sans-serif", "size": 14, "color": HE_BLUE},
        title={"font": {"size": 20, "color": HE_BLUE}},
        paper_bgcolor="white",
        plot_bgcolor="white",
        # The x and y axes get identical grid/line styling.
        **{
            axis: {
                "gridcolor": HE_LIGHT_GRAY,
                "linecolor": HE_BLUE,
                "zerolinecolor": HE_LIGHT_GRAY,
            }
            for axis in ("xaxis", "yaxis")
        },
    )
)

# Make the template the default for every figure created afterwards.
pio.templates.default = "esslingen_cd"

In [3]:
# Constants
# Directory scanned by load_all_dataframes() for pickled result files
# (relative to the notebook's working directory).
RESULTS_DIR = "../ho_results"

# Filename prefixes used to select the result files of each analysis section.
RDC_PREFIX = "rdc_ho"
UIC_PREFIX = "uic_ho"

# Suffix removed from a result filename to obtain the displayed study name.
STUDY_FILE_SUFFIX = "_final.pickle"

In [4]:
def load_all_dataframes(prefix, results_dir=RESULTS_DIR):
    """Unpickle every ``<prefix>*.pickle`` file found in ``results_dir``.

    Files are visited in sorted filename order. A file that cannot be opened
    or unpickled is reported on stdout and skipped, so one broken file does
    not abort the whole load.

    Args:
        prefix: Leading part of the filenames to load.
        results_dir: Directory to scan; defaults to ``RESULTS_DIR``.

    Returns:
        Dict mapping each loaded filename to its unpickled object. The
        objects are annotated as DataFrames but their type is not checked.
    """
    loaded: dict[str, pd.DataFrame] = {}

    matching_files = [
        name
        for name in sorted(os.listdir(results_dir))
        if name.startswith(prefix) and name.endswith(".pickle")
    ]

    for filename in matching_files:
        file_path = os.path.join(results_dir, filename)
        try:
            # NOTE: unpickling runs arbitrary code; only point this at trusted files.
            with open(file_path, "rb") as handle:
                loaded[filename] = pickle.load(handle)
        except Exception as e:
            print(f"- Could not load or process {filename}. Error: {e}")

    return loaded

In [5]:
_PARAM_PREFIX = "params_"


def _infer_distribution(series, max_categorical_values):
    """Choose an Optuna distribution for one non-empty, NaN-free parameter column."""
    unique_values = series.unique()
    is_int = pd.api.types.is_integer_dtype(series.dtype)
    is_float = pd.api.types.is_float_dtype(series.dtype)

    # Anything that is not plainly numeric (object, bool, string, ...) is
    # treated as a categorical parameter over the stringified values.
    if not (is_int or is_float):
        return CategoricalDistribution(choices=sorted(str(x) for x in unique_values))

    # A handful of whole-number values is most likely a discrete choice
    # (e.g. batch sizes), so model it as categorical over ints.
    few_whole_numbers = len(unique_values) <= max_categorical_values and all(
        float(x).is_integer() for x in unique_values
    )
    if few_whole_numbers:
        return CategoricalDistribution(choices=sorted(int(x) for x in unique_values))

    if is_int:
        return IntDistribution(low=int(series.min()), high=int(series.max()))
    return FloatDistribution(low=float(series.min()), high=float(series.max()))


def _coerce_param(value, distribution):
    """Convert a raw cell value to the Python type matching ``distribution``."""
    if isinstance(distribution, CategoricalDistribution):
        if isinstance(distribution.choices[0], str):
            return str(value)
        return int(value)
    if isinstance(distribution, IntDistribution):
        return int(value)
    return float(value)


def create_study_from_df(
    study_name, df, *, max_categorical_values=10, direction="maximize"
):
    """Rebuild an in-memory Optuna study from a trials dataframe.

    Every column named ``params_<name>`` is treated as a hyperparameter and
    the ``value`` column as the objective. A distribution is inferred per
    parameter from the observed values:

    * non-numeric columns -> categorical over the stringified values,
    * numeric columns holding at most ``max_categorical_values`` distinct
      whole numbers -> categorical over ints,
    * remaining integer columns -> ``IntDistribution(min, max)``,
    * remaining float columns -> ``FloatDistribution(min, max)``.

    Each row becomes one COMPLETE trial containing only the parameters that
    are not NaN in that row. The input dataframe is not modified.

    Args:
        study_name: Name given to the created study.
        df: Dataframe with a ``value`` column and ``params_*`` columns.
        max_categorical_values: Largest number of distinct whole-number
            values for which a numeric column is modelled as categorical.
        direction: Optimization direction passed to ``optuna.create_study``.

    Returns:
        The new study with one trial per dataframe row.
    """
    # Strip only the leading prefix so a parameter whose own name contains
    # "params_" keeps the rest of its name intact.
    param_cols = {
        col: col[len(_PARAM_PREFIX):]
        for col in df.columns
        if col.startswith(_PARAM_PREFIX)
    }
    # rename() without inplace returns a new frame, leaving the caller's untouched.
    df = df.rename(columns=param_cols)

    distributions = {}
    for name in param_cols.values():
        series = df[name].dropna()
        if series.empty:
            # No observed value: the parameter never appears in any trial.
            continue
        distributions[name] = _infer_distribution(series, max_categorical_values)

    trials = []
    for _, row in df.iterrows():
        params = {
            name: _coerce_param(row[name], distribution)
            for name, distribution in distributions.items()
            if pd.notna(row[name])
        }
        # The distributions handed to create_trial must cover exactly the
        # parameters present in this trial.
        trial = create_trial(
            state=TrialState.COMPLETE,
            value=row["value"],
            params=params,
            distributions={name: distributions[name] for name in params},
        )
        trials.append(trial)

    study = optuna.create_study(study_name=study_name, direction=direction)
    study.add_trials(trials)
    return study

In [None]:
def set_log_scale_for_lr(fig, index, n_ticks=4):
    """Switch one x-axis of ``fig`` to a log scale with exponent tick labels.

    Args:
        fig: Figure-like object exposing ``update_layout``.
        index: Axis number; values of 1 or lower address the plain
            ``"xaxis"`` key, higher values address ``"xaxis<index>"``.
        n_ticks: Value passed as the axis' ``nticks`` setting.
    """
    suffix = str(index) if index > 1 else ""
    log_axis_settings = {"type": "log", "tickformat": ".0e", "nticks": n_ticks}

    fig.update_layout({f"xaxis{suffix}": log_axis_settings})

In [None]:
from plotly.subplots import make_subplots


def create_grid_slice_plot(study, top_params, vertical_spacing=0.1):
    """Draw one objective-vs-parameter scatter per entry of ``top_params``.

    The subplots are laid out on a grid that is three columns wide when more
    than four parameters are given and two columns wide otherwise.
    Parameters whose name contains "lr" or "learning_rate" get a log-scaled
    x-axis. Only the first column carries a y-axis title.

    Args:
        study: Optuna study whose ``trials_dataframe()`` supplies the data.
        top_params: Parameter names (without the ``params_`` prefix); also
            used as subplot titles.
        vertical_spacing: Forwarded to ``make_subplots``.

    Returns:
        The assembled Plotly figure.
    """
    n_cols = 3 if len(top_params) > 4 else 2
    n_rows = -(-len(top_params) // n_cols)  # ceiling division

    fig = make_subplots(
        rows=n_rows,
        cols=n_cols,
        subplot_titles=top_params,
        vertical_spacing=vertical_spacing,
    )

    trials = study.trials_dataframe()

    for position, param in enumerate(top_params):
        # Fill the grid row by row; Plotly grid coordinates are 1-based.
        zero_row, zero_col = divmod(position, n_cols)
        grid_row, grid_col = zero_row + 1, zero_col + 1

        fig.add_trace(
            go.Scatter(
                x=trials[f"params_{param}"],
                y=trials["value"],
                mode="markers",
                marker={"color": HE_BLUE, "opacity": 0.7},
            ),
            row=grid_row,
            col=grid_col,
        )

        if any(tag in param for tag in ("lr", "learning_rate")):
            fig.update_xaxes(
                type="log", tickformat=".0e", row=grid_row, col=grid_col, nticks=3
            )

    fig.update_layout(
        showlegend=False,
        width=450 * n_cols,
        height=400 * n_rows,
        margin={"l": 60, "r": 30, "t": 30, "b": 30},
    )

    for grid_row in range(1, n_rows + 1):
        fig.update_yaxes(title_text="Objective Value", row=grid_row, col=1)

    return fig

## 1. RDC Analysis

In [8]:
# Load every pickled result whose filename starts with the RDC prefix,
# keyed by filename.
rdc_dfs_dict = load_all_dataframes(prefix=RDC_PREFIX)

### Individual Importance Plots (Per File)

These plots show the hyperparameter importance for each individual experimental run. This is the most accurate way to see which parameters were most influential in a specific context (e.g., for architecture tuning).

In [9]:
# One parameter-importance chart per loaded RDC result file.
for filename, df in rdc_dfs_dict.items():
    # Study name = filename without the "_final.pickle" suffix.
    study_name = filename.replace(STUDY_FILE_SUFFIX, "")

    # Pass a copy so the frame cached in rdc_dfs_dict stays untouched.
    temp_study = create_study_from_df(study_name, df.copy())

    fig = optuna.visualization.plot_param_importances(temp_study)
    fig.update_layout(title=f"Importance for: {study_name}", width=1000, height=500)
    fig.show()

[I 2025-08-27 08:09:24,892] A new study created in memory with name: rdc_ho_architecture_20250730-095643


[I 2025-08-27 08:09:35,537] A new study created in memory with name: rdc_ho_architecture_20250730-165632


[I 2025-08-27 08:09:35,700] A new study created in memory with name: rdc_ho_exploration_20250730-095800


[I 2025-08-27 08:09:36,094] A new study created in memory with name: rdc_ho_exploration_20250730-165724


[I 2025-08-27 08:09:36,525] A new study created in memory with name: rdc_ho_final_joint_20250731-081826


[I 2025-08-27 08:10:01,964] A new study created in memory with name: rdc_ho_learningrate_20250730-095559


[I 2025-08-27 08:10:07,562] A new study created in memory with name: rdc_ho_learningrate_20250730-165543


[I 2025-08-27 08:10:11,151] A new study created in memory with name: rdc_ho_replaybuffer_20250730-095727


[I 2025-08-27 08:11:01,202] A new study created in memory with name: rdc_ho_replaybuffer_20250730-165701


### Aggregated Analysis (All RDC Trials)

These plots combine all trials from all RDC runs to give a high-level overview of the entire optimization process.

In [10]:
# Stack the trials of all RDC runs into one frame (with a fresh row index)
# and rebuild a single study from it.
rdc_aggregated_study = create_study_from_df(
    "RDC Aggregated", pd.concat(rdc_dfs_dict.values(), ignore_index=True)
)

[I 2025-08-27 08:11:41,629] A new study created in memory with name: RDC Aggregated


In [11]:
# Optimization history over all aggregated RDC trials, shown without title
# or legend and with a fixed canvas size.
rdc_fig_hist = optuna.visualization.plot_optimization_history(rdc_aggregated_study)
rdc_fig_hist.update_layout(
    width=960,
    height=594,
    showlegend=False,
    title=None,
    margin=dict(l=60, r=30, t=30, b=60),
)
rdc_fig_hist.show()

In [None]:
# Names of all parameters in the aggregated RDC study, i.e. every
# "params_*" column of its trials dataframe with the marker removed.
rdc_full_params = [
    col.replace("params_", "")
    for col in rdc_aggregated_study.trials_dataframe().columns
    if col.startswith("params_")
]

In [None]:
# Slice plots for every RDC parameter; a smaller vertical spacing than the
# helper's default is passed for this larger grid.
rdc_fig_slice_full = create_grid_slice_plot(
    study=rdc_aggregated_study, top_params=rdc_full_params, vertical_spacing=0.025
)
rdc_fig_slice_full.show()

In [None]:
# Hand-picked subset of RDC parameters for a focused slice plot.
rdc_top_params = ["rdc_gamma", "rdc_tau", "rdc_epsilon_decay", "rdc_batch_size"]

rdc_fig_slice_focused = create_grid_slice_plot(
    study=rdc_aggregated_study, top_params=rdc_top_params
)
rdc_fig_slice_focused.show()

## 2. UIC Analysis

In [17]:
import re
from datetime import datetime


def extract_timestamp(filename):
    """Parse the first ``YYYYMMDD-HHMMSS`` stamp found in ``filename``.

    Args:
        filename: Name to search for a run of 8 digits, a dash and 6 digits.

    Returns:
        The parsed ``datetime``, or ``datetime.min`` when the name contains
        no such stamp or the digits do not form a valid date/time. The
        fallback keeps the function usable as a sort key for arbitrary names.
    """
    match = re.search(r"(\d{8}-\d{6})", filename)
    if match is None:
        return datetime.min

    try:
        return datetime.strptime(match.group(1), "%Y%m%d-%H%M%S")
    except ValueError:
        # The digits matched the pattern but are not a real date/time
        # (e.g. month 13 or February 30); treat like a missing timestamp.
        return datetime.min

In [21]:
# Load every pickled result whose filename starts with the UIC prefix.
uic_dfs_dict = load_all_dataframes(prefix=UIC_PREFIX)

# Re-key the mapping so iteration goes from the newest to the oldest
# timestamp embedded in the filenames.
uic_dfs_dict = {
    name: uic_dfs_dict[name]
    for name in sorted(uic_dfs_dict, key=extract_timestamp, reverse=True)
}

### Individual Importance Plots (Per File)

In [None]:
# One parameter-importance chart per loaded UIC result file.
for filename, df in uic_dfs_dict.items():
    # Study name = filename without the "_final.pickle" suffix.
    study_name = filename.replace(STUDY_FILE_SUFFIX, "")
    # Pass a copy so the frame cached in uic_dfs_dict stays untouched.
    temp_study = create_study_from_df(study_name, df.copy())

    fig = optuna.visualization.plot_param_importances(temp_study)
    fig.update_layout(title=f"Importance for: {study_name}", width=800, height=500)
    fig.show()

[I 2025-08-07 08:58:18,196] A new study created in memory with name: uic_ho_architecture_20250802-114625


[I 2025-08-07 08:58:18,562] A new study created in memory with name: uic_ho_architecture_20250803-202539


[I 2025-08-07 08:58:18,792] A new study created in memory with name: uic_ho_final_joint_20250804-183544


[I 2025-08-07 08:58:19,997] A new study created in memory with name: uic_ho_ppo-core_20250802-114428


[I 2025-08-07 08:58:20,522] A new study created in memory with name: uic_ho_ppo-core_20250803-202700


[I 2025-08-07 08:58:21,011] A new study created in memory with name: uic_ho_stability_20250802-114603


[I 2025-08-07 08:58:21,408] A new study created in memory with name: uic_ho_stability_20250803-202958


### Aggregated Analysis (All UIC Trials)

In [22]:
# Stack the trials of all UIC runs into one frame (with a fresh row index)
# and rebuild a single study from it.
uic_aggregated_study = create_study_from_df(
    "UIC Aggregated", pd.concat(uic_dfs_dict.values(), ignore_index=True)
)

[I 2025-08-27 08:18:16,174] A new study created in memory with name: UIC Aggregated


### Optimization History

In [23]:
# Optimization history over all aggregated UIC trials, shown without title
# or legend and with a fixed canvas size.
uic_fig_hist = optuna.visualization.plot_optimization_history(uic_aggregated_study)
uic_fig_hist.update_layout(
    width=960,
    height=594,
    showlegend=False,
    title=None,
    margin=dict(l=60, r=30, t=30, b=60),
)
uic_fig_hist.show()

### Slice Plot All Params

In [None]:
# Names of all parameters in the aggregated UIC study, i.e. every
# "params_*" column of its trials dataframe with the marker removed.
uic_full_params = [
    col.replace("params_", "")
    for col in uic_aggregated_study.trials_dataframe().columns
    if col.startswith("params_")
]

In [None]:
# Slice plots for every UIC parameter; a smaller vertical spacing than the
# helper's default is passed for this larger grid.
uic_fig_slice_full = create_grid_slice_plot(
    study=uic_aggregated_study, top_params=uic_full_params, vertical_spacing=0.025
)
uic_fig_slice_full.show()

### Top 4 Params Slice Plot

In [None]:
# Hand-picked subset of UIC parameters for a focused slice plot.
uic_top_params = ["learning_rate", "n_steps", "gamma", "ent_coef"]

uic_fig_slice_focused = create_grid_slice_plot(
    study=uic_aggregated_study, top_params=uic_top_params
)
uic_fig_slice_focused.show()