## Importances
Use this notebook to visualize hyperparameter importances. Here we leverage the fANOVA feature-importance functionality provided by Optuna.

In [None]:
# Notebook setup: (re)load IPython's autoreload extension and switch it to
# mode 2, so imported modules are reloaded before each cell is executed.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the style IPython's verbose traceback formatter uses for its
# highlighted line (presumably chosen for readability on a dark theme -- confirm).
ultratb.VerboseTB.tb_highlight = "bg:#3e0054"

In [None]:
from pathlib import Path

from slugify import slugify

from syftr.configuration import cfg

# Names of the Optuna studies to analyze. Each name has the form
# "<prefix>--rag-and-agents--<dataset>"; all "rank1" studies come first,
# followed by all "rank2" studies, with datasets in the order listed below.
STUDY_NAMES = [
    f"{rank}--rag-and-agents--{dataset}"
    for rank in ("rank1", "rank2")
    for dataset in (
        "bright_hf",
        "crag_hf-music",
        "crag_hf-sports",
        "drdocs_hf",
        "financebench_hf",
        "hotpotqa_hf-train_hard",
        "infinitebench_hf",
        "multihoprag_hf",
        "phantomwikiv050_hf",
    )
]
# Display names of the objectives for which importances are computed.
TARGET_NAMES = ["Accuracy", "Cost"]
# Optuna storage handle taken from the project configuration
# (presumably PostgreSQL-backed, judging by `cfg.postgres` -- confirm).
STORAGE = cfg.postgres.get_optuna_storage()

# Output directory for the rendered plots: create it (including parents) if it
# is missing, then keep the resolved form of the path for later use.
RESULTS_DIR: Path = cfg.paths.results_dir
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
RESULTS_DIR = RESULTS_DIR.resolve()

In [None]:
def get_trial_value(trial, target_name: str) -> float:
    """Return the objective value of *trial* that corresponds to *target_name*.

    Names containing "acc" (case-insensitive) select ``trial.values[0]``; any
    other recognised name selects ``trial.values[1]``.

    Args:
        trial: Object exposing a ``values`` sequence with at least two entries
            (presumably an Optuna trial of a two-objective study -- confirm).
        target_name: One of the names listed in ``TARGET_NAMES``.

    Returns:
        The selected entry of ``trial.values``.

    Raises:
        ValueError: If *target_name* is not listed in ``TARGET_NAMES``.
    """
    # Validate with an explicit raise rather than `assert`: assertions are
    # stripped under `python -O`, which would let an unknown name silently
    # fall through to values[1].
    if target_name not in TARGET_NAMES:
        raise ValueError(
            f"Unknown target {target_name!r}; expected one of {TARGET_NAMES}"
        )
    objective_index = 0 if "acc" in target_name.lower() else 1
    return trial.values[objective_index]

In [None]:
import optuna
from optuna.visualization import plot_param_importances
from plotly.graph_objects import Figure
from IPython.display import Image, display

# For every study and every objective: compute the parameter importances with
# Optuna, save the resulting chart as a PNG under RESULTS_DIR, and show it inline.
for study_name in STUDY_NAMES:
    print(f"Loading study '{study_name}'")
    study = optuna.load_study(storage=STORAGE, study_name=study_name)
    for target_name in TARGET_NAMES:
        print(f"Computing hyperparameter importances for target '{target_name}'")
        # The objective name is bound as a default argument of the callback,
        # which extracts the matching objective value from each trial.
        fig: Figure = plot_param_importances(
            study=study,
            target=lambda trial, name=target_name: get_trial_value(trial, name),
            target_name=target_name,
        )
        fig.update_layout(
            title=f"Hyperparameters that have the most Impact on {target_name} ({study_name})"
        )
        # File name is the slugified "<study>-importances-<target>" plus ".png".
        img_path = RESULTS_DIR.joinpath(
            slugify(f"{study_name}-importances-{target_name}") + ".png"
        )
        fig.write_image(img_path)
        display(Image(filename=str(img_path)))

In [None]:
from syftr.optuna_helper import get_completed_trials


# Load the completed trials of all studies and list, for every parameter
# column (prefix "params_"), the distinct values that occur in it.
df = get_completed_trials(STUDY_NAMES)
for col in df.columns:
    if not col.startswith("params_"):
        continue
    print(f"Unique values in {col}: {df[col].unique()}")
