## Data
Use this notebook to load raw optimization data: every trial of the selected studies, without filtering for successful trials or any other criteria.

In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before executing code so edits to project modules are picked up
# without restarting the kernel.
%reload_ext autoreload
%autoreload 2

from IPython.core import ultratb

# Override the highlight style of IPython's verbose tracebacks with a dark
# purple background.
# NOTE(review): `_tb_highlight` is a private attribute of VerboseTB; confirm it
# exists (and is still honored) in the installed IPython version.
ultratb.VerboseTB._tb_highlight = "bg:#3e0054"

In [None]:
from syftr.configuration import cfg
from syftr.optuna_helper import get_study_names

# Regex patterns used to select the studies to load.
# NOTE(review): presumably a study is kept when it matches an include pattern
# and no exclude pattern; confirm against get_study_names.
INCLUDE_REGEX = [
    "silver1--.*",
]
EXCLUDE_REGEX = []

# Optuna storage taken from the syftr configuration; later cells pass it to
# optuna.load_study when loading each study.
STORAGE = cfg.database.get_optuna_storage()

study_names = get_study_names(
    include_regex=INCLUDE_REGEX,
    exclude_regex=EXCLUDE_REGEX,
)

# Show the selected study names as the cell output.
study_names

In [None]:
import optuna
import pandas as pd


def get_data(study_names):
    """Load the trials of every given study into one combined DataFrame.

    Each study is loaded from the module-level ``STORAGE`` and converted with
    ``study.trials_dataframe()``; a ``study_name`` column is added so rows can
    be traced back to their study. Studies are loaded concurrently in a thread
    pool, and the per-study frames are concatenated in the order of
    ``study_names`` with a fresh integer index.

    Args:
        study_names: Iterable of Optuna study names to load.

    Returns:
        A ``pd.DataFrame`` containing the trials of all studies.

    Raises:
        ValueError: If ``study_names`` is empty. Without this check the failure
            would surface as pandas' generic "No objects to concatenate".
    """
    import concurrent.futures

    # Materialize once so generators are supported and emptiness can be checked.
    study_names = list(study_names)
    if not study_names:
        raise ValueError(
            "study_names is empty: there is no study to load. "
            "Check the include/exclude patterns used to select studies."
        )

    def load_study_trials(study_name):
        """Return the trials DataFrame of one study, tagged with its name."""
        study = optuna.load_study(study_name=study_name, storage=STORAGE)
        df_trials: pd.DataFrame = study.trials_dataframe()
        df_trials["study_name"] = study_name
        return df_trials

    # executor.map yields results in input order, so the concatenated rows
    # follow the order of study_names.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        dfs = list(executor.map(load_study_trials, study_names))

    return pd.concat(dfs, ignore_index=True)


df = get_data(study_names)

# Count the distinct flow configurations across all loaded trials. The count is
# computed outside the f-string because reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
n_unique_flows = len(df["user_attrs_flow"].unique())
print(f"The given set of studies contains {n_unique_flows} unique flows.")

df

In [None]:
def get_column_names_like(df: pd.DataFrame, names: str | list[str]) -> list[str]:
    names = names if isinstance(names, list) else [names]
    col_names = []
    for name in names:
        is_match = df.columns.str.contains(name, case=False, regex=True)
        col_names.extend(list(df.columns[is_match]))
    return col_names


In [None]:
# Display the columns whose names contain "fail", "message" or "state"
# (case-insensitive).
get_column_names_like(df, ["fail", "message", "state"])

In [None]:
# Keep only the trials flagged as failed, then drop every column that holds no
# value at all for those trials. dropna is chained (not inplace) so a new frame
# is produced: dropping in place on a filtered selection of `df` can trigger
# SettingWithCopyWarning and makes the cell depend on how often it was run.
# `== True` is kept deliberately so rows with a missing flag compare as False.
df_failed = df[df["user_attrs_metric_failed"] == True].dropna(axis=1, how="all")  # noqa: E712
df_failed

In [None]:
import json
from pprint import pprint

# Print a report for every failed trial: exception class, message and stack
# trace (each followed by a dashed rule), then the parsed flow configuration.
for _, failed_trial in df_failed.iterrows():
    for label, column in (
        ("Exception: ", "user_attrs_metric_exception_class"),
        ("Message: ", "user_attrs_metric_exception_message"),
        ("Traceback: ", "user_attrs_metric_exception_stacktrace"),
    ):
        print(label, failed_trial[column])
        print("-" * 80)
    # The flow is stored as a JSON string; decode it for readable output.
    pprint(json.loads(failed_trial["user_attrs_flow"]))
    print("=" * 80)

In [None]:
# Collect the flow configurations of all failed trials whose exception message
# contains the given substring, and show them as a table.
error_message_substring = "Too few successful evaluations"

errored_configs = []
for _, row in df_failed.iterrows():
    message = row["user_attrs_metric_exception_message"]
    # A trial without a recorded message holds a non-string placeholder (NaN is
    # a float), on which the `in` test would raise TypeError; skip such rows.
    if not isinstance(message, str) or error_message_substring not in message:
        continue
    # The flow is stored as a JSON string; decode it into a dict.
    flow = json.loads(row["user_attrs_flow"])
    pprint(flow)
    errored_configs.append(flow)

df_errored_configs = pd.DataFrame(errored_configs)
df_errored_configs