# Description

- Evaluate multiple runs from DAG modeling pipelines
- Calculate top-level stats and adjust for multiple hypothesis testing

# Imports

In [None]:
%load_ext autoreload
%autoreload 2


import pandas as pd

import automl.notebooks.utils as amlnut
import core.config as cfg
import core.config_builders as cfgb
import core.plotting as plot
import core.statistics as stats
import core.timeseries_study as tss

# Load config

In [None]:
# Try to obtain the evaluation config from the environment.
# NOTE(review): presumably this yields None when no config was passed in
# through the environment -- confirm against `cfgb.get_config_from_env`.
eval_config = cfgb.get_config_from_env()

In [None]:
# Use the hard-coded config below only when no config was loaded from the
# environment (i.e., `eval_config` is still None). An unconditional override
# here would silently discard any config supplied through the environment.
if eval_config is None:
    eval_config = cfgb.get_config_from_nested_dict(
        {
            # Directory holding the per-run `config.pkl` / `result_bundle.pkl`.
            "results_directory": "/data/automl/experiments/basf/RH_1",
            # Statistics computed on the PnLs.
            # NOTE(review): the keys look like prefixes of the resulting stat
            # columns (e.g. `ttest_pval`, `perf_ann_sharpe`) -- confirm.
            "metrics": {
                "perf_": stats.compute_annualized_sharpe_ratio,
                "ttest_": stats.ttest_1samp,
                "moments_": stats.compute_moments,
            },
            # Keyword arguments forwarded to `amlnut.perform_global_adj`.
            "perform_global_adj": {"pval_col": "ttest_pval"},
            # Threshold passed to the multiple-tests plots.
            "global_adj_pval_threshold": 0.2,
            # Keyword arguments forwarded to `amlnut.perform_family_adj`.
            "perform_family_adj": {"pval_col": "ttest_pval", "tag_col": "tag"},
            # Keyword arguments forwarded to `amlnut.perform_family_selection`.
            "perform_family_selection": {
                "tag_col": "tag",
                "sr_col": "perf_ann_sharpe",
                "adj_pval_threshold": 0.2,
                "ann_sharpe_threshold": 0.7,
                "num": 10,
            },
            # Tags used to build the per-family `<tag>_adj_pval` column names.
            "tags": (
                "rh1",
                "rh2",
                "rh3",
                "rh4",
                "rh5",
                "rh6",
                "rh7",
                "rh8",
                "rh9",
                "rh10",
                "rh11",  # Selected (Propylene)
                "rh12",
                "rh13",  # Selected (Butadiene)
                "rh14",
                "rh15",  # Selected (Butadiene)
                "butadiene",
            ),
            "pipeline_stages_to_plot": {
                "sig/load_data",
                "sig/dropna",
                "rets/clip",
                "model/ml",
            },
        }
    )

In [None]:
eval_config

# Load pipeline run configs

In [None]:
# Read the configs ahead of the (larger) result bundles.
results_dir = eval_config["results_directory"]
config_dict = amlnut.load_files(results_dir, "config.pkl")

In [None]:
# Create a dataframe from the values of the loaded pipeline run configs.
configs = cfg.convert_to_dataframe(config_dict.values())

In [None]:
configs.head(3)

In [None]:
# Diff the run configs against each other and tabulate the result, discarding
# every column that contains no values at all.
diffs = cfg.diff_configs(config_dict.values())
config_diffs = cfg.convert_to_dataframe(diffs)
config_diffs = config_diffs.dropna(axis="columns", how="all")

In [None]:
config_diffs.head(3)

# Load pipeline result bundles

In [None]:
# Result bundles are stored in the same results directory as the configs.
bundle_file_name = "result_bundle.pkl"
result_bundles = amlnut.load_files(
    eval_config["results_directory"],
    bundle_file_name,
)

In [None]:
len(result_bundles)

# Calculate metrics and performance statistics

In [None]:
# Derive PnLs from the loaded result bundles.
# NOTE(review): `pnls` is later passed to `tss.map_dict_to_dataframe`, so it
# is presumably a dict -- confirm.
pnls = amlnut.compute_pnl_v2(result_bundles)

In [None]:
# TODO: Add a progress bar.
# Apply the configured metric functions to the PnLs, then transpose the
# resulting table.
metric_funcs = eval_config["metrics"].to_dict()
pnl_stats = tss.map_dict_to_dataframe(pnls, metric_funcs)
pnl_stats = pnl_stats.transpose()

In [None]:
pnl_stats.head(3)

## Global adjustment

In [None]:
# The keyword arguments for the global adjustment come from the eval config.
global_adj_kwargs = eval_config["perform_global_adj"].to_dict()
pnl_stats_with_adj = amlnut.perform_global_adj(pnl_stats, **global_adj_kwargs)

In [None]:
pnl_stats_with_adj

In [None]:
# Place the adjusted stats and the config diffs side by side, aligned on index.
results_df = pd.concat(
    (pnl_stats_with_adj, config_diffs),
    axis="columns",
)

In [None]:
results_df.head()

In [None]:
# Plot the globally adjusted p-values (missing values dropped) against the
# configured threshold.
global_adj_pvals = results_df["global_adj_pval"].dropna()
plot.multipletests_plot(
    global_adj_pvals, threshold=eval_config["global_adj_pval_threshold"]
)

## Family-wise adjustment

In [None]:
# The keyword arguments for the family-wise adjustment come from the eval
# config; `results_df` is rebound to the returned object.
family_adj_kwargs = eval_config["perform_family_adj"].to_dict()
results_df = amlnut.perform_family_adj(results_df, **family_adj_kwargs)

In [None]:
results_df.columns

In [None]:
results_df.head(3)

In [None]:
# Collect the per-tag adjusted p-value columns that actually exist in
# `results_df`. For some reason there is no `_adj_pval` column for
# 'butadiene' (might need fixing), so columns are selected by presence
# rather than by assuming the tag without a column is last in the tuple.
adj_cols = [
    tag + "_adj_pval"
    for tag in eval_config["tags"]
    if tag + "_adj_pval" in results_df.columns
]

In [None]:
# Plot the family-wise adjusted p-values, laid out in three columns.
# NOTE(review): this reuses the *global* threshold key for the family-wise
# plot -- confirm this is intended.
family_adj_pvals = results_df[adj_cols]
plot.multipletests_plot(
    family_adj_pvals,
    threshold=eval_config["global_adj_pval_threshold"],
    num_cols=3,
)

## Plot selected PnL

In [None]:
# The selection parameters (columns, thresholds, count) come from the eval
# config.
selection_kwargs = eval_config["perform_family_selection"].to_dict()
selected = amlnut.perform_family_selection(
    results_df,
    pnls,
    **selection_kwargs,
)

In [None]:
selected.keys()

In [None]:
# TODO(2752): Make these fancy.
#
# for tag in selected.keys():
#    selected[tag].cumsum().plot()