In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits to
# imported modules are picked up before each cell runs, without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch to the project directory (the one that contains `src`).
# The move is guarded so that re-running this cell, or starting the kernel
# directly in the project directory, does not climb one level too far and
# break the relative `src`, `results/...` and `reports` paths used below.
import os

if not os.path.isdir("src"):
    os.chdir("..")
# working directory should be ../pdi
print(os.getcwd())

In [None]:
import sys
import os
# Absolute path of the `src` directory, resolved against the current working
# directory (which the previous cell is expected to have set).
module_path = os.path.abspath('src')

# Register it on the import path only once, so re-running this cell does not
# keep appending duplicates to sys.path.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

#### How to use this notebook?
1. Train models with the desired configs using the scripts in the `scripts` subdirectory.
2. Fill the `MODELS` dictionary: each key is the display name of a model, and each value is the path to the results directory of its run.
3. Run the desired plot/table generation cells.

In [None]:
from pdi.constants import PART_NAME_TO_TARGET_CODE

# Target code looked up by particle name; it is passed to the engine and used
# in the names of the exported report files.
target_code = PART_NAME_TO_TARGET_CODE["proton"]

# Display name -> results directory of a finished run.
# Each directory is read for its `config.json` and passed to the engine as
# both `base_dir` and `model_dirpath` in the loading cell.
MODELS = {
    "Attention": "results/attention_hyperparameter_tuning_outlier_filtering/proton/isolation_forest_f1_0_86",
}

# Every table and figure produced by this notebook is written here.
save_dir = "reports"
os.makedirs(save_dir, exist_ok=True)

In [None]:
import json
from pdi.config import Config
from pdi.engines import build_engine
from pdi.results_and_metrics import TestResults
from pdi.data.data_preparation import DataPreparation
from pdi.data.types import Split

from typing import Optional

# Fail early with a clear message: with no models configured, `data_prep` would
# stay `None` and the following cells would die with an opaque AttributeError.
if not MODELS:
    raise ValueError("MODELS is empty: add at least one results directory before running this cell.")

# Data preparation object of the first model; later cells reuse it to obtain
# the nSigma results and the test split.
data_prep: Optional[DataPreparation] = None
# Distinct input checksums seen so far; more than one means the runs did not
# share the same input data.
checksums = set()
# Test results keyed by the display name given in MODELS.
test_results: dict[str, TestResults] = {}

for model_name, model_dir in MODELS.items():
    with open(f"{model_dir}/config.json", 'r', encoding="utf-8") as f:
        config_data = json.load(f)
    config = Config.from_dict(config_data)
    # Override whatever device the run was configured with; the engine built
    # from this config is only used for testing here.
    config.training.device = "cpu"
    engine = build_engine(config, target_code, base_dir=model_dir)
    current_data_prep = engine.get_data_prep()
    if data_prep is None:
        data_prep = current_data_prep

    # NOTE(review): `_inputs_checksum` is a private attribute of the data
    # preparation object; a public accessor would be a safer dependency.
    checksums.add(current_data_prep._inputs_checksum)
    # Check before running the test pass, so a dataset mismatch is reported
    # immediately rather than after every model has been tested.
    if len(checksums) > 1:
        raise RuntimeError("You shouldn't compare models trained on different datasets.")

    test_results[model_name] = engine.test(model_dirpath=model_dir)

In [None]:
# Store an extra entry under the "nSigma" key so that it is included wherever
# `test_results` is iterated or plotted below. It is computed by the shared
# data preparation object with an unscaled threshold of 3.0.
nsigma_results = data_prep.get_nsigma_test_results(target_code, threshold_unscaled=3.0)
test_results["nSigma"] = nsigma_results

#### Extract Unwrapped and Unstandardized Test Split Data as a DataFrame
The test split data is obtained directly from the `CombinedDataLoader` to ensure consistency. Only the `CombinedDataLoader` knows how to unwrap itself, and it raises an error if the operation cannot be performed. The same result could be achieved by adding a method to the `DataPreparation` class, but that would require `DataPreparation` to understand the internal structure of the `CombinedDataLoader` and to be updated whenever the `CombinedDataLoader` changes. The current approach is therefore preferred: it maintains separation of concerns and avoids unnecessary dependencies. It is also thousands of times faster than repeatedly iterating over the dataloader and concatenating batches.

In [None]:
# Build the dataloaders for the test split only and keep that one.
# NOTE(review): the meaning of the two dict arguments and the two positional
# booleans is not visible in this notebook; the original author marks the
# dict values as unused for this purpose.
test_dataloaders = data_prep.create_dataloaders(
    {Split.TEST: 1},  # not used
    {Split.TEST: 1},  # not used
    False,
    False,
)
test_dl = test_dataloaders[Split.TEST]

# Unwrap the loader into a single table, then show its shape and first rows.
test_data_unwrapped = test_dl.unwrap()
print(test_data_unwrapped.shape)
test_data_unwrapped.head()

#### Generate a comparison table at the optimal posterior probability threshold for F1

In [None]:
import pandas as pd

# One row per entry of `test_results`: its test metrics as a dict, plus a
# 'Model' column holding the entry's display name.
metrics_data = [
    {**test_result.test_metrics.to_dict(), 'Model': model_name}
    for model_name, test_result in test_results.items()
]
metrics_df = pd.DataFrame(metrics_data)
print(metrics_df)

# Export the table as LaTeX. It is rendered before the file is opened, so a
# rendering failure does not leave an empty file behind.
latex_table = metrics_df.to_latex(index=False)
with open(f"{save_dir}/test_metrics_comparison_{target_code}.tex", "w") as f:
    f.write(latex_table)

# Export the same table as CSV, without the index column.
metrics_df.to_csv(f"{save_dir}/test_metrics_comparison_{target_code}.csv", index=False)

print("LaTeX table and CSV file saved successfully.")

#### Precision-recall curve for changing posterior probability threshold

In [None]:
from pdi.visualise import plot_precision_recall_comparison
from pdi.data.data_exploration import generate_figure_thumbnails_from_iterator

# Half-open [low, high) intervals of the "fPt" column; the first one covers
# the whole range, the rest split it into bins.
pt_ranges = [
    (0.0, float("inf")),
    (0.0, 0.5),
    (0.5, 1.0),
    (1.0, 2.0),
    (2.0, float("inf")),
]

pt_column = test_data_unwrapped["fPt"]

# Pairs of (object returned by the plotting helper, output file name), one per interval.
figures = []
for pt_low, pt_high in pt_ranges:
    # Select the rows whose fPt falls inside the current interval.
    mask = (pt_column >= pt_low) & (pt_column < pt_high)
    comparison_plot = plot_precision_recall_comparison(
        test_results,
        title_suffix=f"p_t range [{pt_low},{pt_high})",
        mask=mask.to_numpy(),
    )
    figures.append((comparison_plot, f"pt_{pt_low}_{pt_high}.png"))

generate_figure_thumbnails_from_iterator(figures, save_path=save_dir, thumbnail_width=600)

#### Evaluation metrics vs transverse momentum at the optimal posterior probability threshold for F1

In [None]:
from pdi.visualise import plot_metrics_vs_pt_comparison

# The "fPt" column of the unwrapped test split, as a NumPy array.
pt_values = test_data_unwrapped["fPt"].to_numpy()

# Build the metrics-vs-pT comparison for all entries of `test_results`, then
# pass the result to the thumbnail helper, which writes into `save_dir`.
metrics_vs_pt_figures = plot_metrics_vs_pt_comparison(test_results, pt=pt_values, save_dir=save_dir)
generate_figure_thumbnails_from_iterator(metrics_vs_pt_figures, save_path=save_dir, thumbnail_width=600)