In [None]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell executes, so edits to project source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Switch to the project directory, two levels above the current working directory.
# NOTE: the path is relative, so re-running this cell moves up two more levels;
# run it only once per kernel session.
%cd ../..
# After this cell the working directory should be the project directory (../pdi).

In [None]:
import sys
import os
# Absolute path of the project's `src` directory, resolved against the current
# working directory.
module_path = os.path.abspath('src')

# Register it on the import path once; re-running the cell adds no duplicates.
if not any(entry == module_path for entry in sys.path):
    sys.path.append(module_path)

In [None]:
import json
import os
from collections import defaultdict

# Metadata JSON written for the hyperparameter sweep. The loop below expects a
# mapping whose values are lists of experiment records.
SWEEP_METADATA_FILE = "results/attention_dann_hyperparameter_tuning/sweep_118f437672375fa45c5e417106c304a1/sweep_metadata.json"
with open(SWEEP_METADATA_FILE, "r") as metadata_file:
    sweep_metadata = json.load(metadata_file)

# Group the output directory of every experiment by the alpha value it used.
alpha_parameters = defaultdict(list)

for experiment_group in sweep_metadata.values():
    for run in experiment_group:
        run_alpha = run["sweep_config"]["model"]["attention_dann"]["alpha"]
        alpha_parameters[run_alpha].append(run["base_dir"])

# Summarise the grouping so the sweep coverage can be checked by eye.
print("Alpha Parameters:")
for alpha_value, run_dirs in alpha_parameters.items():
    print(f"  Alpha {alpha_value}: {run_dirs}")

# Output directory for reports; created if it does not exist yet.
save_dir = "reports"
os.makedirs(save_dir, exist_ok=True)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Columns that must be present in a run's validation metrics CSV.
REQUIRED_METRIC_COLUMNS = ("val/f1", "val/domain/accuracy")


def _best_f1_metrics(metrics_df):
    """Return ``(f1, domain_accuracy)`` taken from the row with the highest ``val/f1``.

    Args:
        metrics_df: DataFrame read from a run's ``validation_metrics.csv``.

    Returns:
        A ``(f1, domain_accuracy)`` tuple, or ``None`` when a required column is
        missing or ``val/f1`` contains no non-NaN value. Returning ``None`` lets
        the caller skip the run instead of failing inside ``idxmax``.
    """
    if any(column not in metrics_df.columns for column in REQUIRED_METRIC_COLUMNS):
        return None

    valid_f1 = metrics_df["val/f1"].dropna()
    if valid_f1.empty:
        return None

    # idxmax on the NaN-free series selects the same row as the default
    # skipna=True behaviour, but cannot fail on an all-NaN column.
    best_row = metrics_df.loc[valid_f1.idxmax()]
    return best_row["val/f1"], best_row["val/domain/accuracy"]


def _plot_metric_vs_alpha(alphas, means, stds, metric_label, color=None):
    """Show one error-bar figure (mean with std bars) of a metric against alpha.

    Args:
        alphas: Alpha values for the x axis.
        means: Mean metric value per alpha.
        stds: Standard deviation per alpha, drawn as error bars.
        metric_label: Text used for the y axis, legend entry and title.
        color: Optional line color; the matplotlib default is used when omitted.
    """
    line_style = {} if color is None else {"color": color}

    plt.figure(figsize=(10, 5))
    plt.errorbar(alphas, means, yerr=stds, fmt='-o', label=metric_label, capsize=5, **line_style)
    plt.xlabel("Alpha")
    plt.ylabel(metric_label)
    plt.title(f"{metric_label} vs Alpha (with error bars)")
    plt.grid(True)
    plt.legend()
    plt.show()


# Raw per-run values and (mean, std) summaries, all keyed by alpha.
alpha_f1_scores = {}
alpha_domain_accuracies = {}
alpha_f1_stats = {}
alpha_domain_stats = {}

for alpha, base_dirs in alpha_parameters.items():
    f1_scores = []
    domain_accuracies = []

    for base_dir in base_dirs:
        metrics_path = os.path.join(base_dir, "validation_metrics.csv")
        if not os.path.exists(metrics_path):
            print(f"Metrics file not found: {metrics_path}")
            continue

        df = pd.read_csv(metrics_path)

        best_metrics = _best_f1_metrics(df)
        if best_metrics is None:
            # Report skipped runs so they do not silently vanish from the statistics.
            print(f"No usable val/f1 and val/domain/accuracy values in: {metrics_path}")
            continue

        f1, domain_accuracy = best_metrics
        f1_scores.append(f1)
        domain_accuracies.append(domain_accuracy)

    if f1_scores and domain_accuracies:
        # Keep the raw values as well as the summaries; these two dicts were
        # previously declared but never filled.
        alpha_f1_scores[alpha] = f1_scores
        alpha_domain_accuracies[alpha] = domain_accuracies
        alpha_f1_stats[alpha] = (np.mean(f1_scores), np.std(f1_scores))
        alpha_domain_stats[alpha] = (np.mean(domain_accuracies), np.std(domain_accuracies))

# Order everything by alpha so the plotted lines run left to right.
sorted_alphas = sorted(alpha_f1_stats.keys())
sorted_f1_means = [alpha_f1_stats[alpha][0] for alpha in sorted_alphas]
sorted_f1_stds = [alpha_f1_stats[alpha][1] for alpha in sorted_alphas]
sorted_domain_means = [alpha_domain_stats[alpha][0] for alpha in sorted_alphas]
sorted_domain_stds = [alpha_domain_stats[alpha][1] for alpha in sorted_alphas]

if sorted_alphas:
    _plot_metric_vs_alpha(sorted_alphas, sorted_f1_means, sorted_f1_stds, "F1 Score")
    _plot_metric_vs_alpha(
        sorted_alphas, sorted_domain_means, sorted_domain_stds, "Domain Accuracy", color='orange'
    )
else:
    print("No validation metrics were found for any alpha; nothing to plot.")