In [10]:
import pandas as pd
import numpy as np
import json
import os
from pathlib import Path

from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment

In [11]:
# Read the regime-assignment report CSV; its "Cluster_ID" column is consumed
# by the alignment step further down.
regime_csv = "../../artifacts/reports/GBP_USD/ws5/learnable64/default/nc8/regime_assignments_26.csv"
regime_df = pd.read_csv(regime_csv)

In [12]:
# Both baseline artifacts (label CSV and saved label mapping) are resolved
# relative to a single metadata directory.
metadata_dir = Path("../../artifacts/baseline_metadata/GBP_USD/ws5/learnable64/default/nc8")
baseline_path = metadata_dir.joinpath("regime_assignments_baseline.csv")
mapping_path = metadata_dir.joinpath("regime_label_mapping.json")

# Baseline cluster labels: rows with a missing Cluster_ID are dropped and the
# remaining values are cast to int.
baseline_df = pd.read_csv(baseline_path)
baseline_series = baseline_df["Cluster_ID"].dropna().astype(int)

# The saved mapping is optional: an absent file yields an empty dict.
if mapping_path.exists():
    with mapping_path.open("r", encoding="utf-8") as f:
        raw_mapping = json.load(f)
    # JSON object keys are always strings, so convert keys and values back to int.
    existing_mapping = {int(key): int(val) for key, val in raw_mapping.items()}
else:
    existing_mapping = {}


In [13]:
# Match the current run's cluster labels to the baseline labels by maximising
# their co-occurrence counts (Hungarian assignment), then compare the result
# with the mapping loaded from disk.

# Step 1: Extract current cluster labels (rows without a Cluster_ID are dropped)
current_series = regime_df["Cluster_ID"].dropna().astype(int)

# Step 2: Align series lengths
# NOTE(review): alignment is purely positional, and each series had its NaN rows
# dropped independently, so this presumably relies on both series starting at
# the same observation with missing values in the same places -- confirm.
min_len = min(len(current_series), len(baseline_series))
if min_len == 0:
    # With no overlapping rows the label set is empty and confusion_matrix
    # would fail with a message about `labels`; report the real cause instead.
    raise ValueError(
        "Cannot align regimes: no overlapping rows between the current "
        f"({len(current_series)} rows) and baseline ({len(baseline_series)} rows) "
        "Cluster_ID series."
    )
baseline_trimmed = baseline_series.iloc[:min_len].to_numpy()
current_trimmed = current_series.iloc[:min_len].to_numpy()

# Step 3: Compute confusion matrix across shared label space
# Rows index baseline labels, columns index current labels; using the union of
# both label sets keeps the matrix square.
all_labels = sorted(set(current_trimmed) | set(baseline_trimmed))
conf_matrix = confusion_matrix(baseline_trimmed, current_trimmed, labels=all_labels)

# Step 4: Hungarian alignment
# Maximise the total overlap; each assigned (row, col) pair reads as
# "current label `col` corresponds to baseline label `row`".
row_ind, col_ind = linear_sum_assignment(conf_matrix, maximize=True)
new_mapping = {all_labels[col]: all_labels[row] for row, col in zip(row_ind, col_ind)}
# Convert to pure int dict for clean display
clean_mapping = {int(k): int(v) for k, v in new_mapping.items()}

# Step 5: Print comparison
print("🔁 Existing Mapping:", existing_mapping)
print("🔄 New Hungarian Mapping:", clean_mapping)

# Optional: check if mappings match
# Compare the plain-int dict (the one printed above) so the verdict does not
# depend on numpy scalar keys hashing/comparing like Python ints.
if clean_mapping == existing_mapping:
    print("✅ Mappings are identical.")
else:
    print("⚠️ Mappings differ.")

🔁 Existing Mapping: {1: 0, 3: 1, 2: 2, 7: 3, 4: 4, 5: 5, 0: 6, 6: 7}
🔄 New Hungarian Mapping: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7}
⚠️ Mappings differ.
