In [35]:
import sys, os
sys.path.insert(0, os.path.abspath(".."))  # go up one level

import glob
import pandas as pd
from sklearn.model_selection import train_test_split

from models.base_models import get_base_models
from models.core_models import make_child_model
from utils.resampling_registry import get_resamplers
from utils.scoring import score_model
from utils.battle_logger import BattleLogger

In [37]:
# Former working version: returned (name, X, y) tuples instead of dicts. Kept for reference only.
# # 📂 Load all datasets
# def load_all_datasets(folder="./synthetic_datasets"):
#     files = sorted(glob.glob(os.path.join(folder, "*.csv")))
#     datasets = []
#     for f in files:
#         df = pd.read_csv(f)
#         X = df.drop("rare_event", axis=1)
#         y = df["rare_event"]
#         datasets.append((os.path.basename(f), X, y))
#     return datasets

# datasets = load_all_datasets()

def load_all_datasets(folder="./synthetic_datasets", target_col="rare_event"):
    """Load every CSV in ``folder`` and split it into features and target.

    Parameters
    ----------
    folder : str
        Directory searched (non-recursively) for ``*.csv`` files.
    target_col : str
        Name of the label column to separate from the features.
        Defaults to ``"rare_event"``.

    Returns
    -------
    list of dict
        One entry per file, ordered by sorted file path, with keys
        ``"name"`` (file basename), ``"X"`` (DataFrame without the target
        column), ``"y"`` (the target Series) and ``"baseline_pr_auc"``
        (mean of the target). Empty list when no CSV is found.

    Raises
    ------
    KeyError
        If a CSV does not contain ``target_col``.
    """
    datasets = []
    for path in sorted(glob.glob(os.path.join(folder, "*.csv"))):
        name = os.path.basename(path)
        df = pd.read_csv(path)
        if target_col not in df.columns:
            # Name the offending file; the bare pandas error does not.
            raise KeyError(f"{name} has no '{target_col}' column")

        y = df[target_col]
        datasets.append({
            "name": name,
            "X": df.drop(columns=[target_col]),
            "y": y,
            # Positive-class prevalence. Assuming 0/1 labels, this is the
            # PR AUC of a no-skill classifier, which makes it the baseline.
            "baseline_pr_auc": y.mean(),
        })
    return datasets

In [31]:
# 🧠 Run models across datasets and resamplers
# Load the datasets in this cell so the run works on a fresh kernel instead
# of depending on a `datasets` variable left over from an earlier session.
datasets = load_all_datasets()
results = []

# The logger is handed paths under logs/; create the directory up front
# rather than relying on BattleLogger to do it.
os.makedirs("logs", exist_ok=True)

with BattleLogger(
    to_file="logs/battle_log.txt",
    js_file="logs/battle_log_data.js",
    inject_html=True,
    html_template="battle_template.html",
    html_output="battle_arena.html"
):
    for model_cfg in get_base_models():
        model_name = model_cfg["name"]
        # NOTE(review): the same estimator instance is refit for every
        # dataset/resampler pair; this assumes fit() resets prior state.
        model = model_cfg["model"]

        for dataset in datasets:
            # load_all_datasets() returns dicts, not (name, X, y) tuples.
            dataset_name = dataset["name"]
            X = dataset["X"]
            y = dataset["y"]

            # Fixed seed + stratification: every model sees the same split.
            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
            resamplers = get_resamplers(X_train, y_train, target_col="rare_event")

            for resampler_name, resample_func in resamplers.items():
                try:
                    X_res, y_res = resample_func()
                    model.fit(X_res, y_res)
                    y_prob = model.predict_proba(X_test)[:, 1]

                    scores = score_model(y_test, y_prob)
                    scores.update({
                        "model": model_name,
                        "resampler": resampler_name,
                        "dataset": dataset_name
                    })
                    print(f"[✅] {model_name} + {resampler_name} on {dataset_name} → PR AUC: {scores['pr_auc']:.3f}")
                    results.append(scores)

                except Exception as e:
                    # Best-effort sweep: report the failing combination and
                    # continue with the remaining ones.
                    print(f"[⚠️] Failed: {model_name} + {resampler_name} on {dataset_name}: {e}")



In [33]:
# 💾 Persist the collected scores as the leaderboard CSV
os.makedirs("logs", exist_ok=True)  # the output directory may not exist yet
df_results = pd.DataFrame(results)  # one row per successful model/resampler/dataset run
df_results.to_csv("logs/leaderboard.csv", index=False)
print("✅ Scores saved to logs/leaderboard.csv")

✅ Scores saved to logs/leaderboard.csv
