# In [12]:
# RareEvent_BreedAndBattle_WithArena.ipynb (converted to .py for clarity)

# 1. Imports and Setup
import os
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import seaborn as sns
import glob

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    average_precision_score, roc_auc_score,
    precision_recall_curve, roc_curve
)
from sklearn.linear_model import LogisticRegression

sns.set(style="whitegrid")

# 2. Load Synthetic Datasets

def load_datasets(path="./synthetic_datasets/", *, target_col="rare_event"):
    """Load every CSV in a directory as a ``(features, target, filename)`` triple.

    Files are found with a ``*.csv`` glob directly inside ``path`` and are
    processed in sorted path order, so the returned list has a stable order.

    Args:
        path: Directory to scan for CSV files.
        target_col: Name of the label column that is split off from the
            features. Defaults to ``"rare_event"``.

    Returns:
        A list with one ``(X, y, name)`` tuple per file: ``X`` is the
        DataFrame without ``target_col``, ``y`` is the ``target_col`` Series,
        and ``name`` is the file's base name. The list is empty when no file
        matches (including when ``path`` does not exist).

    Raises:
        KeyError: If a CSV has no ``target_col`` column. The message names
            the offending file.
    """
    datasets = []
    for csv_path in sorted(glob.glob(os.path.join(path, "*.csv"))):
        name = os.path.basename(csv_path)
        df = pd.read_csv(csv_path)
        # Fail with the file name: pandas' own KeyError only mentions the
        # column, which is unhelpful when many files are being loaded.
        if target_col not in df.columns:
            raise KeyError(f"{name} has no {target_col!r} column")
        datasets.append((df.drop(columns=target_col), df[target_col], name))
    return datasets

# Load every CSV from the default ./synthetic_datasets/ directory once, when
# this cell/module runs; the resulting list is reused by the arena below.
datasets = load_datasets()
print(f"✅ Loaded {len(datasets)} datasets.")

# 3. Arena Evaluation Function

def evaluate_model_on_all_datasets(model, datasets, *, test_size=0.3, random_state=42):
    """Fit and score one model on every dataset, then average the scores.

    Each dataset gets its own stratified train/test split. The model is fit
    on the training part and scored on the test part, using the second column
    of ``predict_proba`` as the score for the positive class.

    Note that ``model`` itself is refit in place for every dataset; no copy is
    made, so after the call it holds whatever state its last ``fit`` call
    left behind.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict_proba(X)``.
        datasets: Iterable of ``(X, y, name)`` triples; ``name`` is only used
            in the warning printed when a dataset fails.
        test_size: Passed to ``train_test_split``. Defaults to ``0.3``.
        random_state: Seed passed to ``train_test_split`` so every model sees
            the same split. Defaults to ``42``.

    Returns:
        ``{"avg_pr_auc": float, "avg_roc_auc": float}``, the means over the
        datasets that were scored successfully. Both are ``0.0`` when no
        dataset could be scored (including when ``datasets`` is empty).
    """
    pr_aucs = []
    roc_aucs = []

    for X, y, name in datasets:
        # Deliberately best-effort: any failure on one dataset (split, fit
        # or scoring) is reported and that dataset is left out of the
        # averages instead of aborting the whole evaluation.
        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, stratify=y, test_size=test_size, random_state=random_state
            )
            model.fit(X_train, y_train)
            y_prob = model.predict_proba(X_test)[:, 1]

            pr_auc = average_precision_score(y_test, y_prob)
            roc_auc = roc_auc_score(y_test, y_prob)
        except Exception as e:
            print(f"⚠️ Model failed on dataset {name}: {e}")
            continue

        # Both metrics are recorded together, so the two lists always
        # describe the same set of datasets.
        pr_aucs.append(pr_auc)
        roc_aucs.append(roc_auc)

    # Cast to plain float so the result type matches the 0.0 fallback.
    return {
        "avg_pr_auc": float(np.mean(pr_aucs)) if pr_aucs else 0.0,
        "avg_roc_auc": float(np.mean(roc_aucs)) if roc_aucs else 0.0
    }

# 4. Define Evolving Model Class
class EvolvingModel:
    """A named arena contestant: an estimator plus its most recent scores.

    Args:
        name: Label used when reporting this contestant.
        model: Optional estimator to wrap. When omitted, a
            ``LogisticRegression(max_iter=1000, class_weight="balanced")``
            is created, which is what every contestant used before this
            argument existed. Passing different estimators (or differently
            configured ones) is what lets a population differ.
    """

    def __init__(self, name, model=None):
        self.name = name
        # Build the default lazily so each instance gets its own estimator
        # rather than sharing a single object across the population.
        if model is None:
            model = LogisticRegression(max_iter=1000, class_weight="balanced")
        self.model = model
        # Starts at 0.0 until an evaluation assigns a score.
        self.fitness = 0.0
        # Free-form per-instance dict for evaluation details.
        self.meta = {}

    def __repr__(self):
        return f"{type(self).__name__}(name={self.name!r}, fitness={self.fitness!r})"

# 5. Initialize Model Population
# Five contestants named Model_0 .. Model_4.
# NOTE(review): every member is constructed with the same arguments, so they
# wrap identically configured estimators; the recorded run shows identical
# scores for all five. Confirm whether the population is meant to differ.
model_population = [EvolvingModel(f"Model_{i}") for i in range(5)]

# 6. Evaluate Models
# Score each contestant on all loaded datasets. The mean PR AUC becomes its
# fitness; the full score dict is kept in `meta` for the leaderboard.
for model in model_population:
    scores = evaluate_model_on_all_datasets(model.model, datasets)
    model.fitness = scores["avg_pr_auc"]
    model.meta = scores

# 7. Leaderboard
# Print contestants from highest to lowest fitness. sorted() is stable, so
# contestants with equal fitness keep their order from model_population.
print("\n🏁 Arena Leaderboard:")
for model in sorted(model_population, key=lambda m: m.fitness, reverse=True):
    print(f"{model.name} → PR AUC: {model.meta['avg_pr_auc']:.3f} | ROC AUC: {model.meta['avg_roc_auc']:.3f}")


# Recorded cell output:
#
# ✅ Loaded 5 datasets.
#
# 🏁 Arena Leaderboard:
# Model_0 → PR AUC: 0.212 | ROC AUC: 0.749
# Model_1 → PR AUC: 0.212 | ROC AUC: 0.749
# Model_2 → PR AUC: 0.212 | ROC AUC: 0.749
# Model_3 → PR AUC: 0.212 | ROC AUC: 0.749
# Model_4 → PR AUC: 0.212 | ROC AUC: 0.749
