<!-- Cell 1 -->

# ArqonHPO Experiments 01 — Winner Stack on Real Data

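This notebook packages the retained ArqonHPO winners (Exp4, 6, 8, 15, 18, 25, 27) and benchmarks them against a plain random-search baseline on two sklearn datasets (breast cancer, diabetes), three synthetic tasks (structured classification, label-shuffled "chaos" classification, structured regression), and — when they can be loaded — MNIST and CIFAR-10. Run this in an environment with `scikit-learn`, `numpy`, `pandas`, and `matplotlib` (e.g., your `helios-gpu-118` env); `torchvision` is only needed for the optional CIFAR-10 task.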

**Winner stack included**
- Exp6 coarse-to-fine C refinement
- Exp8 seed coevolution (multi-seed CV)
- Exp15 μ+λ evolution
- Exp18 explore/exploit cadence
- Exp25 sampler hyper co-evo (logC prior mean/std)
- Exp27 evolution factor ramp
- Exp4 pheromone bandit (penalty weighting)


In [1]:
# Cell 2

import math
from pathlib import Path
import warnings
from dataclasses import dataclass
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer, load_diabetes, fetch_openml
from sklearn.model_selection import StratifiedKFold, KFold, cross_val_score
from sklearn.linear_model import LogisticRegression, ElasticNet

# Suppress every UserWarning and FutureWarning for the rest of the session so the
# benchmark cells are not flooded with repeated messages.
# NOTE(review): the filter is not scoped to a module, so warnings raised by any
# library in these two categories are hidden as well — confirm that is intended.
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


In [2]:
# Cell 3

# Real + synthetic datasets. The two sklearn `load_*` datasets and the synthetic
# tasks are always registered; MNIST and CIFAR-10 are added further down only if
# they can be loaded (the MNIST step goes through fetch_openml, which needs
# network access or a local cache).
cls = load_breast_cancer()
reg = load_diabetes()

# Shared, seeded generator used by the synthetic-task helpers and the label shuffle below.
rng = np.random.RandomState(123)

# Synthetic structured tasks

def make_structured_cls(n_samples=800, n_features=12, signal=3.0, noise=0.3, random_state=None):
    """Generate a synthetic binary-classification task driven by a linear logit.

    Features are standard-normal draws. A random weight vector ``w`` defines the
    logit ``z = (X @ w) * signal + N(0, noise)`` and every label is a Bernoulli
    draw with success probability ``sigmoid(z)``.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns.
        signal: Multiplier applied to the linear term of the logit.
        noise: Standard deviation of the Gaussian noise added to the logit.
        random_state: Source of randomness. ``None`` (default) draws from the
            notebook-level ``rng`` exactly as before; an ``int`` seeds a fresh
            ``np.random.RandomState``; a ``RandomState`` instance is used as is.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, n_features)`` and
        ``y`` is an int array of 0/1 labels with shape ``(n_samples,)``.
    """
    if random_state is None:
        gen = rng  # notebook-level generator; keeps the original draw sequence
    elif isinstance(random_state, np.random.RandomState):
        gen = random_state
    else:
        gen = np.random.RandomState(random_state)

    # Draw order (X, w, logit noise, label uniforms) is part of the reproducible stream.
    X = gen.randn(n_samples, n_features)
    w = gen.randn(n_features)
    z = X @ w * signal + gen.normal(0, noise, size=n_samples)
    p = 1.0 / (1.0 + np.exp(-z))
    y = (gen.rand(n_samples) < p).astype(int)
    return X, y


def make_structured_reg(n_samples=800, n_features=10, signal=4.0, noise=0.4, random_state=None):
    """Generate a synthetic linear-regression task.

    Features are standard-normal draws and the target is
    ``y = (X @ w) * signal + N(0, noise)`` for a random weight vector ``w``.

    Args:
        n_samples: Number of rows to generate.
        n_features: Number of feature columns.
        signal: Multiplier applied to the linear term.
        noise: Standard deviation of the Gaussian noise added to the target.
        random_state: Source of randomness. ``None`` (default) draws from the
            notebook-level ``rng`` exactly as before; an ``int`` seeds a fresh
            ``np.random.RandomState``; a ``RandomState`` instance is used as is.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, n_features)`` and
        ``y`` is a float array with shape ``(n_samples,)``.
    """
    if random_state is None:
        gen = rng  # notebook-level generator; keeps the original draw sequence
    elif isinstance(random_state, np.random.RandomState):
        gen = random_state
    else:
        gen = np.random.RandomState(random_state)

    # Draw order (X, w, target noise) is part of the reproducible stream.
    X = gen.randn(n_samples, n_features)
    w = gen.randn(n_features)
    y = X @ w * signal + gen.normal(0, noise, size=n_samples)
    return X, y


# Draw the structured classification task, then derive the "chaos" variant from it:
# identical features, but the labels are randomly permuted.
X_struct_cls, y_struct_cls = make_structured_cls()
X_chaos_cls, y_chaos_cls = X_struct_cls.copy(), rng.permutation(y_struct_cls)
# All three statements consume the same shared `rng`, so their order affects the data drawn.
X_struct_reg, y_struct_reg = make_structured_reg()

# Optional MNIST (requires network or local cache). If unavailable, it is skipped.
# NOTE(review): unlike the CIFAR-10 block below, no subsampling is applied here, so
# every row returned by fetch_openml is used by the benchmark — confirm the runtime
# is acceptable.
mnist_entry = None
try:
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    # Scale features by 1/255 (presumably raw 0-255 pixel intensities — TODO confirm).
    X_mnist = mnist.data.astype(np.float32) / 255.0
    # Cast the targets to int labels.
    y_mnist = mnist.target.astype(int)
    mnist_entry = {
        'task': 'cls',
        'X': X_mnist,
        'y': y_mnist,
    }
except Exception as e:
    # Best-effort: any failure while fetching or converting leaves mnist_entry as None.
    print('MNIST not available (skip):', e)

# Optional CIFAR-10 via torchvision (requires torch/torchvision and local cache; no download attempted).
cifar_entry = None
try:
    # Imported inside the try so that a missing torchvision only skips this dataset.
    import torchvision.transforms as T  # type: ignore
    from torchvision.datasets import CIFAR10  # type: ignore

    # download=False: the dataset must already exist under ~/.cache/cifar10.
    cifar = CIFAR10(root=str(Path('~/.cache/cifar10').expanduser()), train=True, download=False, transform=T.ToTensor())
    # Use at most the first 8000 training images.
    n_subset = min(8000, len(cifar))
    X_list, y_list = [], []
    for i in range(n_subset):
        img, label = cifar[i]
        # Flatten each image tensor into a 1-D float32 feature vector.
        X_list.append(img.view(-1).numpy().astype(np.float32))
        y_list.append(label)
    X_cifar = np.stack(X_list)
    y_cifar = np.array(y_list)
    cifar_entry = {
        'task': 'cls',
        'X': X_cifar,
        'y': y_cifar,
    }
except Exception as e:
    # Best-effort: any failure (import, missing files, conversion) leaves cifar_entry as None.
    print('CIFAR10 not available (skip):', e)

# Registry mixes real and synthetic tasks to show strength on structured vs chaotic signals.
# Lift expected on structured_cls/reg; chaos_cls acts as a stress test.
# Every entry has the same three keys: 'task' ('cls' or 'reg'), 'X' (features), 'y' (targets).
dataset_registry = {
    'breast_cancer_cls': {
        'task': 'cls',
        'X': cls.data,
        'y': cls.target,
    },
    'diabetes_reg': {
        'task': 'reg',
        'X': reg.data,
        'y': reg.target,
    },
    'structured_cls': {
        'task': 'cls',
        'X': X_struct_cls,
        'y': y_struct_cls,
    },
    'chaos_cls': {
        'task': 'cls',
        'X': X_chaos_cls,
        'y': y_chaos_cls,
    },
    'structured_reg': {
        'task': 'reg',
        'X': X_struct_reg,
        'y': y_struct_reg,
    },
}

# The optional datasets are registered only when their load step above succeeded.
if mnist_entry is not None:
    dataset_registry['mnist_cls'] = mnist_entry
if cifar_entry is not None:
    dataset_registry['cifar10_cls'] = cifar_entry


CIFAR10 not available (skip): Dataset not found or corrupted. You can use download=True to download it


In [3]:
# Cell 4

@dataclass
class EvalResult:
    """One evaluated hyperparameter configuration together with its loss."""

    # Hyperparameters handed to evaluate_model: "penalty"/"C" for classification,
    # "alpha"/"l1_ratio" for regression. Note that "penalty" holds a string even
    # though the annotation says float.
    params: Dict[str, float]
    # Loss returned by evaluate_model for `params` (lower is better).
    metric: float


def evaluate_model(task: str, params: Dict[str, float], X, y, seeds=(0, 1)) -> float:
    """Score one hyperparameter setting; the result is a loss, so smaller is better.

    A shuffled 3-fold cross-validation is run once per entry in ``seeds`` and the
    per-seed losses are averaged (Exp8 seed coevolution).

    Args:
        task: ``"cls"`` selects LogisticRegression (liblinear) scored by log loss;
            any other value selects ElasticNet scored by RMSE.
        params: ``{"penalty", "C"}`` for classification or ``{"alpha", "l1_ratio"}``
            for regression.
        X: Feature matrix passed to ``cross_val_score``.
        y: Targets passed to ``cross_val_score``.
        seeds: Seeds for the CV shuffles (and for ElasticNet's ``random_state``).

    Returns:
        Mean loss over all seeds.
    """

    def _log_loss_for(split_seed) -> float:
        # Stratified folds keep the class balance in every split.
        folds = StratifiedKFold(n_splits=3, shuffle=True, random_state=split_seed)
        estimator = LogisticRegression(
            penalty=params["penalty"],
            C=params["C"],
            solver="liblinear",
            max_iter=800,
        )
        fold_scores = cross_val_score(estimator, X, y, cv=folds, scoring="neg_log_loss")
        return -float(fold_scores.mean())

    def _rmse_for(split_seed) -> float:
        folds = KFold(n_splits=3, shuffle=True, random_state=split_seed)
        estimator = ElasticNet(
            alpha=params["alpha"],
            l1_ratio=params["l1_ratio"],
            random_state=split_seed,
            max_iter=6000,
        )
        fold_scores = cross_val_score(estimator, X, y, cv=folds, scoring="neg_mean_squared_error")
        # The scorer reports negated MSE; flip the sign before taking the root.
        return math.sqrt(-float(fold_scores.mean()))

    loss_for_seed = _log_loss_for if task == "cls" else _rmse_for
    per_seed_losses: List[float] = [loss_for_seed(split_seed) for split_seed in seeds]
    return float(np.mean(per_seed_losses))


def baseline_random_search(task: str, X, y, n_trials: int = 50, rng_seed: int = 13) -> Tuple[List[float], List[Dict[str, float]]]:
    """Plain random-search baseline over the same hyperparameters as the winner stack.

    Classification samples ``log10(C)`` uniformly in [-2.5, 2.5] plus a random
    l1/l2 penalty; regression samples ``log10(alpha)`` uniformly in [-3, 1] and
    ``l1_ratio`` uniformly in [0.05, 0.95].

    Args:
        task: ``"cls"`` for classification; any other value is treated as regression.
        X: Feature matrix forwarded to ``evaluate_model``.
        y: Targets forwarded to ``evaluate_model``.
        n_trials: Number of configurations to sample and evaluate.
        rng_seed: Seed of the private sampler.

    Returns:
        ``(history, params_list)``: ``history[i]`` is the best loss among the
        first ``i + 1`` trials and ``params_list[i]`` is the configuration
        sampled at trial ``i``.
    """
    sampler = np.random.RandomState(rng_seed)
    is_classification = task == "cls"
    best_curve: List[float] = []
    sampled: List[Dict[str, float]] = []
    incumbent = float("inf")

    for _trial in range(n_trials):
        # The exponent is always drawn first, then the second hyperparameter.
        if is_classification:
            exponent = sampler.uniform(-2.5, 2.5)
            candidate = {"penalty": sampler.choice(["l1", "l2"]), "C": 10 ** exponent}
        else:
            exponent = sampler.uniform(-3.0, 1.0)
            candidate = {"alpha": 10 ** exponent, "l1_ratio": sampler.uniform(0.05, 0.95)}

        trial_loss = evaluate_model(task, candidate, X, y)
        if trial_loss < incumbent:
            incumbent = trial_loss
        best_curve.append(incumbent)
        sampled.append(candidate)

    return best_curve, sampled


def winner_stack(task: str, X, y, total_evals: int = 52, rng_seed: int = 7) -> Tuple[List[float], List[Dict[str, float]]]:
    """Run the combined "winner" search (Exp4, 6, 8, 15, 18, 25, 27) in one loop.

    Phases:
      1. Exp6 — seed the population from a coarse log-spaced grid (crossed with
         the penalties for classification).
      2. Exp15 — (mu + lambda) evolution: each generation draws ``lam`` offspring
         from the current top-``mu`` and keeps the best ``max(mu, 10)`` overall.
         The mutation width combines the tracked spread of the elite (Exp25), a
         ramp that shrinks over generations (Exp27) and a wide step every fourth
         generation (Exp18). For classification the penalty is sampled with
         pheromone weights derived from the elite (Exp4).
    Every configuration is scored by ``evaluate_model`` (multi-seed CV, Exp8).

    Args:
        task: ``"cls"`` for classification; any other value is treated as regression.
        X: Feature matrix forwarded to ``evaluate_model``.
        y: Targets forwarded to ``evaluate_model``.
        total_evals: Evaluation budget. The seed grid is truncated when the
            budget is smaller than the grid, so at most ``total_evals``
            configurations are ever evaluated.
        rng_seed: Seed of the private sampler.

    Returns:
        ``(history, survivors)``: ``history[i]`` is the best loss seen after
        ``i + 1`` evaluations; ``survivors`` holds the hyperparameters of the
        final population, best first (not every evaluated configuration).
    """
    rng = np.random.RandomState(rng_seed)
    history: List[float] = []
    is_cls = task == "cls"
    # Name of the log-scaled hyperparameter that is being evolved.
    scale_key = "C" if is_cls else "alpha"

    if is_cls:
        logC_grid = np.linspace(-2.2, 2.0, 6)
        base_penalties = ["l1", "l2"]
    else:
        logC_grid = np.linspace(-3.0, 1.0, 6)  # grid over log10(alpha)
        base_penalties = ["elastic"]  # single placeholder; regression has no penalty choice
    n_seed_points = len(logC_grid) * len(base_penalties)

    def log_scale(individuals):
        """log10 of the evolved hyperparameter for each individual."""
        return [math.log10(ind.params[scale_key]) for ind in individuals]

    # --- Exp6 coarse-to-fine seeds (never exceeding the evaluation budget)
    population: List[EvalResult] = []
    best_so_far = float("inf")
    eval_count = 0
    seed_points = [(logc, pen) for logc in logC_grid for pen in base_penalties]
    for logc, pen in seed_points[: max(0, total_evals)]:
        if is_cls:
            params = {"penalty": pen, "C": 10 ** logc}
        else:
            params = {"alpha": 10 ** logc, "l1_ratio": rng.uniform(0.05, 0.95)}
        loss = evaluate_model(task, params, X, y)
        population.append(EvalResult(params=params, metric=loss))
        eval_count += 1
        best_so_far = min(best_so_far, loss)
        history.append(best_so_far)
    population.sort(key=lambda ind: ind.metric)

    # Budget already spent on (part of) the seed grid: nothing left to evolve.
    if eval_count >= total_evals:
        return history, [ind.params for ind in population]

    mu = min(6, len(population))  # parents
    lam = 8  # offspring per generation
    gens = max(1, (total_evals - len(population)) // lam)

    # Initialize sampler hyper params (Exp25) from the seeded elite.
    # NOTE(review): logC_mu is tracked but never read when sampling offspring;
    # only logC_std feeds the mutation width below.
    seed_logs = log_scale(population[:mu])
    logC_mu = np.mean(seed_logs)
    logC_std = np.std(seed_logs) + 0.2

    pheromone = {pen: 1.0 for pen in base_penalties}  # Exp4

    while eval_count < total_evals:
        # Update pheromone from the share of each penalty among the current top-k.
        # Regression params carry no "penalty" key, so the placeholder always counts as a hit.
        topk = population[:mu]
        for pen in base_penalties:
            hits = sum(1 for ind in topk if ind.params.get("penalty", pen) == pen)
            pheromone[pen] = 0.2 + 0.8 * (hits / max(1, len(topk)))

        # Sampler hyper co-evo update (Exp25): exponential smoothing towards the elite.
        top_logs = log_scale(topk)
        logC_mu = 0.7 * logC_mu + 0.3 * np.mean(top_logs)
        logC_std = 0.6 * logC_std + 0.4 * (np.std(top_logs) + 0.15)

        # Evolution factor ramp (Exp27) + explore/exploit cadence (Exp18)
        gen_idx = max(0, eval_count - n_seed_points) // lam
        ramp = max(0.25, 1.2 * (1.0 - gen_idx / max(1, gens)))
        wide_step = (gen_idx % 4 == 0)
        sigma = (0.6 if wide_step else 0.3) * ramp * max(0.2, logC_std)

        # Pheromone is fixed within a generation, so the sampling weights are too.
        if is_cls:
            pen_probs = np.array([pheromone[pen] for pen in base_penalties]) / sum(pheromone.values())

        offspring: List[EvalResult] = []
        for _ in range(lam):
            # Draw order (parent, scale mutation, then penalty / l1_ratio) fixes the RNG stream.
            parent = rng.choice(topk)
            logc = math.log10(parent.params[scale_key]) + rng.normal(0, sigma)
            if is_cls:
                logc = float(np.clip(logc, -3.0, 3.0))
                # str(...) turns the numpy string returned by choice into a plain str.
                penalty = str(rng.choice(base_penalties, p=pen_probs))
                params = {"penalty": penalty, "C": 10 ** logc}
            else:
                logc = float(np.clip(logc, -4.0, 2.0))
                l1_ratio = float(np.clip(parent.params["l1_ratio"] + rng.normal(0, 0.08), 0.01, 0.99))
                params = {"alpha": 10 ** logc, "l1_ratio": l1_ratio}

            loss = evaluate_model(task, params, X, y)
            offspring.append(EvalResult(params=params, metric=loss))
            eval_count += 1
            best_so_far = min(best_so_far, loss)
            history.append(best_so_far)
            if eval_count >= total_evals:
                break

        # (mu + lambda) survivor selection; the population stays sorted best-first.
        population = sorted(population + offspring, key=lambda ind: ind.metric)[: max(mu, 10)]

    return history, [ind.params for ind in population]


In [None]:
# Cell 5

# Both searches get the same evaluation budget so the comparison is fair.
EVAL_BUDGET = 52

results = []
all_histories = {}
for name, entry in dataset_registry.items():
    task, X, y = entry["task"], entry["X"], entry["y"]
    base_hist, base_params = baseline_random_search(task, X, y, n_trials=EVAL_BUDGET)
    win_hist, win_params = winner_stack(task, X, y, total_evals=EVAL_BUDGET)
    results.append({
        "dataset": name,
        "baseline_best": base_hist[-1],
        "winner_best": win_hist[-1],
        # Positive lift means the winner stack reached a lower loss than the baseline.
        "lift": base_hist[-1] - win_hist[-1],
    })
    # Keep the full best-so-far curves for the plotting cell.
    all_histories[name] = {"baseline": base_hist, "winner": win_hist}

# Build the frame once from the list of records; the bare last expression gives
# the rich notebook display, so no extra print() of the same table is needed.
summary_df = pd.DataFrame(results)
summary_df


In [None]:
# Cell 6

# One panel per dataset that was actually benchmarked. The count comes from
# `all_histories` (the data being plotted) rather than `dataset_registry`, so the
# figure stays consistent even if the registry changed after the benchmark ran.
n_panels = len(all_histories)
# squeeze=False always returns a 2-D axes array, so a single panel needs no special case.
fig, axes = plt.subplots(1, n_panels, figsize=(14, 4), squeeze=False)
for ax, (name, curves) in zip(axes[0], all_histories.items()):
    ax.plot(curves["baseline"], label="Baseline Random Search", color="#999")
    ax.plot(curves["winner"], label="Winner Stack", color="#2c7fb8")
    ax.set_title(name)
    ax.set_xlabel("Evaluations")
    ax.set_ylabel("Best loss so far")
    ax.grid(True, alpha=0.3)
    ax.legend()
plt.tight_layout()
plt.show()


<!-- Cell 7 -->

## Next steps
- Run this in `helios-gpu-118` (or any env with scikit-learn/pandas/matplotlib) to generate results.
- Swap in your real tasks/datasets; the search wrapper is self-contained.
- Adjust `total_evals` and seeds for stability checks or heavier sweeps.
