# Full Ephemeris — Honest NN + Trading Check (No Test Tuning)

**Goal (simple):**

- We want to know if the strong-looking trading curve from an `MLP` is a real edge, or just an accident from tuning on the TEST period.

**The main trap we avoid:**

- If we choose trading parameters (stop-loss, neutral-zone thresholds, etc.) by looking at TEST,
  we can often create an impressive equity curve even from a weak or random signal.

**Honest protocol (what we do here):**

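1. Train the model on **TRAIN** (XGBoost and the Keras MLP additionally receive **VALIDATION** data for early stopping; **TEST** is never used during training).
2. Tune the classification threshold and trading parameters using **VALIDATION only**.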
3. Freeze everything.
4. Evaluate once on **TEST**.

**Models we compare (pragmatic choice):**

- `xgb` and `rf` are strong tabular baselines.
- `sklearn_mlp` is the simple neural net we already use.
- `keras_mlp` is a slightly more flexible neural net (dropout + L2) to test whether a better regularized NN can help.

We keep the model list short on purpose:
- every extra model or variant is one more comparison, and the more comparisons we make, the more likely one of them looks good purely by chance (a false positive).

In [None]:
# ------------------------------
# Imports and project path setup
# ------------------------------

from pathlib import Path
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Put the repository root on sys.path so the project-local `RESEARCH` and
# `RESEARCH2` packages imported below can be resolved.
# NOTE(review): the path is hard-coded to one machine; adjust it when running elsewhere.
PROJECT_ROOT = Path('/home/rut/ostrofun')
if str(PROJECT_ROOT) not in sys.path:
    # Insert at position 0 so this checkout is searched before other sys.path entries.
    sys.path.insert(0, str(PROJECT_ROOT))

from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import RobustScaler
from sklearn.utils.class_weight import compute_sample_weight

import tensorflow as tf

from RESEARCH2.Moon_cycles.moon_data import (
    MoonLabelConfig,
    load_market_slice,
    build_moon_dataset_for_gauss,
    get_moon_feature_columns,
)
from RESEARCH2.Moon_cycles.splits import make_classic_split
from RESEARCH2.Moon_cycles.ephemeris_data import (
    EphemerisFeatureConfig,
    build_ephemeris_feature_set,
)
from RESEARCH2.Moon_cycles.eval_utils import (
    compute_binary_metrics,
    compute_statistical_significance,
)
from RESEARCH2.Moon_cycles.threshold_utils import tune_threshold_with_balance

from RESEARCH.model_training import train_xgb_model
from RESEARCH2.Moon_cycles.eval_visuals import VisualizationConfig
from RESEARCH2.Moon_cycles.trading_utils import (
    TradingConfig,
    backtest_long_flat_signals,
    build_signal_from_proba,
    plot_backtest_price_and_equity,
)

# Widen pandas' console output so the wide comparison tables printed later
# are not truncated.
pd.set_option('display.max_columns', 200)
pd.set_option('display.width', 160)

# Record the TensorFlow version used for this run.
print('tf version:', tf.__version__)

In [None]:
# ------------------------------
# Configuration
# ------------------------------
#
# Every tunable of this notebook lives in this cell. The later cells only read
# these names; nothing here is re-assigned after looking at TEST results.

# Market range.
# END_DATE=None is passed straight to load_market_slice
# (presumably "up to the latest available data" — confirm in the loader).
START_DATE = '2017-11-01'
END_DATE = None

# Feature mode: full ephemeris.
ORB_MULT = 0.25  # you can change this after v3 finds a better orb
# NOTE(review): the namespace is described as the orb=0.25 cache; if ORB_MULT
# is changed, confirm whether the cache key already accounts for it or the
# namespace must be changed as well.
CACHE_NAMESPACE = 'research2_ephem'  # orb=0.25 baseline cache

# Label config.
LABEL_CFG = MoonLabelConfig(
    horizon=1,
    move_share=0.5,
    label_mode='balanced_detrended',
    price_mode='raw',
)

# We fix one gaussian label config here to keep the notebook focused.
# If you want, you can replace this with the best gauss from bakeoff.
GAUSS_WINDOW = 201
GAUSS_STD = 40.0

# Threshold tuning penalties (validation-only).
# Forwarded as gap_penalty / prior_penalty to tune_threshold_with_balance.
THRESHOLD_GAP_PENALTY = 0.25
THRESHOLD_PRIOR_PENALTY = 0.05

# Trading params grid (tuned on VALIDATION only).
# The tuner tries every combination of NEUTRAL_DELTAS x STOP_LOSSES x
# EXIT_ON_NO_SIGNAL_OPTIONS, i.e. 5 * 6 * 2 = 60 backtests per model.
# NOTE(review): FEE_RATE is presumably a per-trade fraction (0.1%) and a
# stop-loss of 0.0 presumably disables the stop — confirm in trading_utils.
FEE_RATE = 0.001
STOP_LOSSES = [0.0, 0.01, 0.02, 0.03, 0.05, 0.08]
EXIT_ON_NO_SIGNAL_OPTIONS = [False, True]

# Neutral-zone width around 0.50.
# delta=0.00 -> always decide (0.50/0.50)
# delta=0.02 -> (0.52/0.48)
NEUTRAL_DELTAS = [0.00, 0.005, 0.01, 0.02, 0.03]

# Visual style (dark theme) for trading plots.
# NOTE(review): only rolling-window and binning options are set here; the
# "dark theme" presumably comes from VisualizationConfig defaults — confirm.
VIS_CFG = VisualizationConfig(
    rolling_window_days=90,
    rolling_min_periods=30,
    probability_bins=64,
)

# Shared switches forwarded to the data/feature/dataset builders.
USE_CACHE = True
VERBOSE = True


def make_ephem_cfg(orb_mult: float) -> EphemerisFeatureConfig:
    """Return the full-ephemeris feature configuration for one orb multiplier.

    Everything except ``orb_mult`` is fixed: both coordinate modes, pair
    aspects, phases and all trigonometric expansions are switched on, and no
    body is excluded.

    Args:
        orb_mult: Orb multiplier; coerced to ``float`` before being stored.

    Returns:
        The populated ``EphemerisFeatureConfig``.
    """

    fixed_options = dict(
        coord_mode='both',
        include_pair_aspects=True,
        include_phases=True,
        add_trig_for_longitudes=True,
        add_trig_for_moon_phase=True,
        add_trig_for_elongations=True,
        exclude_bodies=(),
    )
    return EphemerisFeatureConfig(orb_mult=float(orb_mult), **fixed_options)


# Echo the key settings so the run log records which configuration was used.
print('Config loaded.')
print(f'ORB_MULT= {ORB_MULT}')
print(f'GAUSS_WINDOW/STD= {GAUSS_WINDOW} {GAUSS_STD}')

In [None]:
# -------------------------------------------
# Market data + full-ephemeris feature table
# -------------------------------------------
#
# Both loaders receive the shared USE_CACHE / VERBOSE switches from the
# configuration cell.

df_market = load_market_slice(
    start_date=START_DATE,
    end_date=END_DATE,
    use_cache=USE_CACHE,
    verbose=VERBOSE,
)

df_features = build_ephemeris_feature_set(
    df_market=df_market,
    cfg=make_ephem_cfg(ORB_MULT),
    cache_namespace=CACHE_NAMESPACE,
    use_cache=USE_CACHE,
    verbose=VERBOSE,
    progress=True,
)

# Quick sanity printout; the 'date' column is not counted as a feature.
print(f'Market rows : {len(df_market)}')
print(f'Feature rows: {len(df_features)}')
print(f"Feature cols: {sum(1 for col in df_features.columns if col != 'date')}")
print(f"Market range: {df_market['date'].min().date()} -> {df_market['date'].max().date()}")

In [None]:
# -------------------------------------------
# One labelled dataset + one classic split
# -------------------------------------------
#
# The label setup (gaussian window/std + LABEL_CFG) is deliberately frozen:
# this notebook does not search for the best gauss, it checks whether the
# trading results survive an honest train/val/test protocol.

df_dataset = build_moon_dataset_for_gauss(
    df_market=df_market,
    df_moon_features=df_features,
    gauss_window=GAUSS_WINDOW,
    gauss_std=GAUSS_STD,
    label_cfg=LABEL_CFG,
    cache_namespace=CACHE_NAMESPACE,
    use_cache=USE_CACHE,
    verbose=VERBOSE,
)

feature_cols = get_moon_feature_columns(df_dataset)
split = make_classic_split(df_dataset)

# Materialize each split as an independent frame with a fresh 0..n-1 index.
train_df, val_df, test_df = (
    df_dataset.iloc[positions].copy().reset_index(drop=True)
    for positions in (split.train_idx, split.val_idx, split.test_idx)
)

# Split sizes and the share of target == 1 rows per split.
print('Rows:', {
    role: len(part)
    for role, part in (('train', train_df), ('val', val_df), ('test', test_df))
})
print('UP share:', {
    role: float((part['target'] == 1).mean())
    for role, part in (('train', train_df), ('val', val_df), ('test', test_df))
})
print('Num features:', len(feature_cols))

In [None]:
# -------------------------------------------
# Helpers: training + predictions in a common format
# -------------------------------------------

def _make_pred_frame(df_part: pd.DataFrame, split_role: str, proba_up: np.ndarray) -> pd.DataFrame:
    """Build the standardized prediction frame for a single split.

    Args:
        df_part: Split rows; must contain ``date``, ``close`` and ``target``.
        split_role: Label written to every row of the ``split_role`` column.
        proba_up: One probability per row of ``df_part``; stored as float.

    Returns:
        A new frame (the input is not modified) with a fresh 0..n-1 index and
        the columns ``date``, ``close``, ``target``, ``split_role``,
        ``pred_proba_up``.
    """

    base = df_part[['date', 'close', 'target']].reset_index(drop=True)
    return base.assign(
        split_role=split_role,
        pred_proba_up=np.asarray(proba_up, dtype=float),
    )


def _train_predict_xgb(train_df: pd.DataFrame, val_df: pd.DataFrame, test_df: pd.DataFrame) -> pd.DataFrame:
    """Fit XGBoost through the project wrapper and score every split.

    Features are taken from the module-level ``feature_cols``. The validation
    matrix and labels are handed to ``train_xgb_model`` together with
    ``early_stopping_rounds=50``; the test split is only used for prediction.

    Returns:
        Train, val and test prediction frames stacked in that order.
    """

    splits = {'train': train_df, 'val': val_df, 'test': test_df}
    matrices = {
        role: part[feature_cols].to_numpy(dtype=float)
        for role, part in splits.items()
    }
    labels_train = train_df['target'].to_numpy(dtype=int)
    labels_val = val_df['target'].to_numpy(dtype=int)

    booster = train_xgb_model(
        X_train=matrices['train'],
        y_train=labels_train,
        X_val=matrices['val'],
        y_val=labels_val,
        feature_names=feature_cols,
        n_classes=2,
        device='cpu',
        verbose=False,
        early_stopping_rounds=50,
        n_estimators=500,
        max_depth=6,
        learning_rate=0.03,
        colsample_bytree=0.8,
        subsample=0.8,
        weight_power=1.0,
        sideways_penalty=1.0,
    )

    # Column 1 of predict_proba is used as the "up" probability.
    frames = [
        _make_pred_frame(part, role, booster.predict_proba(matrices[role])[:, 1])
        for role, part in splits.items()
    ]
    return pd.concat(frames, ignore_index=True)


def _train_predict_rf(train_df: pd.DataFrame, val_df: pd.DataFrame, test_df: pd.DataFrame) -> pd.DataFrame:
    """Fit a class-balanced RandomForest on TRAIN and score every split.

    Features are taken from the module-level ``feature_cols``. Validation and
    test rows are only used for prediction.

    Returns:
        Train, val and test prediction frames stacked in that order.
    """

    splits = {'train': train_df, 'val': val_df, 'test': test_df}
    matrices = {
        role: part[feature_cols].to_numpy(dtype=float)
        for role, part in splits.items()
    }
    labels_train = train_df['target'].to_numpy(dtype=int)

    forest = RandomForestClassifier(
        n_estimators=800,
        max_depth=6,
        min_samples_leaf=8,
        random_state=42,
        n_jobs=1,
    )

    # Balance the classes through per-sample weights.
    balanced_weights = compute_sample_weight(class_weight='balanced', y=labels_train)
    forest.fit(matrices['train'], labels_train, sample_weight=balanced_weights)

    # Column 1 of predict_proba is used as the "up" probability.
    frames = [
        _make_pred_frame(part, role, forest.predict_proba(matrices[role])[:, 1])
        for role, part in splits.items()
    ]
    return pd.concat(frames, ignore_index=True)


def _train_predict_sklearn_mlp(train_df: pd.DataFrame, val_df: pd.DataFrame, test_df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
    """Fit a scaled, class-balanced sklearn MLP on TRAIN and score every split.

    The ``RobustScaler`` is fitted on TRAIN only and then applied unchanged to
    the validation and test matrices. Features come from the module-level
    ``feature_cols``.

    Args:
        train_df: Rows used for fitting the scaler and the network.
        val_df: Rows that are only transformed and scored.
        test_df: Rows that are only transformed and scored.
        seed: ``random_state`` of the network.

    Returns:
        Train, val and test prediction frames stacked in that order.
    """

    splits = {'train': train_df, 'val': val_df, 'test': test_df}
    labels_train = train_df['target'].to_numpy(dtype=int)

    scaler = RobustScaler()
    scaled = {}
    for role, part in splits.items():
        raw = part[feature_cols].to_numpy(dtype=float)
        # Only the TRAIN matrix (first entry) is allowed to fit the scaler.
        scaled[role] = scaler.fit_transform(raw) if role == 'train' else scaler.transform(raw)

    net = MLPClassifier(
        hidden_layer_sizes=(64, 32),
        activation='relu',
        alpha=1e-3,
        learning_rate_init=1e-3,
        max_iter=400,
        random_state=int(seed),
    )

    # NOTE(review): passing sample_weight to MLPClassifier.fit is only accepted
    # by recent scikit-learn releases (1.7+, to my knowledge) — confirm the
    # installed version.
    balanced_weights = compute_sample_weight(class_weight='balanced', y=labels_train)
    net.fit(scaled['train'], labels_train, sample_weight=balanced_weights)

    # Column 1 of predict_proba is used as the "up" probability.
    frames = [
        _make_pred_frame(part, role, net.predict_proba(scaled[role])[:, 1])
        for role, part in splits.items()
    ]
    return pd.concat(frames, ignore_index=True)


def _build_keras_mlp(input_dim: int, seed: int) -> tf.keras.Model:
    """Build and compile a small, regularized MLP for tabular input.

    Architecture: two ReLU hidden layers (128 and 64 units), each with L2
    kernel regularization (1e-4) and followed by 25% dropout, then a single
    sigmoid unit named ``p_up``. Compiled with Adam (lr=1e-3) and binary
    cross-entropy.

    Args:
        input_dim: Number of input features.
        seed: Passed to ``tf.keras.utils.set_random_seed`` before any layer is
            created.

    Returns:
        The compiled model.
    """

    tf.keras.utils.set_random_seed(int(seed))

    l2_strength = 1e-4
    dropout_rate = 0.25

    inputs = tf.keras.Input(shape=(int(input_dim),), name='x')
    hidden = inputs
    for width in (128, 64):
        hidden = tf.keras.layers.Dense(
            width,
            activation='relu',
            kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
        )(hidden)
        hidden = tf.keras.layers.Dropout(dropout_rate)(hidden)
    p_up = tf.keras.layers.Dense(1, activation='sigmoid', name='p_up')(hidden)

    net = tf.keras.Model(inputs=inputs, outputs=p_up)
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss='binary_crossentropy',
    )
    return net


def _train_predict_keras_mlp(train_df: pd.DataFrame, val_df: pd.DataFrame, test_df: pd.DataFrame, seed: int = 42) -> pd.DataFrame:
    """Fit the Keras MLP (scaled inputs, early stopping) and score every split.

    The ``RobustScaler`` is fitted on TRAIN only. Training uses balanced
    sample weights; the validation split is passed as ``validation_data`` and
    drives early stopping on ``val_loss`` (patience 12, best weights
    restored). The test split is only transformed and scored.

    Args:
        train_df: Rows used for fitting the scaler and the network.
        val_df: Rows used for early stopping and scoring.
        test_df: Rows that are only scored.
        seed: Forwarded to ``_build_keras_mlp``.

    Returns:
        Train, val and test prediction frames stacked in that order.
    """

    splits = {'train': train_df, 'val': val_df, 'test': test_df}
    labels_train = train_df['target'].to_numpy(dtype=float)
    labels_val = val_df['target'].to_numpy(dtype=float)

    scaler = RobustScaler()
    scaled = {}
    for role, part in splits.items():
        raw = part[feature_cols].to_numpy(dtype=float)
        # Only the TRAIN matrix (first entry) is allowed to fit the scaler.
        scaled[role] = scaler.fit_transform(raw) if role == 'train' else scaler.transform(raw)

    # Class balancing via per-sample weights (computed on integer labels).
    balanced_weights = compute_sample_weight(class_weight='balanced', y=labels_train.astype(int))

    net = _build_keras_mlp(input_dim=scaled['train'].shape[1], seed=int(seed))

    stopper = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=12,
        restore_best_weights=True,
    )

    net.fit(
        scaled['train'],
        labels_train,
        sample_weight=balanced_weights,
        validation_data=(scaled['val'], labels_val),
        epochs=200,
        batch_size=64,
        callbacks=[stopper],
        verbose=0,
    )

    # The model output is flattened to one probability per row.
    frames = [
        _make_pred_frame(part, role, net.predict(scaled[role], verbose=0).reshape(-1))
        for role, part in splits.items()
    ]
    return pd.concat(frames, ignore_index=True)

In [None]:
# -------------------------------------------
# Helper: honest trading tuning on VALIDATION only
# -------------------------------------------

def tune_trading_on_validation(
    pred_all: pd.DataFrame,
    model_name: str,
    fee_rate: float,
    stop_losses: list[float],
    exit_on_no_signal_options: list[bool],
    neutral_deltas: list[float],
) -> dict:
    """Tune trading params on validation only, then evaluate once on test.

    We tune (full grid, every combination):
    - neutral zone width around 0.50 (delta)
    - stop_loss_pct
    - exit_on_no_signal

    The winner is the combination with the highest finite validation
    ``ulcer_adjusted_return``. If no combination yields a finite score, the
    first combination is kept so the caller still gets a result.

    Important:
    - We NEVER choose these params by looking at the TEST period.

    Args:
        pred_all: Prediction frame with a ``split_role`` column ('val'/'test'
            rows are used) and a ``pred_proba_up`` column, plus whatever the
            backtester reads.
        model_name: Label used in progress output and in the result.
        fee_rate: Fee rate forwarded to ``TradingConfig``.
        stop_losses: Candidate ``stop_loss_pct`` values.
        exit_on_no_signal_options: Candidate ``exit_on_no_signal`` values.
        neutral_deltas: Candidate half-widths of the neutral zone.

    Returns:
        Dict describing the chosen parameters with keys ``model``, ``delta``,
        ``up_th``, ``down_th``, ``stop_loss_pct``, ``exit_on_no_signal``,
        ``val_metrics``, ``val_run``, ``score``, ``test_run``, ``test_metrics``.

    Raises:
        ValueError: If the parameter grid is empty.
    """

    combos = [(d, sl, ex) for d in neutral_deltas for sl in stop_losses for ex in exit_on_no_signal_options]
    total = len(combos)
    if total == 0:
        # Explicit check instead of a trailing assert (asserts vanish under -O).
        raise ValueError(
            'Empty trading grid: neutral_deltas, stop_losses and '
            'exit_on_no_signal_options must each contain at least one value.'
        )

    df_val = pred_all[pred_all['split_role'] == 'val'].copy().reset_index(drop=True)
    df_test = pred_all[pred_all['split_role'] == 'test'].copy().reset_index(drop=True)

    best = None

    for i, (delta, sl, ex) in enumerate(combos, start=1):
        up_th = 0.5 + float(delta)
        down_th = 0.5 - float(delta)

        # Build signals from probabilities.
        df_val_tmp = df_val.copy()
        df_val_tmp['signal'] = build_signal_from_proba(
            df_val_tmp['pred_proba_up'].to_numpy(dtype=float),
            threshold_up=up_th,
            threshold_down=down_th,
        )

        cfg = TradingConfig(
            fee_rate=float(fee_rate),
            stop_loss_pct=float(sl),
            exit_on_no_signal=bool(ex),
            close_final_position=True,
            initial_cash=1.0,
        )

        run_val = backtest_long_flat_signals(df_val_tmp, signal_col='signal', cfg=cfg)
        m_val = dict(run_val['metrics'])

        # Zero-trade ("do nothing") runs are NOT filtered out: they compete on
        # the same score, and the trade count is shown in the progress line.
        # A missing metric becomes NaN and can never beat a finite score.
        score = float(m_val.get('ulcer_adjusted_return', float('nan')))

        cand = {
            'model': model_name,
            'delta': float(delta),
            'up_th': float(up_th),
            'down_th': float(down_th),
            'stop_loss_pct': float(sl),
            'exit_on_no_signal': bool(ex),
            'val_metrics': m_val,
            'val_run': run_val,
            'score': score,
        }

        # A finite score must also displace a NaN/inf incumbent; comparing
        # against NaN with ">" is always False and would freeze the first
        # candidate forever.
        if best is None:
            best = cand
        elif np.isfinite(score) and (
            not np.isfinite(best['score']) or score > float(best['score'])
        ):
            best = cand

        # Primitive progress.
        left = total - i
        if i == 1 or i % 20 == 0 or i == total:
            print(
                f"[{model_name}] tune {i}/{total} left={left} "
                f"delta={delta:.3f} stop={sl:.3f} exit_no_sig={ex} "
                f"| VAL ret={m_val['return_pct']:.2%} UI={m_val['ulcer_index']:.2f} trades={m_val['num_trades']} "
                f"| BEST score={best['score']:.4g}"
            )

    # Apply the best params to TEST (frozen).
    df_test_tmp = df_test.copy()
    df_test_tmp['signal'] = build_signal_from_proba(
        df_test_tmp['pred_proba_up'].to_numpy(dtype=float),
        threshold_up=float(best['up_th']),
        threshold_down=float(best['down_th']),
    )

    cfg_best = TradingConfig(
        fee_rate=float(fee_rate),
        stop_loss_pct=float(best['stop_loss_pct']),
        exit_on_no_signal=bool(best['exit_on_no_signal']),
        close_final_position=True,
        initial_cash=1.0,
    )

    run_test = backtest_long_flat_signals(df_test_tmp, signal_col='signal', cfg=cfg_best)

    best['test_run'] = run_test
    best['test_metrics'] = dict(run_test['metrics'])

    return best


def eval_classification_with_val_threshold(pred_all: pd.DataFrame, model_name: str) -> dict:
    """Pick a decision threshold on validation and report val/test metrics.

    The threshold comes from ``tune_threshold_with_balance`` using validation
    rows only (with the module-level gap/prior penalties). The same threshold
    is then applied to both the validation and the test rows.

    Args:
        pred_all: Prediction frame with ``split_role``, ``target`` and
            ``pred_proba_up`` columns.
        model_name: Stored under the ``model`` key of the result.

    Returns:
        Flat dict with ``model``, ``val_threshold``, ``val_threshold_score``
        and, for each role in ('val', 'test'), the binary metrics prefixed
        with the role plus ``<role>_p_value_vs_random``,
        ``<role>_acc_ci95_low`` and ``<role>_acc_ci95_high``.
    """

    by_role = {
        role: pred_all[pred_all['split_role'] == role]
        for role in ('val', 'test')
    }
    val_part = by_role['val']

    threshold, threshold_score = tune_threshold_with_balance(
        y_val=val_part['target'].to_numpy(dtype=int),
        proba_up=val_part['pred_proba_up'].to_numpy(dtype=float),
        gap_penalty=THRESHOLD_GAP_PENALTY,
        prior_penalty=THRESHOLD_PRIOR_PENALTY,
    )

    report = {
        'model': model_name,
        'val_threshold': float(threshold),
        'val_threshold_score': float(threshold_score),
    }

    cutoff = float(threshold)
    for role, part in by_role.items():
        truth = part['target'].to_numpy(dtype=int)
        proba = part['pred_proba_up'].to_numpy(dtype=float)
        # Probabilities at or above the cutoff are predicted as class 1.
        predicted = (proba >= cutoff).astype(int)

        metrics = compute_binary_metrics(y_true=truth, y_pred=predicted)
        signif = compute_statistical_significance(y_true=truth, y_pred=predicted, random_baseline=0.5)

        # Python floats/ints are normalized to float; anything else is kept as is.
        for key, value in metrics.items():
            if isinstance(value, (float, int)):
                report[f'{role}_{key}'] = float(value)
            else:
                report[f'{role}_{key}'] = value
        report[f'{role}_p_value_vs_random'] = float(signif['p_value_vs_random'])
        report[f'{role}_acc_ci95_low'] = float(signif['ci95_low'])
        report[f'{role}_acc_ci95_high'] = float(signif['ci95_high'])

    return report

In [None]:
# -------------------------------------------
# Train models, run honest trading, compare
# -------------------------------------------
#
# Flow of this cell:
#   1. train each model once and collect its train/val/test predictions,
#   2. build a classification table (threshold tuned on validation),
#   3. tune trading params on VAL, apply them once to TEST, plot and tabulate.
#
# `display` is the rich-output helper provided by the IPython/Jupyter runtime.

preds = {}

print('\nTraining XGB...')
preds['xgb'] = _train_predict_xgb(train_df, val_df, test_df)

print('Training RF...')
preds['rf'] = _train_predict_rf(train_df, val_df, test_df)

print('Training sklearn MLP...')
preds['sklearn_mlp'] = _train_predict_sklearn_mlp(train_df, val_df, test_df, seed=42)

print('Training keras MLP...')
preds['keras_mlp'] = _train_predict_keras_mlp(train_df, val_df, test_df, seed=42)

# Classification report (threshold tuned on validation).
cls_rows = []
for name, df_pred in preds.items():
    cls_rows.append(eval_classification_with_val_threshold(df_pred, model_name=name))

# Rank models by VALIDATION metrics only.
df_cls = pd.DataFrame(cls_rows).sort_values(
    ['val_recall_min', 'val_recall_gap', 'val_mcc', 'val_accuracy'],
    ascending=[False, True, False, False],
)
print('\nClassification comparison (val-threshold, sorted by val rule):')
display(df_cls[[
    'model', 'val_threshold',
    'val_accuracy', 'val_mcc', 'val_recall_min', 'val_recall_gap', 'val_p_value_vs_random',
    'test_accuracy', 'test_mcc', 'test_recall_min', 'test_recall_gap', 'test_p_value_vs_random',
]])

# Honest trading: tune on VAL, apply once on TEST.
trading_rows = []
best_trading = {}

for name, df_pred in preds.items():
    print('\n' + '=' * 120)
    print('HONEST TRADING TUNE (VAL only) -> APPLY (TEST):', name)
    best = tune_trading_on_validation(
        pred_all=df_pred,
        model_name=name,
        fee_rate=FEE_RATE,
        stop_losses=STOP_LOSSES,
        exit_on_no_signal_options=EXIT_ON_NO_SIGNAL_OPTIONS,
        neutral_deltas=NEUTRAL_DELTAS,
    )
    best_trading[name] = best

    row = {
        'model': name,
        'delta': best['delta'],
        'stop_loss_pct': best['stop_loss_pct'],
        'exit_on_no_signal': best['exit_on_no_signal'],
        'val_return_pct': best['val_metrics']['return_pct'],
        'val_ulcer': best['val_metrics']['ulcer_index'],
        'val_uarr': best['val_metrics']['ulcer_adjusted_return'],
        'val_trades': best['val_metrics']['num_trades'],
        'test_return_pct': best['test_metrics']['return_pct'],
        'test_hold_return_pct': best['test_metrics']['hold_return_pct'],
        'test_excess_return_pct': best['test_metrics']['excess_return_pct'],
        'test_ulcer': best['test_metrics']['ulcer_index'],
        'test_uarr': best['test_metrics']['ulcer_adjusted_return'],
        'test_trades': best['test_metrics']['num_trades'],
        'test_max_dd_pct': best['test_metrics']['max_drawdown_pct'],
        'test_exposure_pct': best['test_metrics']['exposure_pct'],
    }
    trading_rows.append(row)

    # Plot the TEST equity for the chosen params.
    plot_backtest_price_and_equity(
        best['test_run'],
        title=(
            f"{name.upper()} — HONEST trading (params tuned on VAL) "
            f"delta={best['delta']:.3f} stop={best['stop_loss_pct']:.1%} exit_no_sig={best['exit_on_no_signal']}"
        ),
        vis_cfg=VIS_CFG,
    )

# The table is ordered by TEST metrics for readability only; choosing a model
# from this ordering would itself be a form of tuning on TEST.
df_trading = (
    pd.DataFrame(trading_rows)
    .sort_values(['test_uarr', 'test_return_pct'], ascending=[False, False])
    .reset_index(drop=True)
)
print('\nTrading comparison (params tuned on VAL, evaluated on TEST):')
display(df_trading)

In [None]:
# -------------------------------------------
# Stability check for NNs (multiple seeds)
# -------------------------------------------
#
# If the "edge" is real, NN results should be somewhat stable
# across random seeds. If the result jumps wildly, it is a red flag.
#
# `display` is the rich-output helper provided by the IPython/Jupyter runtime.

SEEDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


def run_seed_stability(kind: str) -> pd.DataFrame:
    """Retrain one NN flavour for every seed in ``SEEDS`` and collect results.

    For each seed the model is retrained, trading params are re-tuned on VAL
    and applied once to TEST, exactly like in the main comparison.

    Args:
        kind: ``'sklearn_mlp'`` or ``'keras_mlp'``.

    Returns:
        One row per seed with the chosen trading params, the validation
        ulcer-adjusted return and the test metrics.

    Raises:
        ValueError: If ``kind`` is not one of the two supported names.
    """

    rows = []
    for seed in SEEDS:
        if kind == 'sklearn_mlp':
            df_pred = _train_predict_sklearn_mlp(train_df, val_df, test_df, seed=int(seed))
        elif kind == 'keras_mlp':
            df_pred = _train_predict_keras_mlp(train_df, val_df, test_df, seed=int(seed))
        else:
            raise ValueError('unknown kind')

        # Tune trading params on VAL, apply to TEST.
        best = tune_trading_on_validation(
            pred_all=df_pred,
            model_name=kind,
            fee_rate=FEE_RATE,
            stop_losses=STOP_LOSSES,
            exit_on_no_signal_options=EXIT_ON_NO_SIGNAL_OPTIONS,
            neutral_deltas=NEUTRAL_DELTAS,
        )

        rows.append({
            'seed': int(seed),
            'delta': best['delta'],
            'stop_loss_pct': best['stop_loss_pct'],
            'exit_on_no_signal': best['exit_on_no_signal'],
            'val_uarr': best['val_metrics']['ulcer_adjusted_return'],
            'test_uarr': best['test_metrics']['ulcer_adjusted_return'],
            'test_return_pct': best['test_metrics']['return_pct'],
            'test_excess_return_pct': best['test_metrics']['excess_return_pct'],
            'test_ulcer': best['test_metrics']['ulcer_index'],
            'test_trades': best['test_metrics']['num_trades'],
        })

        print(f'[{kind}] seed={seed} -> test_ret={rows[-1]["test_return_pct"]:.2%} test_uarr={rows[-1]["test_uarr"]:.4g}')

    return pd.DataFrame(rows)


df_sklearn = run_seed_stability('sklearn_mlp')
df_keras = run_seed_stability('keras_mlp')

print('\nSeed stability summary (sklearn_mlp):')
display(df_sklearn.describe(include='all'))
print('\nSeed stability summary (keras_mlp):')
display(df_keras.describe(include='all'))