In [1]:
# === ENV: install libs (run once) ===
# Use conda if you need GPU wheels for torch; pip below is the simple option.
! pip install scikit-learn lightgbm xgboost scipy
! pip install signatory   # preferred signatures backend (requires torch)
# ! pip install torch       # install matching CUDA build if using GPU (recommended)
! pip install iisignature # fallback signature lib (C compiled)
# ! pip install giotto-tda  # optional TDA (persistence + persistence images)
# Optional (Dionysus sometimes needs conda-forge):
# conda install -c conda-forge dionysus

Collecting signatory
  Downloading signatory-1.2.6.1.9.0.tar.gz (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.8/62.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: signatory
  Building wheel for signatory (setup.py) ... [?25l[?25hdone
  Created wheel for signatory: filename=signatory-1.2.6.1.9.0-cp311-cp311-linux_x86_64.whl size=12019475 sha256=501aa517cfdd37ad85e69a45112a8b32ea64e16c9e722b6a93055b82e2e7e452
  Stored in directory: /root/.cache/pip/wheels/6a/79/bb/6012413145dd168da55413ef8bc837f507bf829a08a176c329
Successfully built signatory
Installing collected packages: signatory
Successfully installed signatory-1.2.6.1.9.0
Collecting iisignature
  Downloading iisignature-0.24.tar.gz (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) .

In [2]:
# === 1) Imports & config ===
import os, gc, math
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
import xgboost as xgb
import warnings
warnings.filterwarnings("ignore")

RANDOM = 42
N_SPLITS = 5
SIGNATURE_WINDOW = 30      # days of history for signatures / TDA
SIGNATURE_DEPTH = 3
FFT_N = 6

In [3]:
# === 2) Load data & basic cleaning ===
# Adjust paths to your local files
train = pd.read_csv("/kaggle/input/hull-tactical-market-prediction/test.csv", parse_dates=False)  # date_id is an integer id
test  = pd.read_csv("/kaggle/input/hull-tactical-market-prediction/train.csv", parse_dates=False)

# mark
train['is_train'] = True
test['is_train']  = False

# keep original target if present
TARGET = 'forward_returns'
# test may include lagged columns already - keep them as features

# unify columns: ensure both frames have same columns
missing_cols = set(train.columns) - set(test.columns)
for c in missing_cols:
    if c not in ['forward_returns', 'risk_free_rate', 'market_forward_excess_returns']: # train-only
        test[c] = np.nan

data = pd.concat([train, test], ignore_index=True, sort=False)
# sort by date_id to guarantee time order
data = data.sort_values('date_id').reset_index(drop=True)

# At this stage many early rows may have NaNs. We'll use forward/backfill and median impute later.
print("Data loaded: rows=", len(data), "cols=", len(data.columns))# === 3) Helper: winsorize by MAD & simple impute ===
from scipy.stats import median_abs_deviation

def winsorize_mad(series, thresh=4.0):
    # returns winsorized series
    med = np.nanmedian(series)
    mad = median_abs_deviation(series, nan_policy='omit')
    if mad == 0 or np.isnan(mad):
        return series.fillna(med)
    lower = med - thresh * mad
    upper = med + thresh * mad
    return series.clip(lower, upper)

def basic_impute(df, group_col=None, strategy='ffill_mean'):
    out = df.copy()
    out = out.fillna(method='ffill').fillna(method='bfill')
    
    for c in out.columns:
        if out[c].isna().any():
            # Check if ALL values are NaN after ffill/bfill
            if out[c].isna().all():
                out[c] = 0  # or some other default value
            else:
                med = out[c].median(skipna=True)
                out[c] = out[c].fillna(med)
    return out

Data loaded: rows= 9031 cols= 103


In [5]:
data.isna().sum().sum(), train.isna().sum().sum(), test.isna().sum().sum()

(173789, 0, 173759)

In [62]:
# === 4) Conventional features: lags, rolling and FFT ===
def add_basic_ts_features(df, value_prefixes=('M','E','I','P','V','S','MOM','D'), 
                          windows=(5,20,60), fft_n=FFT_N):
    """
    For each numeric column whose name starts with prefix, compute lags and rolling stats.
    We compute FFT on the multivariate vector for each date using a trailing window across all features.
    """
    df = df.copy()
    # pick feature columns by prefix (preserve provided lagged_* too)
    candidate_cols = [c for c in df.columns if any(c.startswith(pref) for pref in value_prefixes)]
    # include lagged_* if present
    candidate_cols += [c for c in df.columns if c.startswith('lagged_')]
    candidate_cols = sorted(set(candidate_cols) & set(df.columns))

    # create shifted lags and rolling per column (trailing windows)
    for w in windows:
        for c in candidate_cols:
            df[f'{c}_lag_{w}'] = df[c].shift(w)
            df[f'{c}_rollmean_{w}'] = df[c].shift(1).rolling(window=w).mean().reset_index(drop=True)
            df[f'{c}_rollstd_{w}'] = df[c].shift(1).rolling(window=w).std().reset_index(drop=True)
    # FFT features: for each date, take the trailing window of length max(windows) across candidate_cols
    max_w = max(windows)
    fft_cols = [f'fft_{i}' for i in range(1, fft_n+1)]
    # create empty columns
    for fc in fft_cols:
        df[fc] = np.nan
    # compute FFT magnitudes of flattened multivariate trailing vector (may be heavy)
    # optimized: compute per-row using rolling index
    values = df[candidate_cols].values
    for i in range(len(df)):
        s = max(0, i - max_w + 1)
        window_block = values[s:i+1]  # shape (L, n_features)
        if window_block.shape[0] < 3:
            continue
        # flatten along time axis to 1D signal
        flat = window_block.ravel()
        arr = np.fft.rfft(np.nan_to_num(flat))
        mags = np.abs(arr)
        # assign first FFT_N non-dc mags
        for k in range(1, min(len(mags), fft_n+1)):
            df.at[i, f'fft_{k}'] = mags[k]
    # winsorize numeric columns to reduce extreme outliers
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    for c in numeric_cols:
        df[c] = winsorize_mad(df[c], thresh=4.0)
    # basic final impute
    df = basic_impute(df)
    return df

# usage:
data = add_basic_ts_features(data)

In [63]:
data.isna().sum().sum()

0

In [64]:
# === 5) Signatures: sliding-window per date_id using signatory or iisignature ===
# Signatures will produce one feature vector per date_id (signature of preceding SIGNATURE_WINDOW rows of selected features)

# preferred: signatory (GPU+torch); fallback: iisignature (C)
try:
    import torch, signatory
    SIGNATORY_OK = True
except Exception:
    SIGNATORY_OK = False

try:
    import iisignature
    IISIGNATURE_OK = True
except Exception:
    IISIGNATURE_OK = False

def compute_signatures_by_date(df, feature_cols, window=SIGNATURE_WINDOW, depth=SIGNATURE_DEPTH, id_col='date_id'):
    """
    Returns DataFrame with columns [date_id, sig_0, sig_1, ...] where each row is the signature
    of the trailing window of 'feature_cols' ending at that date_id.
    If insufficient history, signature is NaN and will be dropped later.
    """
    sig_rows = []
    # ensure sorted
    df = df.sort_values(id_col).reset_index(drop=True)
    feats = df[feature_cols].values.astype(np.float32)
    n, d = feats.shape
    if SIGNATORY_OK:
        # compute per-date sliding window in a loop (vectorization possible but more complex)
        for i in range(n):
            start = i - window + 1
            if start < 0:
                sig_rows.append((df.at[i, id_col], None))
                continue
            seg = torch.tensor(feats[start:i+1][None], device='cpu')  # shape (1, L, d)
            try:
                sig = signatory.signature(seg, depth=depth)[0].cpu().numpy()
            except Exception:
                sig = None
            sig_rows.append((df.at[i, id_col], sig))
    elif IISIGNATURE_OK:
        for i in range(n):
            start = i - window + 1
            if start < 0:
                sig_rows.append((df.at[i, id_col], None))
                continue
            seg = feats[start:i+1]
            try:
                sig = iisignature.sig(seg, depth)
            except Exception:
                sig = None
            sig_rows.append((df.at[i, id_col], sig))
    else:
        raise RuntimeError("No signature backend available (install signatory or iisignature).")

    # build DataFrame, drop None rows
    valid = [(idx, s) for idx, s in sig_rows if s is not None]
    if len(valid) == 0:
        raise RuntimeError("No signatures computed (insufficient history). Consider reducing window.")
    feat_dim = valid[0][1].shape[0]
    cols = ['date_id'] + [f'sig_{i}' for i in range(feat_dim)]
    out = pd.DataFrame([ [idx] + list(s) for idx, s in valid ], columns=cols)
    return out

# Usage:
selected_cols = [c for c in data.columns if c.startswith(('M','E','I','P','V','S','MOM'))][:10]  # choose top K to limit dimension
sig_df = compute_signatures_by_date(data, selected_cols, window=30, depth=3)
data = data.merge(sig_df, on='date_id', how='left')
data = basic_impute(data)

In [66]:
data.isna().sum().sum()

0

In [69]:
# === 6) Optional TDA (giotto-tda): persistence image per trailing window ===
# This block is optional and slower. It computes a persistence image for each trailing window of multivariate features.

try:
    from gtda.homology import VietorisRipsPersistence
    from gtda.diagrams import PersistenceImage
    GIOTTO_OK = True
except Exception:
    GIOTTO_OK = False

def compute_persistence_images_by_date(df, feature_cols, window=SIGNATURE_WINDOW, id_col='date_id'):
    if not GIOTTO_OK:
        raise RuntimeError("giotto-tda not installed. Install or skip TDA.")
    pr = VietorisRipsPersistence(metric='euclidean', homology_dimensions=[0,1])
    pimg = PersistenceImage()
    rows = []
    df = df.sort_values(id_col).reset_index(drop=True)
    feats = df[feature_cols].values
    n = len(df)
    for i in range(n):
        start = i - window + 1
        if start < 0:
            rows.append((df.at[i, id_col], None))
            continue
        block = feats[start:i+1]
        if block.shape[0] < 3:
            rows.append((df.at[i, id_col], None))
            continue
        diagrams = pr.fit_transform(block[None])    # returns array shape (1, ... )
        image = pimg.fit_transform(diagrams)[0].ravel()
        rows.append((df.at[i, id_col], image))
    valid = [(idx,img) for idx,img in rows if img is not None]
    if len(valid) == 0:
        raise RuntimeError("No persistence images computed.")
    dim = valid[0][1].shape[0]
    cols = ['date_id'] + [f'pi_{i}' for i in range(dim)]
    out = pd.DataFrame([ [idx] + list(img) for idx,img in valid ], columns=cols)
    return out

# Usage:
# pi_df = compute_persistence_images_by_date(data, selected_cols, window=30)
# data = data.merge(pi_df, on='date_id', how='left')

In [70]:
# === 7) Assemble final training set and features list ===
def prepare_features(data, signature_cols_keep_k=200):
    df = data.copy()
    # Basic features: all numeric except target and identifiers
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    ignore = ['forward_returns', 'risk_free_rate', 'market_forward_excess_returns', 'is_train']
    numeric_cols = [c for c in numeric_cols if c not in ignore]

    # Optionally pick top-K signature columns (sig_*)
    sig_cols = [c for c in numeric_cols if c.startswith('sig_')]
    if len(sig_cols) > signature_cols_keep_k:
        # simple heuristic: keep first K
        sig_cols = sig_cols[:signature_cols_keep_k]

    pi_cols = [c for c in numeric_cols if c.startswith('pi_')]
    # final features: numeric base + signatures + persistence images
    base = [c for c in numeric_cols if not (c.startswith('sig_') or c.startswith('pi_'))]
    features = base + sig_cols + pi_cols
    # remove date_id if present
    if 'date_id' in features:
        features.remove('date_id')
    return df, features

# usage:
# after computing signatures/tda and merging into data:
data, features = prepare_features(data)

In [71]:
data

Unnamed: 0,date_id,D1,D2,D3,D4,D5,D6,D7,D8,D9,...,sig_1100,sig_1101,sig_1102,sig_1103,sig_1104,sig_1105,sig_1106,sig_1107,sig_1108,sig_1109
0,0,0,0,0,1,1,0,0,0,1,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,1,0,0,0,1,1,0,0,0,1,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,2,0,0,0,1,0,0,0,0,1,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,3,0,0,0,1,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,4,0,0,0,1,0,0,0,0,0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9015,8988,0,0,0,0,0,0,0,0,0,...,-0.000055,-0.000007,0.000058,0.000058,0.000014,0.000014,-0.000150,0.000626,0.000170,-0.000106
9016,8989,0,0,0,0,0,0,0,0,0,...,-0.000063,-0.000009,0.000056,0.000056,0.000012,0.000012,-0.000146,0.000608,0.000165,-0.000106
9017,8989,0,0,0,0,0,0,0,0,0,...,-0.000061,-0.000008,0.000055,0.000055,0.000012,0.000012,-0.000144,0.000591,0.000161,-0.000103
9018,8989,0,0,0,0,0,0,0,0,0,...,-0.000063,-0.000009,0.000056,0.000056,0.000012,0.000012,-0.000146,0.000608,0.000165,-0.000106


In [72]:
# === Neural Network Model Definitions ===
from tensorflow import keras
from tensorflow.keras import layers, callbacks
import tensorflow as tf

def build_deep_residual_model(input_dim):
    """Deep Residual Network with Dropout"""
    inputs = keras.Input(shape=(input_dim,))
    
    # Initial dense layer
    x = layers.Dense(256, activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    
    # Residual blocks
    for units in [128, 128, 64]:
        residual = x
        x = layers.Dense(units, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Dropout(0.2)(x)
        x = layers.Dense(units, activation='relu')(x)
        x = layers.BatchNormalization()(x)
        if residual.shape[-1] == units:
            x = layers.Add()([x, residual])
        x = layers.Dropout(0.2)(x)
    
    # Output
    outputs = layers.Dense(1)(x)
    
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(0.001), loss='mse', metrics=['mae'])
    return model

def build_wide_deep_model(input_dim):
    """Wide & Deep Architecture"""
    inputs = keras.Input(shape=(input_dim,))
    
    # Wide component (linear)
    wide = layers.Dense(32, activation='relu')(inputs)
    
    # Deep component
    deep = layers.Dense(256, activation='relu')(inputs)
    deep = layers.BatchNormalization()(deep)
    deep = layers.Dropout(0.3)(deep)
    deep = layers.Dense(128, activation='relu')(deep)
    deep = layers.BatchNormalization()(deep)
    deep = layers.Dropout(0.2)(deep)
    deep = layers.Dense(64, activation='relu')(deep)
    deep = layers.Dropout(0.2)(deep)
    
    # Combine wide and deep
    combined = layers.Concatenate()([wide, deep])
    combined = layers.Dense(32, activation='relu')(combined)
    outputs = layers.Dense(1)(combined)
    
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(0.001), loss='mse', metrics=['mae'])
    return model

In [73]:
# === 8) Model training: TimeSeries CV + DNN + XGB OOF ===
def train_oof_dnn_xgb(df, features, target='forward_returns', date_col='date_id', n_splits=N_SPLITS, gpu=True):
    """Train DNN and XGBoost with out-of-fold predictions"""
    df = df.sort_values(date_col).reset_index(drop=True)
    idx = df.index.values
    n = len(df)
    tss = TimeSeriesSplit(n_splits=n_splits)
    
    oof_dnn = np.zeros(n)
    oof_xgb = np.zeros(n)
    
    # XGBoost params
    xgb_params = dict(
        objective='reg:squarederror', 
        eval_metric='rmse', 
        learning_rate=0.02,
        max_depth=8, 
        subsample=0.8, 
        colsample_bytree=0.8, 
        tree_method='gpu_hist' if gpu else 'hist',
        verbose=5
    )
    
    # Only train on rows that have target (train portion)
    train_mask = ~df[target].isna()
    train_idx = np.where(train_mask)[0]
    splits = list(tss.split(train_idx))
    print('show splits#############', len(splits))
    
    for fold, (t_idx_local, v_idx_local) in enumerate(splits):
        print(f"\n=== Fold {fold+1}/{len(splits)} ===")
        t_idx = train_idx[t_idx_local]
        v_idx = train_idx[v_idx_local]
        
        X_tr, X_val = df.iloc[t_idx][features], df.iloc[v_idx][features]
        y_tr, y_val = df.iloc[t_idx][target].values, df.iloc[v_idx][target].values
        
        # === Deep Neural Network ===
        print("Training DNN...")
        model_dnn = build_deep_residual_model(len(features))
        
        early_stop = callbacks.EarlyStopping(
            monitor='val_loss', 
            patience=30, 
            restore_best_weights=True,
            verbose=1
        )
        reduce_lr = callbacks.ReduceLROnPlateau(
            monitor='val_loss', 
            factor=0.5, 
            patience=15, 
            min_lr=1e-6,
            verbose=1
        )
        
        history = model_dnn.fit(
            X_tr.values, y_tr,
            validation_data=(X_val.values, y_val),
            epochs=500,
            batch_size=256,
            callbacks=[early_stop, reduce_lr],
            verbose=0
        )
        
        oof_dnn[v_idx] = model_dnn.predict(X_val.values, verbose=0).flatten()
        print(f"DNN Fold {fold+1} val loss: {min(history.history['val_loss']):.6f}")
        
        # === XGBoost ===
        print("Training XGBoost...")
        dtrain_x = xgb.DMatrix(X_tr, label=y_tr)
        dvalid_x = xgb.DMatrix(X_val, label=y_val)
        
        model_xgb = xgb.train(
            xgb_params, 
            dtrain_x, 
            num_boost_round=10000, 
            evals=[(dvalid_x, 'valid')],
            early_stopping_rounds=200,
            verbose_eval=500
        )
        
        oof_xgb[v_idx] = model_xgb.predict(dvalid_x, iteration_range=(0, model_xgb.best_iteration))
        
        # Cleanup
        del model_dnn, model_xgb, dtrain_x, dvalid_x
        keras.backend.clear_session()
        gc.collect()
    
    # Blend simple average for OOF evaluation
    oof_blend = 0.5 * oof_dnn + 0.5 * oof_xgb
    
    # Compute OOF on training rows
    oof_rmse = mean_squared_error(df.loc[train_idx, target], oof_blend[train_idx], squared=False)
    print("\n" + "="*50)
    print(f"OOF RMSE (DNN): {mean_squared_error(df.loc[train_idx, target], oof_dnn[train_idx], squared=False):.6f}")
    print(f"OOF RMSE (XGB): {mean_squared_error(df.loc[train_idx, target], oof_xgb[train_idx], squared=False):.6f}")
    print(f"OOF RMSE (blend): {oof_rmse:.6f}")
    print("="*50)
    
    return oof_dnn, oof_xgb, oof_blend, oof_rmse

In [74]:
# === 9) Meta-stacker (Ridge) and full-train/predict helpers ===
def train_meta_and_full_predict(df, features, oof_preds, target='forward_returns', date_col='date_id', gpu=True):
    """Train meta-model (Ridge) and full models for final predictions"""
    
    df = df.sort_values(date_col).reset_index(drop=True)
    train_mask = ~df[target].isna()
    train_idx = np.where(train_mask)[0]
    
    # Prepare meta features
    if isinstance(oof_preds, dict):
        X_meta = np.vstack([oof_preds[k] for k in sorted(oof_preds.keys())]).T
    else:
        X_meta = oof_preds
    
    # Train meta-model (Ridge)
    print("\n=== Training Meta-Model (Ridge) ===")
    meta = Ridge(alpha=1.0)
    meta.fit(X_meta[train_idx], df.loc[train_idx, target].values)
    oof_meta = meta.predict(X_meta)
    meta_rmse = mean_squared_error(df.loc[train_idx, target].values, oof_meta[train_idx], squared=False)
    print(f"Meta OOF RMSE: {meta_rmse:.6f}")
    
    # Full-train on all training rows
    X_full = df.loc[train_idx, features]
    y_full = df.loc[train_idx, target].values
    
    # === Wide & Deep Neural Network (full train) ===
    print("\n=== Training Full Wide-Deep DNN ===")
    model_dnn = build_wide_deep_model(len(features))
    
    reduce_lr = callbacks.ReduceLROnPlateau(
        monitor='loss', 
        factor=0.5, 
        patience=20, 
        min_lr=1e-6,
        verbose=1
    )
    
    history = model_dnn.fit(
        X_full.values, y_full,
        epochs=300,
        batch_size=256,
        callbacks=[reduce_lr],
        verbose=1
    )
    
    # === XGBoost (full train) ===
    print("\n=== Training Full XGBoost ===")
    xgb_params = dict(
        objective='reg:squarederror', 
        eval_metric='rmse', 
        learning_rate=0.02,
        max_depth=8, 
        subsample=0.8, 
        colsample_bytree=0.8, 
        tree_method='gpu_hist' if gpu else 'hist'
    )
    dtrain = xgb.DMatrix(X_full, label=y_full)
    model_xgb = xgb.train(xgb_params, dtrain, num_boost_round=2000, verbose_eval=500)
    
    # Predict for all rows (train + test)
    print("\n=== Generating Predictions ===")
    all_X = df[features]
    
    pred_dnn_all = model_dnn.predict(all_X.values, verbose=0).flatten()
    pred_xgb_all = model_xgb.predict(xgb.DMatrix(all_X))
    
    # Stack and meta-predict
    stack_input = np.vstack([pred_dnn_all, pred_xgb_all]).T
    pred_meta_all = meta.predict(stack_input)
    
    # Add predictions to dataframe
    df['pred_dnn'] = pred_dnn_all
    df['pred_xgb'] = pred_xgb_all
    df['pred_meta'] = pred_meta_all
    
    print("\n=== Training Complete ===")
    return df, model_dnn, model_xgb, meta

In [75]:
# === TRAINING CELL - runs once when notebook starts ===
import joblib
import gc

def train_and_save_ensemble():
    """Complete training pipeline with DNN + XGBoost + Ridge meta-model"""
    
    print("="*60)
    print("Starting Ensemble Training Pipeline")
    print("="*60)
    
    # 1. Feature Engineering
    print("\n=== Step 1: Feature Engineering ===")
    data_fe = add_basic_ts_features(data, windows=(5,20,60), fft_n=FFT_N)
    
    # Feature candidates
    feat_candidates = [c for c in data_fe.columns if c.startswith(('M','E','I','P','V','S','MOM'))]
    feat_candidates = sorted(feat_candidates)[:12]
    
    # 2. Signatures (optional)
    print("\n=== Step 2: Computing Signatures ===")
    try:
        sig_df = compute_signatures_by_date(
            data_fe, 
            feat_candidates, 
            window=SIGNATURE_WINDOW, 
            depth=SIGNATURE_DEPTH
        )
        data_fe = data_fe.merge(sig_df, on='date_id', how='left')
        print("Signatures computed successfully")
    except Exception as e:
        print(f"Signature step skipped: {e}")
    
    # 3. Final feature preparation
    print("\n=== Step 3: Preparing Features ===")
    data_final, features = prepare_features(data_fe, signature_cols_keep_k=200)
    print(f"Total features: {len(features)}")
    
    # 4. Train OOF models (DNN + XGBoost)
    print("\n=== Step 4: Training OOF Models ===")
    oof_dnn, oof_xgb, oof_blend, oof_rmse = train_oof_dnn_xgb(
        data_final, 
        features, 
        target=TARGET, 
        n_splits=N_SPLITS, 
        gpu=True
    )
    
    # 5. Meta-stacking with Ridge
    print("\n=== Step 5: Meta-Stacking ===")
    oof_preds = {'dnn': oof_dnn, 'xgb': oof_xgb}
    df_preds, model_dnn, model_xgb, meta = train_meta_and_full_predict(
        data_final, 
        features, 
        oof_preds, 
        target=TARGET, 
        gpu=True
    )
    
    # 6. Save entire ensemble
    print("\n=== Step 6: Saving Ensemble ===")
    ensemble = {
        'model_dnn': model_dnn,
        'model_xgb': model_xgb, 
        'meta_model': meta,
        'features': features,
        'feature_processor': None,  # Add if you have feature preprocessing
        'data_final': data_final,   # For reference
        'oof_rmse': oof_rmse
    }
    
    joblib.dump(ensemble, 'ensemble_model.pkl')
    print("\n" + "="*60)
    print("Ensemble trained and saved successfully!")
    print(f"Final OOF RMSE: {oof_rmse:.6f}")
    print("="*60)
    
    return ensemble

# Train immediately
trained_ensemble = train_and_save_ensemble()

Starting Ensemble Training Pipeline

=== Step 1: Feature Engineering ===

=== Step 2: Computing Signatures ===
Signatures computed successfully

=== Step 3: Preparing Features ===
Total features: 9034

=== Step 4: Training OOF Models ===
show splits############# 5

=== Fold 1/5 ===
Training DNN...

Epoch 125: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 148: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 163: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 178: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 180: early stopping
Restoring model weights from the end of the best epoch: 150.
DNN Fold 1 val loss: 0.000301
Training XGBoost...
[0]	valid-rmse:0.01073
[200]	valid-rmse:0.01084

=== Fold 2/5 ===
Training DNN...

Epoch 84: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 99: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 114:

In [85]:
# === INFERENCE SETUP ===
import os
import pandas as pd
import polars as pl
import kaggle_evaluation.default_inference_server
import numpy as np
import joblib

# Global ensemble cache
cached_ensemble = None

def load_ensemble():
    """Load pre-trained ensemble - called once on first predict"""
    global cached_ensemble
    if cached_ensemble is None:
        try:
            cached_ensemble = joblib.load('ensemble_model.pkl')
            print("Ensemble loaded successfully!")
        except Exception as e:
            print(f"Failed to load from file: {e}")
            try:
                cached_ensemble = trained_ensemble
                print("Using in-memory trained ensemble")
            except:
                print("ERROR: No ensemble available!")
                raise
    return cached_ensemble

def predict(test: pl.DataFrame) -> pl.DataFrame:
    """Competition inference function - predicts positions [0, 2]"""
    
    ensemble = load_ensemble()
    
    # Convert to pandas
    test_pd = test.to_pandas()
    
    try:
        # Ensure we have the same features (in same order)
        features = ensemble['features']
        
        # Get feature matrix (handle missing features gracefully)
        if set(features).issubset(test_pd.columns):
            X_test = test_pd[features].copy()
        else:
            # Create DataFrame with available features
            X_test = test_pd[[f for f in features if f in test_pd.columns]].copy()
            
            # Add missing features with zeros
            missing_features = set(features) - set(X_test.columns)
            for f in missing_features:
                X_test[f] = 0.0
        
        # Ensure correct feature order
        X_test = X_test[features]
        X_test = X_test.astype(float)
        print(X_test.dtypes)
        # Get base model predictions
        pred_dnn = ensemble['model_dnn'].predict(X_test.values, verbose=0).flatten()
        pred_xgb = ensemble['model_xgb'].predict(xgb.DMatrix(X_test))
        
        # Meta-stack with Ridge
        stack_input = np.column_stack([pred_dnn, pred_xgb])
        final_predictions = ensemble['meta_model'].predict(stack_input)
        
        # Convert returns to positions [0, 2]
        # Simple scaling: shift and clip
        # Adjust this based on your expected return distribution
        positions = np.clip(final_predictions * 10 + 1, 0, 2)
        
    except Exception as e:
        print(f"Inference error: {e}, using fallback")
        # Fallback: neutral position
        positions = np.ones(len(test_pd)) * 1.0
    
    return pl.DataFrame({'prediction': positions})

# Test inference function (optional)
def test_inference():
    """Quick test of inference pipeline"""
    print("\n=== Testing Inference ===")
    ensemble = load_ensemble()
    
    # Create dummy test data
    n_test = 100
    test_data = pd.DataFrame(
        np.random.randn(n_test, len(ensemble['features'])),
        columns=ensemble['features']
    )
    test_pl = pl.from_pandas(test_data)
    
    # Test prediction
    result = predict(test_pl)
    print(f"Generated {len(result)} predictions")
    print(f"Prediction range: [{result['prediction'].min():.4f}, {result['prediction'].max():.4f}]")
    print(f"Mean prediction: {result['prediction'].mean():.4f}")
    print("Inference test completed successfully!")

# Uncomment to test inference
test_inference()


=== Testing Inference ===
Ensemble loaded successfully!
D1           float64
D2           float64
D3           float64
D4           float64
D5           float64
              ...   
sig_195_x    float64
sig_196_x    float64
sig_197_x    float64
sig_198_x    float64
sig_199_x    float64
Length: 9034, dtype: object
Generated 100 predictions
Prediction range: [0.4720, 1.4121]
Mean prediction: 0.9530
Inference test completed successfully!


In [86]:
# SERVER STARTUP (same as before)
inference_server = kaggle_evaluation.default_inference_server.DefaultInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(('/kaggle/input/hull-tactical-market-prediction/',))

D1           float64
D2           float64
D3           float64
D4           float64
D5           float64
              ...   
sig_195_x    float64
sig_196_x    float64
sig_197_x    float64
sig_198_x    float64
sig_199_x    float64
Length: 9034, dtype: object
D1           float64
D2           float64
D3           float64
D4           float64
D5           float64
              ...   
sig_195_x    float64
sig_196_x    float64
sig_197_x    float64
sig_198_x    float64
sig_199_x    float64
Length: 9034, dtype: object
D1           float64
D2           float64
D3           float64
D4           float64
D5           float64
              ...   
sig_195_x    float64
sig_196_x    float64
sig_197_x    float64
sig_198_x    float64
sig_199_x    float64
Length: 9034, dtype: object
D1           float64
D2           float64
D3           float64
D4           float64
D5           float64
              ...   
sig_195_x    float64
sig_196_x    float64
sig_197_x    float64
sig_198_x    float64
sig_199_x    

In [87]:
pl.read_parquet("/kaggle/working/submission.parquet").head()

date_id,prediction
i64,f64
8980,0.996973
8981,1.001072
8982,0.998926
8983,1.000259
8984,1.00017
