# Week 6 — Homework Solution: Meta-Labeling Pipeline

**Course:** ML for Quantitative Finance  
**Status:** SOLUTION — do not distribute to students before deadline

---

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score
import xgboost as xgb
import warnings
# Suppress every warning for the rest of the session (no category filter is
# given, so this also hides deprecation and convergence warnings).
warnings.filterwarnings('ignore')

# Global plot defaults: white-grid seaborn style and a 12x5 default figure size.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 5)

In [None]:
# Daily SPY bars from Yahoo Finance (yfinance treats `end` as exclusive).
spy = yf.download('SPY', start='2010-01-01', end='2024-12-31', progress=False)

# yf.download reports failures by returning an empty frame rather than raising;
# stop here so the problem is not discovered several cells later.
if spy.empty:
    raise RuntimeError("yfinance returned no data for SPY; check the network connection and retry")

# Recent yfinance releases return two-level (field, ticker) columns even for a
# single ticker, older ones return flat columns. Flatten only when needed so
# the cell runs on both.
if isinstance(spy.columns, pd.MultiIndex):
    spy.columns = spy.columns.droplevel(1)

# Simple close-to-close daily return; the first row has no previous close and
# is removed by dropna() together with any other incomplete rows.
spy['ret'] = spy['Close'].pct_change()
spy = spy.dropna()

## Part 1: Primary Model

In [None]:
# Primary model: 50/200-day simple-moving-average crossover on the close.
spy['sma_50'] = spy['Close'].rolling(50).mean()
spy['sma_200'] = spy['Close'].rolling(200).mean()

# +1 = long (fast SMA above slow SMA), -1 = short, 0 = flat during warm-up.
# Comparisons against NaN evaluate to False, so without the explicit warm-up
# guard every row before SMA(200) exists would be marked short.
sma_ready = spy['sma_200'].notna()
crossover = np.where(spy['sma_50'] > spy['sma_200'], 1, -1)
spy['signal'] = np.where(sma_ready, crossover, 0)

fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(spy['Close'], label='SPY', alpha=0.7)
ax.plot(spy['sma_50'], label='SMA(50)', linestyle='--')
ax.plot(spy['sma_200'], label='SMA(200)', linestyle='--')

# Shade long periods. A single fill_between spanning the full axes height
# (y given in axes coordinates via the x-axis transform) replaces one axvspan
# patch per trading day, which created thousands of artists and rendered slowly.
is_long = (spy['signal'] == 1).to_numpy()
ax.fill_between(spy.index, 0, 1, where=is_long,
                transform=ax.get_xaxis_transform(),
                color='green', alpha=0.1, linewidth=0)

ax.set_title('SPY with SMA Crossover Signal')
ax.legend()
plt.tight_layout()
plt.show()

# Primary model performance
# Only rows where SMA(200) exists are evaluated, and the signal is lagged one
# day so that today's return is earned with yesterday's position.
spy_signal = spy.dropna(subset=['sma_200'])
primary_ret = spy_signal['ret'] * spy_signal['signal'].shift(1)
primary_ret = primary_ret.dropna()

# Annualized Sharpe ratio assuming 252 trading days and a zero risk-free rate.
sharpe = primary_ret.mean() / primary_ret.std() * np.sqrt(252)
print(f"Primary model (SMA crossover) Sharpe: {sharpe:.2f}")

## Part 2: Triple-Barrier Labeling

In [None]:
class TripleBarrierLabeler:
    """Triple-barrier labeling for financial ML.

    Each observation is labeled by whichever of three barriers its forward
    price path touches first: a profit-taking barrier above the entry price,
    a stop-loss barrier below it, or a vertical (time) barrier after
    ``max_holding`` bars. The horizontal barriers are scaled by the rolling
    standard deviation of simple returns.

    Parameters
    ----------
    pt_mult : float
        Profit-taking barrier width, in multiples of rolling volatility.
    sl_mult : float
        Stop-loss barrier width, in multiples of rolling volatility.
    max_holding : int
        Number of bars after which the time barrier closes the trade.
    vol_window : int
        Window length of the rolling volatility estimate.
    """

    # Output schema, shared by the populated and the empty result.
    _COLUMNS = ['label', 'ret', 'barrier', 'days']

    def __init__(self, pt_mult=2.0, sl_mult=2.0, max_holding=10, vol_window=20):
        self.pt_mult = pt_mult
        self.sl_mult = sl_mult
        self.max_holding = max_holding
        self.vol_window = vol_window

    def label(self, prices):
        """Generate triple-barrier labels.

        Parameters
        ----------
        prices : pandas.Series
            Price series; its index values become the ``date`` index of the
            result.

        Returns
        -------
        pandas.DataFrame
            Indexed by ``date`` (the entry bar) with columns ``label``
            (1 profit-take, -1 stop-loss, or the sign of the return at the
            time barrier), ``ret`` (return from entry to exit), ``barrier``
            (``'pt'``, ``'sl'`` or ``'time'``) and ``days`` (bars held).
            Bars without a positive volatility estimate and the final
            ``max_holding`` bars are not labeled. An empty frame with the
            same columns is returned when nothing can be labeled.
        """
        returns = prices.pct_change()
        # Plain arrays avoid the per-element .iloc overhead inside the double loop.
        vol = returns.rolling(self.vol_window).std().to_numpy()
        px = prices.to_numpy()
        dates = prices.index
        results = []

        # The upper bound guarantees i + max_holding is always a valid position,
        # so the forward scan below needs no extra bounds check.
        for i in range(self.vol_window, len(px) - self.max_holding):
            daily_vol = vol[i]
            if np.isnan(daily_vol) or daily_vol <= 0:
                continue

            entry = px[i]
            pt = entry * (1 + self.pt_mult * daily_vol)
            sl = entry * (1 - self.sl_mult * daily_vol)

            for j in range(1, self.max_holding + 1):
                p = px[i + j]

                if p >= pt:
                    results.append({'date': dates[i], 'label': 1,
                                    'ret': (p - entry) / entry,
                                    'barrier': 'pt', 'days': j})
                    break
                elif p <= sl:
                    results.append({'date': dates[i], 'label': -1,
                                    'ret': (p - entry) / entry,
                                    'barrier': 'sl', 'days': j})
                    break
            else:
                # Neither horizontal barrier was touched: label by the sign of
                # the return at the time barrier (np.sign(0) is already 0).
                end_p = px[i + self.max_holding]
                ret = (end_p - entry) / entry
                results.append({'date': dates[i],
                                'label': int(np.sign(ret)),
                                'ret': ret, 'barrier': 'time',
                                'days': self.max_holding})

        if not results:
            # set_index('date') would raise KeyError on a column-less frame.
            return pd.DataFrame(columns=self._COLUMNS,
                                index=dates[:0].rename('date'))

        return pd.DataFrame(results).set_index('date')


# Label every SPY close with symmetric 2-sigma barriers and a 10-bar time limit.
labeler = TripleBarrierLabeler(pt_mult=2.0, sl_mult=2.0, max_holding=10)
labels = labeler.label(spy['Close'])

# Summary: how many observations were labeled, which barrier ended each
# trade, and how the resulting class labels are distributed.
print(f"Labels: {len(labels)}")
for title, column in (("Barrier", 'barrier'), ("Label", 'label')):
    print(f"{title} distribution: {labels[column].value_counts().to_dict()}")

## Part 3: Meta-Labeling Model

In [None]:
# Meta-labels: did the primary signal's trade make money?
# Only dates that have both a triple-barrier label and a valid SMA(200) are used.
common_idx = labels.index.intersection(spy.dropna(subset=['sma_200']).index)
meta_df = labels.loc[common_idx].copy()
meta_df['signal'] = spy.loc[common_idx, 'signal']
# `ret` is the long-side return to the first barrier touched, so ret * signal
# is the trade's P&L in the direction the primary model chose.
meta_df['meta_label'] = (meta_df['ret'] * meta_df['signal'] > 0).astype(int)

# Features for meta-model
# Rolling statistics are computed on the full, contiguous price history and
# only then restricted to the labeled dates. Computing them on the restricted
# frame would discard the first ~60 labeled rows to a second warm-up and would
# silently stretch the windows across any dates the labeler skipped.
spy_aligned = spy.loc[common_idx].copy()
features = pd.DataFrame(index=spy.index)
features['vol_20d'] = spy['ret'].rolling(20).std()
features['vol_60d'] = spy['ret'].rolling(60).std()
features['mom_20d'] = spy['Close'].pct_change(20)
features['mom_60d'] = spy['Close'].pct_change(60)
features['volume_ratio'] = spy['Volume'].rolling(5).mean() / spy['Volume'].rolling(60).mean()
features['signal'] = spy['signal']
features['sma_spread'] = (spy['sma_50'] - spy['sma_200']) / spy['Close']
X_meta = features.loc[common_idx]

# Align and drop NaNs
valid = X_meta.dropna().index.intersection(meta_df.index)
X = X_meta.loc[valid]
y = meta_df.loc[valid, 'meta_label']

print(f"Meta-model training data: {len(X)} samples")
print(f"Meta-label balance: {y.mean():.1%} profitable trades")

In [None]:
# Train/test split (temporal)
split_date = pd.Timestamp('2020-01-01')
# Label slicing with .loc[:d] and .loc[d:] is inclusive on both sides, so a
# split date that is a trading day would land in train AND test. A boolean
# mask gives a strict partition: train < split_date <= test.
in_test = X.index >= split_date
X_train, X_test = X.loc[~in_test], X.loc[in_test]
y_train, y_test = y.loc[~in_test], y.loc[in_test]

# Purge: a training label is resolved with prices up to `max_holding` bars
# after its entry date, so the last rows before the split are labeled with
# test-period prices. Dropping them keeps the test set out of the training labels.
n_keep = max(len(X_train) - labeler.max_holding, 0)
X_train, y_train = X_train.iloc[:n_keep], y_train.iloc[:n_keep]

meta_model = RandomForestClassifier(n_estimators=200, max_depth=5, random_state=42)
meta_model.fit(X_train.values, y_train.values)

# Predictions
# Probability of class 1 (trade was profitable); trades at or below 0.5 are filtered out.
meta_proba = meta_model.predict_proba(X_test.values)[:, 1]
meta_pred = (meta_proba > 0.5).astype(int)

print(f"Meta-model OOS accuracy: {accuracy_score(y_test, meta_pred):.3f}")
# zero_division=0 makes the "no trade accepted" case an explicit 0 instead of
# an undefined-metric warning (which the notebook silences anyway).
print(f"Meta-model OOS precision: {precision_score(y_test, meta_pred, zero_division=0):.3f}")
print(f"Trade filter rate: {1 - meta_pred.mean():.1%} of trades filtered out")

## Part 4: Purged K-Fold CV

In [None]:
class PurgedKFold:
    """Purged K-Fold CV for financial data.

    The sorted unique dates are cut into ``n_splits`` contiguous test blocks.
    For each block, training observations dated within ``embargo_days``
    calendar days before the block's first date or after its last date are
    removed, so labels that overlap the test window cannot leak into training.

    Parameters
    ----------
    n_splits : int
        Number of contiguous test folds.
    embargo_days : int or float
        Calendar days excluded from training on both sides of each test block.
    """

    def __init__(self, n_splits=5, embargo_days=10):
        self.n_splits = n_splits
        self.embargo_days = embargo_days

    def split(self, dates):
        """Yield ``(train_idx, test_idx)`` positional index arrays.

        Parameters
        ----------
        dates : array-like of datetime64
            One date per observation, in the same order as the feature rows.

        Yields
        ------
        tuple of numpy.ndarray
            Positions of the training and test observations for each fold.
            Folds whose training or test side is empty are skipped.

        Raises
        ------
        ValueError
            If ``n_splits`` is smaller than 1 or exceeds the number of unique
            dates.
        """
        unique_dates = np.unique(dates)  # np.unique returns sorted values
        n_dates = len(unique_dates)
        if self.n_splits < 1 or n_dates < self.n_splits:
            raise ValueError(
                f"Cannot make {self.n_splits} folds from {n_dates} unique dates"
            )

        embargo = pd.Timedelta(days=self.embargo_days)
        # Spread the remainder over the first folds so that every date is
        # tested exactly once; plain floor division left the trailing
        # `n_dates % n_splits` dates out of every test fold.
        base_size, n_larger = divmod(n_dates, self.n_splits)

        start = 0
        for fold in range(self.n_splits):
            stop = start + base_size + (1 if fold < n_larger else 0)
            test_start = unique_dates[start]
            test_end = unique_dates[stop - 1]
            start = stop

            embargo_start = test_start - embargo
            embargo_end = test_end + embargo

            test_mask = (dates >= test_start) & (dates <= test_end)
            train_mask = (dates < embargo_start) | (dates > embargo_end)

            train_idx = np.where(train_mask)[0]
            test_idx = np.where(test_mask)[0]

            if len(train_idx) > 0 and len(test_idx) > 0:
                yield train_idx, test_idx


# Compare standard vs purged
def _fold_accuracy(train_idx, test_idx):
    """Fit a fresh random forest on the training rows and return test accuracy."""
    clf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
    clf.fit(X.values[train_idx], y.values[train_idx])
    return clf.score(X.values[test_idx], y.values[test_idx])


# Shuffled K-Fold mixes past and future observations in every fold.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores_standard = [_fold_accuracy(tr, te) for tr, te in kf.split(X)]

# Purged K-Fold keeps test blocks contiguous and embargoes 15 calendar days
# of training data on either side of each block.
pkf = PurgedKFold(n_splits=5, embargo_days=15)
scores_purged = [_fold_accuracy(tr, te) for tr, te in pkf.split(X.index.values)]

mean_standard, mean_purged = np.mean(scores_standard), np.mean(scores_purged)
print(f"Standard K-Fold: {mean_standard:.3f} ± {np.std(scores_standard):.3f}")
print(f"Purged K-Fold:   {mean_purged:.3f} ± {np.std(scores_purged):.3f}")
# The accuracy gap is the optimism attributable to leakage in the shuffled scheme.
print(f"Information leakage: {mean_standard - mean_purged:.3f}")

## Part 5: Full Pipeline Backtest

In [None]:
# Backtest: primary alone vs primary + meta-filter
# Restrict the out-of-sample window to the dates the meta-model scored.
out_of_sample = spy.loc[split_date:].copy()
test_spy = out_of_sample.loc[out_of_sample.index.isin(X_test.index)]

# Primary alone: yesterday's signal earns today's return.
lagged_primary = test_spy['signal'].shift(1)
primary_returns = (test_spy['ret'] * lagged_primary).dropna()

# Primary + meta-filter: keep the primary position only where the meta-model's
# probability of a profitable trade exceeds 0.5, otherwise stay flat.
meta_filter = pd.Series(meta_proba, index=X_test.index)
take_trade = (meta_filter > 0.5).astype(float)
meta_signal = test_spy['signal'].copy()
meta_signal.loc[meta_filter.index] = meta_signal.loc[meta_filter.index] * take_trade
lagged_meta = meta_signal.shift(1)
meta_returns = (test_spy['ret'] * lagged_meta).dropna()

def compute_metrics(rets, name):
    """Summarize a daily return series as a dict of performance statistics.

    Parameters
    ----------
    rets : pandas.Series
        Periodic (daily) simple returns.
    name : str
        Value stored under the ``'Strategy'`` key.

    Returns
    -------
    dict
        ``'Strategy'``, ``'Sharpe'`` (annualized with 252 periods, 0 when the
        volatility is not positive), ``'Return'`` (annualized mean return),
        ``'Max DD'`` (most negative drawdown of the compounded curve) and
        ``'Hit Rate'`` (share of periods with a strictly positive return).
    """
    annual_return = rets.mean() * 252
    annual_vol = rets.std() * np.sqrt(252)

    # A non-positive (or NaN) volatility makes the ratio meaningless; report 0.
    if annual_vol > 0:
        sharpe_ratio = annual_return / annual_vol
    else:
        sharpe_ratio = 0

    # Drawdown is measured against the running peak of the compounded curve.
    equity = (1 + rets).cumprod()
    drawdown = equity / equity.cummax() - 1

    return {
        'Strategy': name,
        'Sharpe': sharpe_ratio,
        'Return': annual_return,
        'Max DD': drawdown.min(),
        'Hit Rate': (rets > 0).mean(),
    }

# One row of metrics per strategy, in display order.
strategy_returns = {
    'Primary Only': primary_returns,
    'Primary + Meta': meta_returns,
}
results = pd.DataFrame(
    [compute_metrics(series, label) for label, series in strategy_returns.items()]
).set_index('Strategy')

# Render ratios with three decimals and rates as percentages for display.
column_formats = {
    'Sharpe': "{:.3f}",
    'Hit Rate': "{:.3f}",
    'Return': "{:.1%}",
    'Max DD': "{:.1%}",
}
for col, fmt in column_formats.items():
    results[col] = results[col].apply(fmt.format)

print(results)

# Plot
# Growth of one unit of capital under each strategy.
primary_curve = (1 + primary_returns).cumprod()
meta_curve = (1 + meta_returns).cumprod()

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(primary_curve, label='Primary Only')
ax.plot(meta_curve, label='Primary + Meta', linestyle='--')
ax.set_title('Primary Model vs. Meta-Labeled Pipeline')
ax.set_ylabel('Cumulative Return')
ax.legend()
plt.tight_layout()
plt.show()