# Notebook for CME Futures Challenge

### The Rough Idea

Model indices as geometric Brownian motion ($dS/S = \mu\,dt + \sigma\,dB$)  
Model mu (market line) as a linear regression with numerous factors including economic, credit measures, etc  
Model sigma as a function of volatility including recent volatility and EMA (decay)  
Long/short based on futures mispricings based on our model  

# Downloading historical data for indices (S&P, NASDAQ, DJIA)

Imports

In [None]:
import yfinance as yf
import pandas as pd
import plotly.express as px
from typing import List, Dict

Make get_data function for downloading from yf

In [None]:
timeframe = '1000mo' # look-back period passed to yfinance

def get_data(tickers: List):
    """Download daily price history from Yahoo Finance for each ticker.

    Every symbol is fetched with its own ``yf.download`` call, using the
    module-level ``timeframe`` as the period and one-day bars.

    Returns a dict mapping each ticker symbol to whatever ``yf.download``
    returned for it.
    """
    return {
        symbol: yf.download(symbol, period=timeframe, interval='1d')
        for symbol in tickers
    }

Now let's get data for indices and display with pd

In [None]:
indices = ['^GSPC', '^IXIC', '^DJI'] # S&P, NASDAQ, DJIA
indexes = ['S&P', 'NASDAQ', 'DJIA'] # Names for reference later

etfs = ['SPY', 'QQQ', 'DIA']
futures = ['ES=F', 'NQ=F', 'YM=F']

# One download pass for everything; the frames below are picked out by symbol.
data_dictionary = get_data(indices + etfs + futures)

# The ETFs stand in for the cash indices here (iterate over `indices` instead of
# `etfs` to work with the index series themselves). Order follows `indexes`.
s_p, nasdaq, djia = (pd.DataFrame(data_dictionary[symbol]) for symbol in etfs)

# Matching futures contracts, in the same S&P / NASDAQ / DJIA order.
s_p_F, nasdaq_F, djia_F = (pd.DataFrame(data_dictionary[symbol]) for symbol in futures)

In [None]:
# Display the SPY frame as built from the download, before any reshaping.
s_p

We need to flatten this - notice ticker header

In [None]:
# Drop level 1 of the two-level column header so only one level of column
# names remains on every frame.

# Indexes
s_p, nasdaq, djia = (frame.droplevel(1, axis=1) for frame in (s_p, nasdaq, djia))

# Futures
s_p_F, nasdaq_F, djia_F = (frame.droplevel(1, axis=1) for frame in (s_p_F, nasdaq_F, djia_F))

In [None]:
# Display the frame again to check the column header after droplevel.
s_p

Let's drop high, low, and open and rename columns

In [None]:
# Remove the High/Low/Open columns and prefix Close/Volume with the index name.
# The futures frames deliberately receive the same column names as the ETF
# frames, so either set can be used interchangeably later on.
unused_fields = ['High', 'Low', 'Open']

# Indices
s_p = s_p.drop(columns=unused_fields).rename(columns={'Close': 'S&P_Close', 'Volume': 'S&P_Volume'})
nasdaq = nasdaq.drop(columns=unused_fields).rename(columns={'Close': 'NASDAQ_Close', 'Volume': 'NASDAQ_Volume'})
djia = djia.drop(columns=unused_fields).rename(columns={'Close': 'DJIA_Close', 'Volume': 'DJIA_Volume'})

# Futures
s_p_F = s_p_F.drop(columns=unused_fields).rename(columns={'Close': 'S&P_Close', 'Volume': 'S&P_Volume'})
nasdaq_F = nasdaq_F.drop(columns=unused_fields).rename(columns={'Close': 'NASDAQ_Close', 'Volume': 'NASDAQ_Volume'})
djia_F = djia_F.drop(columns=unused_fields).rename(columns={'Close': 'DJIA_Close', 'Volume': 'DJIA_Volume'})

Let's get a quick plot of an index

In [None]:
# Line chart of the S&P close against the frame's date index.
# NOTE(review): the title says 30 years, but the span actually plotted is whatever the download returned.
fig = px.line(s_p, x=s_p.index, y="S&P_Close", title="S&P Daily Past 30 Years")
fig.show()

# Downloading historical data for our factor model

We are going to model the index as a geometric brownian motion, with the mu factor being a linear regression model with numerous inputs.  

## Factor considerations:  
### <u>Term structure</u>
###### Term spread (10Y-3M)

### <u>Credit conditions</u>
###### IG spread (BAA-AAA)

### <u>Valuation</u>
###### Forward E/P - real 10Y
###### Dividend yield

### <u>Economic</u>
###### Fed funds
###### Inflation (CPI)
###### DXY change (dollar index)  

### Some of these we can get from yahoo finance:  

In [None]:
tickers = [
    # Term structure
    '^TNX', # 10yr CBOE
    '^IRX', # 3m bill (on discount basis, need to convert to yield)

    # Economic
    'DX-Y.NYB', # Dollar index
]

data_dictionary = get_data(tickers)

# Keep only the closing level of each series. The unpacking order must match
# the order of `tickers` above: 10-year, 3-month, dollar index.
ten_yr, three_m, dollar_index = (
    pd.DataFrame(data_dictionary[symbol]['Close']) for symbol in tickers
)

Rename columns

In [None]:
# Replace the ticker-symbol column labels with readable factor names.
ten_yr = ten_yr.rename(columns={'^TNX': 'ten_yr'})
three_m = three_m.rename(columns={'^IRX': 'three_m'})
dollar_index = dollar_index.rename(columns={'DX-Y.NYB': 'dollar_index'})

We should get dividend yield too

In [None]:
# Trailing dividend yield per ETF, keyed by ETF symbol.
div_data = {}

for etf in etfs:
    ticker = yf.Ticker(etf)
    div = ticker.dividends
    price = ticker.history(timeframe)['Close']

    # Calculate dividend yield
    div_12m = div.rolling(window='365D', min_periods=1).sum()  # dividends summed over the trailing 365 calendar days
    div_12m = div_12m.reindex(price.index, method='ffill')  # carry the latest sum forward onto every price date
    div_yield = div_12m / price  # trailing dividends relative to the same day's close
    div_data[etf] = div_yield

Fix index for all 3 and rename columns

In [None]:
for etf in etfs:
    yield_series = div_data[etf]
    # normalize() sets every timestamp to midnight; tz_localize(None) then strips
    # the timezone so the index is a plain, timezone-naive date index.
    yield_series.index = pd.to_datetime(yield_series.index).normalize().tz_localize(None)
    yield_series.name = f'{etf}_div'

In [None]:
# Display the SPY dividend-yield series.
div_data['SPY']

### pandas_datareader lets us download fred data

In [None]:
from pandas_datareader import data as pdr
from datetime import datetime

In [None]:
start = datetime(1990,1,1) # Start date for download

# Macroeconomic data
gdp = pdr.DataReader("GDP", "fred", start)
cpi = pdr.DataReader("CPIAUCSL", "fred", start)
fedfunds = pdr.DataReader("FEDFUNDS", "fred", start)

# When this was written the download lagged the latest fed funds rate, so one
# observation is patched in by hand. Only append it while it is still newer than
# everything FRED returned: appending it after later observations would leave the
# index out of chronological order, and a forward-filling reindex needs a
# monotonic index.
fedfunds = fedfunds['FEDFUNDS']
manual_obs_date = datetime(2025,9,17)
if fedfunds.index.max() < manual_obs_date:
    fedfunds = pd.concat([fedfunds, pd.Series([4.08], index=[manual_obs_date])])

# Credit risk data
ig_spread = pdr.DataReader("BAMLC0A4CBBB", "fred", start)   # BofA BBB corp minus Treasuries
#hy_spread = pdr.DataReader("BAMLH0A0HYM2", "fred", start)   # BofA US High Yield spread
#baa_spread = pdr.DataReader("BAA10Y", "fred", start)        # Moody’s Baa – 10Y Treasury

Rename series

In [None]:
# NOTE(review): `cpi` and `ig_spread` appear to still be DataFrames at this point (they are only
# squeezed to Series later), so assigning `.name` presumably does not rename their columns.
# Downstream code refers to the FRED id 'BAMLC0A4CBBB', which suggests the original column names
# survive. `fedfunds` is a Series, so its name does take effect. Confirm before relying on these names.
cpi.name = 'CPI'
fedfunds.name = 'fed_funds'
ig_spread.name = 'credit_spread'

In [None]:
fred_data = [gdp, cpi, fedfunds, ig_spread]

# Last business day <= today
last_bday = pd.bdate_range(end=pd.Timestamp.today().normalize().tz_localize(None), periods=1)[0]

# Put every FRED series on a business-day calendar, carrying the most recent
# observation forward, and store each one back as a single-column DataFrame.
# NOTE(review): values are carried forward from each observation's own date
# label; if a figure was published later than that date, this makes it visible
# to the model earlier than it really was.
for i, df in enumerate(fred_data):
    s = df.squeeze()
    bidx = pd.bdate_range(start=s.index.min(), end=last_bday)
    s = s.reindex(bidx, method='ffill')
    fred_data[i] = s.to_frame(s.name if s.name else f"series_{i}")

In [None]:
# Display the first FRED frame (GDP) after the business-day reindex.
fred_data[0]

Let's build a master dataframe

In [None]:
# When True, the futures frames replace the ETF frames as the price/volume source.
# Both sets carry the same column names, so nothing downstream needs to change.
toggle_futures = True
if toggle_futures:
    s_p = s_p_F
    nasdaq = nasdaq_F
    djia = djia_F

# Join every other series onto the S&P frame by index (date).
data = s_p.join([nasdaq, djia, div_data['SPY'], div_data['QQQ'], div_data['DIA'], ten_yr, three_m, dollar_index, fred_data[0], fred_data[1], fred_data[2], fred_data[3]])
data

# Linear regression model

### Feature Engineering

We need to be careful to not include things such as raw moving averages that will leak volatility information into our drift prediction  

In [None]:
import numpy as np

Function definitions to help out

In [None]:
def rolling_mean(data, window):
    """Rolling mean over `window` rows; NaN until a full window of values is available."""
    full_window = data.rolling(window=window, min_periods=window)
    return full_window.mean()

Features

#  (TODO: look at making features like diffs for economic metrics, figure out when economic metrics are released vs reported in data)

In [None]:
# Work in logs: price and volume changes become additive and the log-normal
# assumption applies to the levels.
# NOTE(review): a zero volume would produce -inf here — confirm the volume data has none.
for index in indexes:
    for kind, source in (('price', 'Close'), ('volume', 'Volume')):
        data[f'{index}_log_{kind}'] = np.log(data[f'{index}_{source}'])

Setting our target returns metric

In [None]:
# Log-return horizon in trading days (1 = daily; 21 would be roughly one month)
days = 1

for index in indexes:
    data[f'{index}_log_ret'] = data[f'{index}_log_price'].diff(days)

# Log return over the NEXT `days` days (today's row holds the return that follows it) -- This will be our target variable
data[['S&P_next_ret','NASDAQ_next_ret','DJIA_next_ret']] = data[['S&P_log_ret','NASDAQ_log_ret','DJIA_log_ret']].shift(-days).dropna()

ETF Features

In [None]:
# Momentum and trend features on log price and log volume, per index.
# Window lengths are in rows (trading days): 5 ~ one week, 21 ~ one month, 63 ~ three months.
for index in indexes:
    # Price-based
    data[f'{index}_mom_1w'] = data[f'{index}_log_price'].diff(5) # Total price change / momentum indicator
    data[f'{index}_mom_3m'] = data[f'{index}_log_price'].diff(63)
    data[f'{index}_3m_rolling_price'] = rolling_mean(data[f'{index}_log_price'], 63)
    data[f'{index}_trend_speed_price'] = data[f'{index}_3m_rolling_price'].diff(5)  # How fast the 3m trend is changing on a weekly basis
    data[f'{index}_trend_dist_price'] = data[f'{index}_log_price'] - data[f'{index}_3m_rolling_price']  # Gap between today's log price and its 3m average


    # Volume-based (essentially the same as price for now)
    data[f'{index}_vlm_1w'] = data[f'{index}_log_volume'].diff(5) # Total volume change / momentum indicator
    data[f'{index}_vlm_1m'] = data[f'{index}_log_volume'].diff(21)
    data[f'{index}_vlm_3m'] = data[f'{index}_log_volume'].diff(63)
    data[f'{index}_3m_rolling_volume'] = rolling_mean(data[f'{index}_log_volume'], 63)
    data[f'{index}_trend_speed_volume'] = data[f'{index}_3m_rolling_volume'].diff(5)  # How fast the 3m trend is changing on a weekly basis
    data[f'{index}_trend_dist_volume'] = data[f'{index}_log_volume'] - data[f'{index}_3m_rolling_volume']  # Gap between today's log volume and its 3m average

Macro features

Volatility / Price Features (From ChatGPT)

In [None]:
EPS = 1e-12  # small constant added to denominators so ratios never divide by exactly zero

def rolling_mean(s, w):
    """Rolling mean over `w` rows that tolerates partially filled windows.

    A value is produced once at least max(2, int(0.6 * w)) observations are in
    the window. Defining this replaces any earlier `rolling_mean` in the session.
    """
    min_obs = max(2, int(w*0.6))
    return s.rolling(w, min_periods=min_obs).mean()

def ewma_vol(r, lam=0.94):
    """Exponentially weighted moving average of squared returns (an EWMA variance).

    Uses pandas `ewm` with alpha = 1 - lam and adjust=False, i.e. the recursion
    sigma_t^2 = lam * sigma_{t-1}^2 + (1 - lam) * r_t^2, which includes the
    current observation r_t. The result is floored at zero.
    """
    squared = r.pow(2)
    smoothed = squared.ewm(alpha=(1-lam), adjust=False).mean()
    return smoothed.clip(lower=0)

def rolling_autocorr(x, lag=1, window=63):
    """Rolling correlation between `x` and itself shifted by `lag` rows.

    For stability a value is only produced once the window holds at least
    max(10, int(0.6 * window)) observations.
    """
    lagged = x.shift(lag)
    roller = x.rolling(window, min_periods=max(10, int(window*0.6)))
    return roller.corr(lagged)

def realized_quarticity(r, window=63):
    """Rolling sum of fourth powers of `r` over `window` rows.

    This is the unscaled sum only; any normalisation is left to the caller.
    Requires at least max(10, int(0.6 * window)) observations in the window.
    """
    fourth_power = r.pow(4)
    min_obs = max(10, int(window*0.6))
    return fourth_power.rolling(window, min_periods=min_obs).sum()

def build_vol_features(data, prefix, day_w=21, qtr_w=63, yr_w=252, ewma_lambda=0.94):
    """
    Add a suite of volatility-centric feature columns for one index.

    Expects:
      data[f'{prefix}_log_price'] (daily log price)
      data[f'{prefix}_log_volume'] (daily log volume, optional)

    Parameters:
      data        -- DataFrame holding the columns above; it is modified IN PLACE
      prefix      -- column-name prefix identifying the index (e.g. 'S&P')
      day_w       -- short window in rows (named "1m" in the output columns)
      qtr_w       -- medium window in rows ("3m")
      yr_w        -- long window in rows ("1y")
      ewma_lambda -- decay factor passed to ewma_vol

    Rolling statistics need at least int(0.6 * window) observations.

    Every feature at row t uses only data up to and including row t, so the
    columns can safely be used to predict returns after t.

    Returns the same `data` object with the new columns added.
    """
    lp = data[f"{prefix}_log_price"]
    lv = data.get(f"{prefix}_log_volume", None)

    # Daily log return
    r = lp.diff()  # already log-price, so diff = log-return

    # --- Realized volatility proxies ---
    data[f"{prefix}_rv_1m"]  = r.rolling(day_w, min_periods=int(day_w*0.6)).var().clip(lower=0)          # variance
    data[f"{prefix}_rv_3m"]  = r.rolling(qtr_w, min_periods=int(qtr_w*0.6)).var().clip(lower=0)
    data[f"{prefix}_rv_1y"]  = r.rolling(yr_w,  min_periods=int(yr_w*0.6)).var().clip(lower=0)
    data[f"{prefix}_absrv_1m"] = r.abs().rolling(day_w, min_periods=int(day_w*0.6)).mean()               # mean |r|
    data[f"{prefix}_absrv_3m"] = r.abs().rolling(qtr_w, min_periods=int(qtr_w*0.6)).mean()

    # EWMA volatility (RiskMetrics-style)
    data[f"{prefix}_ewma_var"] = ewma_vol(r, lam=ewma_lambda)
    data[f"{prefix}_ewma_vol"] = np.sqrt(data[f"{prefix}_ewma_var"])

    # Volatility-of-volatility (how fast vol is changing)
    data[f"{prefix}_vol_speed_1w"] = data[f"{prefix}_rv_3m"].diff(5)                                      # weekly change in 3m var
    data[f"{prefix}_vol_mom_1m"]   = data[f"{prefix}_rv_3m"] - data[f"{prefix}_rv_1m"]                    # 3m vs 1m
    data[f"{prefix}_vol_mom_1y"]   = data[f"{prefix}_rv_1y"] - data[f"{prefix}_rv_3m"]

    # Volatility clustering proxies
    data[f"{prefix}_acf_sqret_lag1_3m"] = rolling_autocorr(r.pow(2), lag=1, window=qtr_w)
    data[f"{prefix}_acf_absret_lag1_3m"] = rolling_autocorr(r.abs(), lag=1, window=qtr_w)

    # Leverage effect proxy: negative returns often precede higher vol, so
    # correlate yesterday's return with today's absolute return, corr(r_{t-1}, |r_t|).
    # The pairing is written with a backward shift on purpose: shifting |r| forward
    # (shift(-1)) would put |r_{t+1}| — the size of the next-day return being
    # predicted — into the feature at row t.
    data[f"{prefix}_lev_proxy_3m"] = r.shift(1).rolling(qtr_w, min_periods=int(qtr_w*0.6)).corr(r.abs())

    # Quarticity (heavy tails proxy)
    data[f"{prefix}_quarticity_3m"] = realized_quarticity(r, window=qtr_w)

    # Ratio features (normalized vol levels); EPS keeps the denominators non-zero
    data[f"{prefix}_vol_ratio_1m_3m"] = (data[f"{prefix}_rv_1m"] / (data[f"{prefix}_rv_3m"] + EPS))
    data[f"{prefix}_vol_ratio_3m_1y"] = (data[f"{prefix}_rv_3m"] / (data[f"{prefix}_rv_1y"] + EPS))
    data[f"{prefix}_ewma_over_3m"]    = (data[f"{prefix}_ewma_var"] / (data[f"{prefix}_rv_3m"] + EPS))

    # Price–volatility relation: distance from trend as a stress proxy
    data[f"{prefix}_price_trend_3m"]  = rolling_mean(lp, qtr_w)
    data[f"{prefix}_price_trend_dist"] = lp - data[f"{prefix}_price_trend_3m"]
    # Volatility when far below trend often spikes; include interaction
    data[f"{prefix}_vol_x_trend_dist"] = data[f"{prefix}_rv_1m"] * data[f"{prefix}_price_trend_dist"]

    # Volume–volatility links (if volume available)
    if lv is not None:
        dv = lv.diff()  # log-volume change
        data[f"{prefix}_vlm_var_1m"] = dv.rolling(day_w, min_periods=int(day_w*0.6)).var().clip(lower=0)
        data[f"{prefix}_vlm_var_3m"] = dv.rolling(qtr_w, min_periods=int(qtr_w*0.6)).var().clip(lower=0)
        # Corr between |r| and volume changes (vol–volume clustering)
        data[f"{prefix}_corr_absr_dlv_3m"] = r.abs().rolling(qtr_w, min_periods=int(qtr_w*0.6)).corr(dv)
        # Volume surprise proxy: current vs 3m trend
        data[f"{prefix}_vlm_trend_3m"] = rolling_mean(lv, qtr_w)
        data[f"{prefix}_vlm_trend_dist"] = lv - data[f"{prefix}_vlm_trend_3m"]
        # Vol reacts to volume surprises
        data[f"{prefix}_vol_x_vlm_surprise"] = data[f"{prefix}_rv_1m"] * data[f"{prefix}_vlm_trend_dist"]

    # Optional: implied vs realized vol spread if you have VIX-like series
    # if f"{prefix}_impl_vol" in data.columns:
    #     data[f"{prefix}_ivr_spread"] = data[f"{prefix}_impl_vol"]**2 - data[f"{prefix}_rv_1m"]

    # Forward-looking realized vol TARGET example (if needed; never use as a feature)
    # data[f"{prefix}_fwd_rv_1m"] = r.shift(-1).rolling(day_w, min_periods=int(day_w*0.6)).var()

    return data

# ---- Apply to all indices ----
for index in indexes:
    data = build_vol_features(data, index, day_w=21, qtr_w=63, yr_w=252, ewma_lambda=0.94)

# ---- Cross-index spillover features (optional but useful) ----
# Differences/spreads in contemporaneous vol across indices capture contagion/regime moves
data["SPX_minus_NDX_vol_1m"] = data["S&P_rv_1m"] - data["NASDAQ_rv_1m"]
data["SPX_minus_DJIA_vol_1m"] = data["S&P_rv_1m"] - data["DJIA_rv_1m"]
data["NDX_minus_DJIA_vol_1m"] = data["NASDAQ_rv_1m"] - data["DJIA_rv_1m"]

# A simple global vol factor: first PC of 3m realized vars (if you want a single factor)
# NOTE(review): the standardisation and the SVD both use every row of `data`, so the
# factor at any date depends on later observations as well.
try:
    _X = data[["S&P_rv_3m", "NASDAQ_rv_3m", "DJIA_rv_3m"]].dropna()
    _Xc = (_X - _X.mean()) / (_X.std(ddof=0) + EPS)
    # first PC (no scikit-learn to keep it lightweight)
    U, S, Vt = np.linalg.svd(_Xc.values, full_matrices=False)
    gvol = pd.Series(U[:, 0]*S[0], index=_X.index, name="global_vol_pc1")
    data["global_vol_pc1"] = gvol.reindex(data.index)
except Exception as exc:
    # Best-effort feature: carry on without it, but say so instead of hiding the reason.
    print(f"Skipping global_vol_pc1 ({type(exc).__name__}: {exc})")

Other features

In [None]:
# One indicator column per calendar month present in the index (month_1, month_2, ...),
# aligned to the same dates as `data` and appended to it.
month_dummies = pd.get_dummies(data.index.month, prefix="month")
month_dummies.index = data.index
data = data.join(month_dummies)

### Preprocessing Data

Let's check for NaNs

In [None]:
# Number of missing values in each column.
data.isna().sum()

Impute some NaNs with average

In [None]:
impute_columns = ['ten_yr', 'three_m', 'dollar_index', 'BAMLC0A4CBBB']

# Fill each listed column's gaps with that column's own mean.
# NOTE(review): the mean is taken over the full sample, before the train/test
# split, so filled values carry information from later dates.
data[impute_columns] = data[impute_columns].fillna(data[impute_columns].mean())


#data['S&P_ret'] = data['S&P_ret'].fillna(data['S&P_ret'].mean())
#data['NASDAQ_ret'] = data['NASDAQ_ret'].fillna(data['NASDAQ_ret'].mean())
#data['DJIA_ret'] = data['DJIA_ret'].fillna(data['DJIA_ret'].mean())

Drop others

In [None]:
# Discard every row that still has a missing value in any column.
data = data.dropna()
data

### Split data

Training/testing 80/20 split

In [None]:
import math

In [None]:
def split_data(data, split=0.8):
    """Split `data` by position into a leading and a trailing part.

    The first floor(len(data) * split) rows form the training set and the
    remaining rows the testing set; row order is preserved (no shuffling).

    Returns (training_data, testing_data).
    """
    n_train = math.floor(split * len(data))
    return data.iloc[:n_train], data.iloc[n_train:]

In [None]:
# Chronological split using split_data's default fraction.
training_data, testing_data = split_data(data)

### Normalize inputs

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
def scale_features(training_data, testing_data, features):
    """Standardise the `features` columns using statistics from the training set only.

    The scaler is fitted on `training_data[features]` and then applied to both
    frames. Columns outside `features` are copied through unchanged, and the
    input frames themselves are not modified.

    Returns (train_scaled, test_scaled, scaler_mu, scaler_std), where the last
    two are Series indexed by feature name holding the fitted means and scales.
    """
    scaler = StandardScaler().fit(training_data[features])

    scaled_frames = []
    for frame in (training_data, testing_data):
        scaled = frame.copy()
        scaled[features] = scaler.transform(frame[features])
        scaled_frames.append(scaled)
    train_scaled, test_scaled = scaled_frames

    # Keep the fitted statistics so the standardisation can be inspected or undone later
    scaler_mu = pd.Series(scaler.mean_, index=features)
    scaler_std = pd.Series(scaler.scale_, index=features)
    return train_scaled, test_scaled, scaler_mu, scaler_std

In [None]:
# Scale only the explanatory columns: targets and month dummies are left as they are,
# and the scaler inside scale_features is fitted on the training rows only.
targets = [f'{index}_next_ret' for index in indexes]
dummies = [f'month_{month}' for month in range(1,13)]
columns_to_ignore = list(dummies)  # extend with e.g. 'NASDAQ_next_ret', 'DJIA_next_ret' to hold out more columns
non_features = set(targets).union(columns_to_ignore)
features = [column for column in training_data.columns if column not in non_features]

train_scaled, test_scaled, scaler_mu, scaler_std = scale_features(training_data, testing_data, features)

### Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.decomposition import PCA

We are going to test with and without ridge (which will help reduce the impact of collinearity)

In [None]:
# Function to get print results from the models
def eval_and_report(y_true, y_pred, model_name):
    """Print one line with the model's name, R^2 and RMSE for the given predictions."""
    r2 = r2_score(y_true, y_pred)
    rmse = root_mean_squared_error(y_true, y_pred)
    print(f"{model_name:18s} | R^2: {r2:.4f} | RMSE: {rmse:.6f}")

Training function

In [None]:
def _coef_ranking(model, columns):
    """Return a fitted linear model's coefficients as a Series, largest magnitude first."""
    return pd.Series(model.coef_, index=columns).sort_values(key=np.abs, ascending=False)


def train_models(train_scaled, test_scaled, targets):
    """Fit OLS, Ridge, Lasso, ElasticNet and OLS-on-PCA for each target column.

    Parameters:
        train_scaled -- training frame containing the (scaled) feature columns and the targets
        test_scaled  -- testing frame with the same columns
        targets      -- names of the target columns; one set of models is fitted per target

    The explanatory columns are taken from the module-level `features` list.
    Regularisation strengths are chosen by time-series cross-validation on the
    training rows. The PCA projection is fitted on the training FEATURES only,
    so no target column can enter the components.

    Test-set R^2 / RMSE and the ten largest coefficients of each model are
    printed as a side effect.

    Returns a dict keyed by target. Each value is a dict holding the fitted
    models, the ranked coefficient Series, the in-sample predictions
    (`train_data_<model>`) and the test predictions (`yhat_<model>`).
    """
    results = {}

    # The design matrices are identical for every target, so build them once.
    X_train = train_scaled[features].copy()
    X_test = test_scaled[features].copy()

    # Keep enough components for 95% of the feature variance, fitted on training features only.
    pca = PCA(n_components=.95).fit(X_train)
    train_pca = pca.transform(X_train)
    test_pca = pca.transform(X_test)

    # Expanding-window folds keep validation rows after training rows.
    tscv = TimeSeriesSplit(n_splits=5)

    for target in targets:
        print(f"\n=== Target: {target} ===")
        y_train = train_scaled[target].copy()
        y_test = test_scaled[target].copy()

        # 1. Ordinary Least Squares (OLS)
        ols = LinearRegression()
        ols.fit(X_train, y_train)
        yhat_ols = ols.predict(X_test)
        eval_and_report(y_test, yhat_ols, "OLS")

        ols_coef = _coef_ranking(ols, features)
        print("Top OLS coeffs:\n", ols_coef.head(10))

        # 2. Ridge with CV over alphas (time-series CV)
        ridge = RidgeCV(alphas=np.logspace(-4, 3, 30), cv=tscv, fit_intercept=True)
        ridge.fit(X_train, y_train)
        yhat_ridge = ridge.predict(X_test)
        eval_and_report(y_test, yhat_ridge, f"Ridge (alpha={ridge.alpha_:.4g})")

        ridge_coef = _coef_ranking(ridge, features)
        print("Top Ridge coeffs:\n", ridge_coef.head(10))

        # 3. Lasso with CV over alphas (time-series CV)
        lasso = LassoCV(alphas=np.logspace(-4, 3, 30), cv=tscv, fit_intercept=True)
        lasso.fit(X_train, y_train)
        yhat_lasso = lasso.predict(X_test)
        eval_and_report(y_test, yhat_lasso, f"Lasso (alpha={lasso.alpha_:.4g})")

        lasso_coef = _coef_ranking(lasso, features)
        print("Top Lasso coeffs:\n", lasso_coef.head(10))

        # 4. ElasticNet with CV over alphas and l1_ratios (time-series CV)
        enet = ElasticNetCV(
            alphas=np.logspace(-4, 3, 40),
            l1_ratio=np.arange(.1, 1, .1),   # 1.0 == Lasso, 0.0 == Ridge
            cv=tscv,
            fit_intercept=True,
            max_iter=20000,
        )
        enet.fit(X_train, y_train)
        yhat_enet = enet.predict(X_test)
        eval_and_report(y_test, yhat_enet, f"ElasticNet (alpha={enet.alpha_:.4g}, l1_ratio={enet.l1_ratio_})")

        enet_coef = _coef_ranking(enet, features)
        print("Top ElasticNet coeffs:\n", enet_coef.head(10))

        # 5. OLS on the principal components of the features
        ols_pca = LinearRegression()
        ols_pca.fit(train_pca, y_train)
        yhat_pca = ols_pca.predict(test_pca)
        eval_and_report(y_test, yhat_pca, "OLS+PCA")

        # Store for later use
        results[target] = {
            "ols_model": ols,
            "ridge_model": ridge,
            "lasso_model": lasso,
            "enet_model": enet,
            "ols_coefs": ols_coef,
            "ridge_coefs": ridge_coef,
            "lasso_coefs": lasso_coef,
            "enet_coefs": enet_coef,
            "enet_coegs": enet_coef,  # misspelled key kept so existing lookups keep working
            "pca_model": ols_pca,
            "pca_transform": pca,  # needed to project new rows before calling pca_model
            "train_data_ols": pd.Series(ols.predict(X_train), index=y_train.index, name="ols_train"),
            "train_data_ridge": pd.Series(ridge.predict(X_train), index=y_train.index, name="ridge_train"),
            "train_data_lasso": pd.Series(lasso.predict(X_train), index=y_train.index, name="lasso_train"),
            "train_data_enet": pd.Series(enet.predict(X_train), index=y_train.index, name="enet_train"),
            "train_data_pca": pd.Series(ols_pca.predict(train_pca), index=y_train.index, name="pca_train"),
            "yhat_ols": pd.Series(yhat_ols, index=y_test.index, name=f"{target}_ols_pred"),
            "yhat_ridge": pd.Series(yhat_ridge, index=y_test.index, name=f"{target}_ridge_pred"),
            "yhat_lasso": pd.Series(yhat_lasso, index=y_test.index, name=f"{target}_lasso_pred"),
            "yhat_enet": pd.Series(yhat_enet, index=y_test.index, name=f"{target}_enet_pred"),
            "yhat_pca": pd.Series(yhat_pca, index=y_test.index, name=f"{target}_pca_pred")
        }
    return results

In [None]:
# Fit every model for every return target; test-set metrics are printed along the way.
results = train_models(train_scaled, test_scaled, targets)

Make into new df

In [None]:
models = ['ridge', 'lasso', 'enet', 'pca']  # plain OLS is fitted too but not carried forward
drift = data.copy()

# For each target and model, stitch the in-sample and test predictions into one
# series and attach it as a new column, matched on the date index.
for target in targets:
    fitted = results[target]
    for model in models:
        stitched = pd.concat([fitted[f'train_data_{model}'], fitted[f'yhat_{model}']])
        drift[f'{target}_pred_{model}'] = stitched
drift = drift.dropna()

In [None]:
# Display the frame with the prediction columns attached.
drift

Plot these results

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [None]:
def plot_results(data, baseline, measures, title, subtitles, indexes, models):
    """Plot a baseline series against model outputs, one stacked subplot per index.

    Parameters:
        data      -- DataFrame holding the columns named below
        baseline  -- suffix of the reference column: `{index}_{baseline}`
        measures  -- list of column stems; each `{index}_{measure}_{model}` is drawn
        title     -- figure title
        subtitles -- y-axis title applied to every subplot
        indexes   -- index names; one subplot row is created per entry
        models    -- model names to draw

    Each (model, measure) pair gets its own legend entry and legend group, so
    toggling it in the legend affects that pair in every subplot. Legend
    entries are shown once, taken from the first subplot.
    """
    n_rows = len(indexes)
    fig = make_subplots(
        rows=n_rows, cols=1, shared_xaxes=True, vertical_spacing=0.06,
        subplot_titles=list(indexes)
    )

    for i, index in enumerate(indexes, start=1):
        show_leg = (i == 1)

        # Reference series
        fig.add_trace(
            go.Scatter(x=data.index, y=data[f'{index}_{baseline}'], name=f'{index}_{baseline}',
                    mode="lines", line=dict(width=1.6),
                    showlegend=show_leg, legendgroup="true"),
            row=i, col=1
        )

        # Model outputs
        for model in models:
            for measure in measures:
                fig.add_trace(
                    go.Scatter(x=data.index, y=data[f'{index}_{measure}_{model}'], name=f"{model} {measure}",
                            mode="lines", line=dict(width=1.4, dash="dot"),
                            showlegend=show_leg, legendgroup=f"{model}_{measure}"),
                    row=i, col=1
                )

    fig.update_layout(
        title=title,
        height=900,
        hovermode="x unified",
        template="plotly_white",
        margin=dict(t=80, r=30, b=80, l=70),
        legend=dict(orientation="h", yanchor="top", y=-0.12, xanchor="left", x=0)
    )

    for r in range(1, n_rows + 1):
        fig.update_yaxes(title_text=subtitles, row=r, col=1)

    fig.show()

In [None]:
# Realised log returns against each model's predicted next-period return.
plot_results(drift, 'log_ret', ['next_ret_pred'], 'Linear Regression on Drift', 'Log Return', indexes, models)

Overall returns

In [None]:
# First close of each index, used as the starting level of the simulated paths.
starting_prices = {index: drift[f'{index}_Close'].iloc[0] for index in indexes}

# Compound the predicted log returns from the starting level: P_0 * exp(cumulative sum).
# NOTE(review): the prediction stored at row t is for the return AFTER t, yet the cumulative
# sum places it on row t itself — check whether a one-row shift is intended.
for index in indexes:
    for model in models:
        drift[f'{index}_price_pred_{model}'] = starting_prices[index] * np.exp(drift[f'{index}_next_ret_pred_{model}'].cumsum())

In [None]:
# Actual closes against the price paths implied by the predicted returns.
plot_results(drift, 'Close', ['price_pred'], 'Price Path Evolution of Drift', 'Price', indexes, models)

# It looks like this may be an ok base for mu. Let's try to build sigma.

In [None]:
# Start the volatility stage from a fresh copy of the master frame (without the drift prediction columns).
vol_data = data.copy()
vol_data



In [None]:
# Squared residuals from previous -- using ElasticNet
# Turns out we actually need to also take the log because they are so tiny
# The result is matched back to vol_data by date; rows absent from `drift` end up NaN.
# NOTE(review): a residual of exactly zero would give log(0) = -inf.

for index in indexes:
    vol_data[f'{index}_log_squared_residual'] = np.log(np.square(drift[f'{index}_next_ret'] - drift[f'{index}_next_ret_pred_enet']))

In [None]:
training_data, testing_data = split_data(vol_data)

# Make sure we only fit on training_data and explanatory variables
targets = [f'{index}_log_squared_residual' for index in indexes]
dummies = [f'month_{month}' for month in range(1,13)]

# The target is log((next_ret - prediction)^2), i.e. a direct function of the realised
# next-period return. Those `*_next_ret` columns are not known at prediction time and
# must not be offered to the model as explanatory variables.
realised_future = [f'{index}_next_ret' for index in indexes]

excluded = set(targets) | set(dummies) | set(realised_future)
features = [column for column in training_data.columns if column not in excluded]

train_scaled, test_scaled, vol_scaler_mu, vol_scaler_std = scale_features(training_data, testing_data, features)

In [None]:
# Same set of models as for the drift, now fitted to the log squared residual targets.
vol_results = train_models(train_scaled, test_scaled, targets)

Make into new df

In [None]:
diffusion = vol_data.copy()

# For each target and model, stitch the in-sample and test predictions into one
# series and attach it as a new column, matched on the date index.
for target in targets:
    fitted = vol_results[target]
    for model in models:
        stitched = pd.concat([fitted[f'train_data_{model}'], fitted[f'yhat_{model}']])
        diffusion[f'{target}_pred_{model}'] = stitched

diffusion = diffusion.dropna()

In [None]:
# Realised log squared residuals against each model's prediction of them.
plot_results(diffusion, 'log_squared_residual', ['log_squared_residual_pred'], 'Linear Regression on Diffusion', 'Log Residuals', indexes, models)

No clue how PCA could be at r^2 = .999 in the test set while all other models are around .1-.2 ... clearly some sort of overfitting going on there. Also looks like log transform loses a significant amount of data, but I think the distribution is better.

Convert to variance / std

In [None]:
# Undo the log to get a predicted variance, then take the square root for a standard deviation.
for index in indexes:
    for model in models:
        variance = np.exp(diffusion[f'{index}_log_squared_residual_pred_{model}'])
        diffusion[f'{index}_variance_pred_{model}'] = variance
        diffusion[f'{index}_std_pred_{model}'] = np.sqrt(variance)

In [None]:
# Combine the drift and diffusion frames on dates present in both. Columns that exist
# in both frames keep the drift version; the diffusion copy is tagged and removed.
predictions = pd.merge(drift, diffusion, left_index=True, right_index=True, how='inner', suffixes=('', '_to_drop'))
duplicate_columns = [col for col in predictions.columns if col.endswith('_to_drop')]
predictions = predictions.drop(columns=duplicate_columns)


# Boll bands with enet
def create_boll_bands(vol=None, std=2, window=20):
    """Add moving-average bands to the module-level `predictions` frame.

    For every index and model this writes:
        `{index}_sma`                -- rolling mean of the close over `window` rows
                                        (NaN until a full window is available)
        `{index}_upper_boll_{model}` -- sma * (1 + std * vol)
        `{index}_lower_boll_{model}` -- sma * (1 - std * vol)

    Parameters:
        vol    -- volatility used for every band, as a fraction of price; a scalar
                  or a Series aligned to `predictions`. When None, each model's own
                  `{index}_std_pred_{model}` column is used instead.
        std    -- band half-width in multiples of `vol`
        window -- moving-average length in rows

    Operates on the module-level `predictions`, `indexes` and `models`; returns None.
    """
    for index in indexes:
        sma_col = f'{index}_sma'
        # The moving average does not depend on the model, so compute it once per index.
        predictions[sma_col] = predictions[f'{index}_Close'].rolling(window=window, min_periods=window).mean()
        sma = predictions[sma_col]

        for model in models:
            # `is None` rather than `!= None`: comparing a Series with != is elementwise,
            # and using that result as a condition raises.
            band_vol = predictions[f'{index}_std_pred_{model}'] if vol is None else vol
            half_width = std * band_vol * sma
            predictions[f'{index}_upper_boll_{model}'] = sma + half_width
            predictions[f'{index}_lower_boll_{model}'] = sma - half_width

In [None]:
# Build the bands from each model's own predicted std, then plot the ElasticNet bands and price path.
create_boll_bands()
plot_results(predictions, 'Close', ['upper_boll', 'lower_boll', 'price_pred'], 'Bollinger Bands', 'Price', indexes, ['enet'])

Let's see some stats on boll bands here

In [None]:
def set_boll_counts():
    """Add band-break diagnostics to the module-level `predictions` frame.

    For every index/model pair (bands must already exist) this writes:
        touch_upper / touch_lower   -- close at or beyond the band
        inside_band                 -- neither of the above
        cross_above_upper           -- close moved from at-or-below the upper band
                                       on the previous row to above it on this row
        cross_below_lower           -- the mirror image for the lower band
        boll_signal                 -- +1 at/above upper, -1 at/below lower, else 0
        cum_upper_breaks, cum_lower_breaks, cum_band_breaks -- running cross counts
        roll20_breaks               -- crosses within the trailing 20 rows
        band_width                  -- (upper - lower) / sma

    Column names follow `{index}_{name}_{model}`. Returns None.
    """
    for index in indexes:
        for model in models:
            def col(stem):
                return f'{index}_{stem}_{model}'

            price = predictions[f'{index}_Close']
            upper = predictions[col('upper_boll')]
            lower = predictions[col('lower_boll')]

            # Where the close sits relative to the band today
            at_or_above = (price >= upper)
            at_or_below = (price <= lower)
            predictions[col('touch_upper')] = at_or_above
            predictions[col('touch_lower')] = at_or_below
            predictions[col('inside_band')] = (~at_or_above & ~at_or_below)

            # A cross needs the previous row on the other side (the first row never counts)
            crossed_up = (price.shift(1) <= upper.shift(1)) & (price > upper)
            crossed_down = (price.shift(1) >= lower.shift(1)) & (price < lower)
            predictions[col('cross_above_upper')] = crossed_up
            predictions[col('cross_below_lower')] = crossed_down

            # Compact signal
            predictions[col('boll_signal')] = np.select(
                [at_or_above, at_or_below],
                [1, -1],
                default=0
            ).astype(int)

            # Running and recent counts of cross events
            either_cross = (crossed_up | crossed_down)
            predictions[col('cum_upper_breaks')] = crossed_up.cumsum()
            predictions[col('cum_lower_breaks')] = crossed_down.cumsum()
            predictions[col('cum_band_breaks')] = either_cross.cumsum()
            predictions[col('roll20_breaks')] = either_cross.rolling(20, min_periods=1).sum()

            # Relative band width
            predictions[col('band_width')] = (upper - lower) / predictions[f'{index}_sma']

In [None]:
def forward_returns(price, horizons=(1,3,5,10)):
    """Log return from each row to `h` rows ahead, for every `h` in `horizons`.

    Returns a DataFrame indexed like `price` with one column per horizon (the
    column label is the horizon itself). The value at row t is
    log(price[t+h]) - log(price[t]); the last `h` rows of each column are NaN.
    """
    log_px = np.log(price.astype(float))
    by_horizon = {h: log_px.shift(-h) - log_px for h in horizons}
    return pd.DataFrame(by_horizon, index=price.index)

def reenter_band_within(pred_df, idx, mdl, horizon=5):
    """Flag rows where the close is outside the band and is back inside within `horizon` rows.

    Reads `{idx}_Close`, `{idx}_upper_boll_{mdl}` and `{idx}_lower_boll_{mdl}`
    from `pred_df`. "Outside" means strictly above the upper or strictly below
    the lower band. The look-ahead covers rows t+1 .. t+horizon; rows past the
    end of the data count as not inside.

    Returns a boolean Series indexed like `pred_df`.
    """
    close = pred_df[f"{idx}_Close"]
    upper_band = pred_df[f"{idx}_upper_boll_{mdl}"]
    lower_band = pred_df[f"{idx}_lower_boll_{mdl}"]

    is_outside = (close > upper_band) | (close < lower_band)
    is_inside = ~(is_outside)

    # OR together "inside k rows from now" for k = 1..horizon
    back_inside_soon = pd.Series(False, index=pred_df.index)
    for step in range(1, horizon+1):
        back_inside_soon = back_inside_soon | is_inside.shift(-step)

    return is_outside & back_inside_soon

def summarize_boll_stats(predictions, indexes, models, horizons=(1,3,5,10), reenter_N=5):
    """Tabulate Bollinger-band statistics for every (index, model) pair.

    Reads ``{idx}_Close``, ``{idx}_sma`` and the per-model band / touch /
    cross / inside columns from ``predictions`` and returns a DataFrame with
    one row per pair containing:

    * base rates: share of rows inside the band, touching it, or crossing it;
    * median and 90th-percentile relative band width, in percent;
    * the share of outside-band days that are followed by an inside-band day
      within ``reenter_N`` rows (conditional on being outside);
    * per signal: the signal count, plus the hit rate and the mean / median
      forward log return (basis points) for every horizon in ``horizons``.

    A "hit" is a positive forward return for ``crossU`` / ``touchL`` and a
    negative one for ``crossL`` / ``touchU``.  Rate columns are fractions in
    [0, 1] even though their names contain ``%``.  When ``indexes`` or
    ``models`` is empty, an empty frame with the base columns is returned.
    """
    reenter_col = f"% reenter≤{reenter_N}d after outside"
    base_cols = ["index","model","obs","% inside","% touchU","% touchL","% crossU","% crossL",
                 "band_width_med%","band_width_p90%", reenter_col]
    bullish_signals = ("crossU", "touchL")

    rows = []
    for idx in indexes:
        close = predictions[f"{idx}_Close"]
        fwd = forward_returns(close, horizons)
        n = len(close.dropna())
        for mdl in models:
            # Columns built earlier
            up     = predictions[f"{idx}_upper_boll_{mdl}"]
            low    = predictions[f"{idx}_lower_boll_{mdl}"]
            inside = predictions[f"{idx}_inside_band_{mdl}"].astype(bool)
            width  = (up - low) / predictions[f"{idx}_sma"]

            sigs = {
                "touchU": predictions[f"{idx}_touch_upper_{mdl}"].astype(bool),
                "touchL": predictions[f"{idx}_touch_lower_{mdl}"].astype(bool),
                "crossU": predictions[f"{idx}_cross_above_upper_{mdl}"].astype(bool),
                "crossL": predictions[f"{idx}_cross_below_lower_{mdl}"].astype(bool),
            }

            # Re-entry rate is conditional on being outside the band: average
            # the re-entry flag over outside days only, not over every row.
            outside = (close > up) | (close < low)
            reenter = reenter_band_within(predictions, idx, mdl, horizon=reenter_N)
            pct_reenter_after_outside = float(reenter[outside].mean()) if outside.any() else np.nan

            row = {
                "index": idx,
                "model": mdl,
                "obs": n,
                "% inside": inside.mean(),
                "% touchU": sigs["touchU"].mean(),
                "% touchL": sigs["touchL"].mean(),
                "% crossU": sigs["crossU"].mean(),
                "% crossL": sigs["crossL"].mean(),
                "band_width_med%": float(np.nanmedian(width))*100.0,
                "band_width_p90%": float(np.nanpercentile(width.dropna(), 90))*100.0,
                reenter_col: pct_reenter_after_outside,
            }

            # Hit rates & conditional forward returns
            for name, mask in sigs.items():
                idx_sig = mask[mask].index
                row[f"{name}_n"] = int(len(idx_sig))
                for h in horizons:
                    # Drop signal dates whose forward return is unknown (the
                    # last h rows); otherwise they would be scored as misses.
                    fr = fwd[h].reindex(idx_sig).dropna()
                    hit = (fr > 0) if name in bullish_signals else (fr < 0)
                    # With no usable signals every statistic below is NaN.
                    row[f"{name}_hit{h}d%"] = float(hit.mean())
                    # report in basis points for readability
                    row[f"{name}_mean{h}d(bp)"] = float(fr.mean() * 1e4)
                    row[f"{name}_med{h}d(bp)"] = float(fr.median() * 1e4)

            rows.append(row)

    if not rows:
        return pd.DataFrame(columns=base_cols)

    out = pd.DataFrame(rows)
    # Base columns first, then every signal metric in alphabetical order
    metric_cols = [c for c in out.columns if c not in base_cols]
    return out[base_cols + sorted(metric_cols)]

# ---- run it ----
# set_boll_counts() takes no arguments here; it presumably adds the touch/cross/count
# columns to the shared `predictions` frame — confirm against its definition.
set_boll_counts()
# Summarise the train and test partitions separately so the rates can be compared
predictions_train, predictions_test = split_data(predictions)
summary_train = summarize_boll_stats(predictions_train, indexes, models, horizons=(1,3,5,10), reenter_N=5)
summary_test = summarize_boll_stats(predictions_test, indexes, models, horizons=(1,3,5,10), reenter_N=5)

# Show every column of the wide summary tables, sorted by index then model
# (no filtering on sample size is applied here)
pd.set_option('display.max_columns', None)
display(summary_train.sort_values(["index","model"]))
display(summary_test.sort_values(["index","model"]))

# Backtesting

Sizing strategy 1: Using Merton-Kelly criterion

In [None]:
def get_merton_kelly_size(mu: float, rf: float, sigma: float):
    """Merton-Kelly fraction: excess return ``mu - rf`` over variance ``sigma**2``.

    All three inputs must refer to the same period.  ``sigma == 0`` is not
    guarded against.
    """
    excess_return = mu - rf
    variance = sigma**2
    return excess_return / variance

In [None]:
def get_kelly_criterion(odds, p, q):
    """Discrete Kelly fraction ``(odds*p - q) / odds``.

    ``odds`` is the net payoff per unit staked, ``p`` the win probability and
    ``q`` the loss probability.
    """
    edge = odds*p - q
    return edge/odds

Sizing strategy 2: Using vol target

In [None]:
def get_vol_target_sizing(target: float, vol: float):
    """Exposure multiplier that scales a position to ``target`` volatility.

    Returns ``target / vol``; both must be quoted over the same period.
    """
    return target/vol

## Trading strategy 1: Trade based on our models

### Backtest

In [None]:
# Buy-and-hold benchmark: $10,000 compounded at each index's daily log return.
# The returns are read from `predictions` itself: `bt` is only created (as a
# copy of `predictions`) in the next cell, so referencing it here depends on
# leftover state from an earlier run.
for index in indexes:
    predictions[f'{index}_baseline_wealth'] = np.exp(predictions[f'{index}_log_ret'].cumsum()) * 10000

This may be slower than a vectorized approach, but the strategy is much easier to implement with an iterative, row-by-row process

In [None]:
# Create backtest df copy
bt = predictions.copy()

# Set initial conditions
starting_cash = 10000
bt_data = {}

for index in indexes:
    bt_data[index] = {}

    for model in models:
        # Float columns: the loop below writes fractional dollar amounts, and
        # writing floats into integer columns is deprecated in pandas.
        bt[f'{index}_portfolio_value_{model}'] = 0.0
        bt[f'{index}_cash_{model}'] = 0.0
        # Single positional write on the frame; the chained form
        # `bt[col].iloc[0] = ...` can end up modifying a temporary copy.
        bt.iloc[0, bt.columns.get_loc(f'{index}_cash_{model}')] = starting_cash
        bt[f'{index}_signal_{model}'] = 0

        bt_data[index][model] = {
            'last_value': 0,
            'last_cash': starting_cash
        }

# Starts at second day
for idx, row in bt.iloc[1:].iterrows():
    for index in indexes:
        for model in models:
            last_value = bt_data[index][model]['last_value']
            last_cash = bt_data[index][model]['last_cash']

            # mu and rf are only consumed by the commented-out sizing alternatives below.
            # NOTE(review): rf uses ten_yr as a decimal rate here, while sharpe_and_mdd
            # divides the same column by 100 — confirm the column's units.
            mu = np.exp(row[f'{index}_next_ret_pred_enet'])
            vol = row[f'{index}_std_pred_{model}']
            rf = (1+row['ten_yr'])**(1/252)-1 # Daily
            investment = 0

            # Update portfolio value: mark yesterday's position to today's return
            current_value = last_value * np.exp(row[f'{index}_log_ret'])

            # Calculate total equity
            total_equity = current_value + last_cash

            # Get bet sizing with Merton-Kelly
            #target_pos = get_merton_kelly_size(mu, rf, vol) * total_equity # Daily rf
            #target_pos = max(-total_equity, min(target_pos, total_equity)) # Make sure we don't exceed equity


            # Get bet sizing with vol target (10% annualised, converted to daily)
            target_pos = get_vol_target_sizing(.1/np.sqrt(252), vol) * total_equity
            if np.isnan(target_pos):
                # No usable vol forecast: keep the current position.  Without this
                # guard the min()/max() clamp returns -total_equity for a NaN
                # target (every comparison with NaN is False), i.e. a full short.
                target_pos = current_value
            else:
                target_pos = max(-total_equity, min(target_pos, total_equity)) # Make sure we don't exceed equity

            # Bet sizing based on size of potential increase/decrease, signed
            #target_pos = (mu - rf)/abs(mu - rf) * total_equity # super leverage
            #target_pos = max(-total_equity, min(target_pos, total_equity)) # Make sure we don't exceed equity

            # Execute trade / Update investment amount to reach target position
            investment = target_pos - current_value

            # Update cash
            last_cash = bt_data[index][model]['last_cash'] = last_cash - investment
            bt.loc[idx, f'{index}_cash_{model}'] = last_cash

            # Store portfolio value as last value for calculation
            bt_data[index][model]['last_value'] = current_value + investment
            bt.loc[idx, f'{index}_portfolio_value_{model}'] = bt_data[index][model]['last_value']

            # Set signal for plotting later (+1 bought, -1 sold, 0 unchanged)
            if investment > 0:
                bt.loc[idx, f'{index}_signal_{model}'] = 1
            elif investment < 0:
                bt.loc[idx, f'{index}_signal_{model}'] = -1

for index in indexes:
    for model in models:
        bt[f'{index}_total_value_{model}'] = bt[f'{index}_portfolio_value_{model}'] + bt[f'{index}_cash_{model}']
        final_value = bt[f'{index}_total_value_{model}'].iloc[-1]
        print(f'Final value for {index}, {model}: {final_value:.2f}')
    print('\n')

In [None]:
# Chart the backtest: plot_results (defined elsewhere) is given the baseline column suffix,
# the per-model column prefix 'total_value', a title and a y-axis label.
plot_results(bt, 'baseline_wealth', ['total_value'], 'Backtest Performance', 'Wealth', indexes, models)

In [None]:
def sharpe_and_mdd(df, col_equity, col_rf="ten_yr", periods_per_year=252):
    """Annualised Sharpe ratio and maximum drawdown of an equity curve.

    df: DataFrame holding the equity column and the risk-free column
    col_equity: name of the equity curve column
    col_rf: name of the risk-free column; its values are treated as an annual
        rate quoted in percent and converted to a per-period rate
    periods_per_year: number of rows per year (252 for daily data)

    Returns ``(sharpe, max_dd)``.  ``sharpe`` is NaN unless the excess returns
    have a positive standard deviation; ``max_dd`` is the lowest value of
    ``equity / running_peak - 1``.
    """
    equity = df[col_equity]

    # Simple period-over-period returns (the first row has no predecessor)
    returns = equity.pct_change().dropna()

    # De-annualise the percent-quoted risk-free rate on the same dates
    annual_rf_pct = df[col_rf].reindex(returns.index).astype(float)
    period_rf = (1+annual_rf_pct/100)**(1/periods_per_year)-1
    excess = returns - period_rf

    dispersion = excess.std()
    if dispersion > 0:
        sharpe = (excess.mean() / dispersion) * np.sqrt(periods_per_year)
    else:
        sharpe = np.nan

    # Largest peak-to-trough loss relative to the running maximum
    peak = equity.cummax()
    max_dd = (equity / peak - 1.0).min()

    return sharpe, max_dd

In [None]:
def _cagr_pct(curve):
    """Compound annual growth rate, in percent, of a date-indexed equity curve.

    Elapsed time is the calendar span between the first and last index
    entries; NaN is returned when that span is not positive.
    """
    elapsed_years = (curve.index[-1] - curve.index[0]).days / 365.25
    if elapsed_years <= 0:
        return np.nan
    return 100 * ((curve.iloc[-1] / curve.iloc[0]) ** (1 / elapsed_years) - 1)


# First 80% of rows is the training window, the remainder the testing window
cutoff = math.floor(len(bt)*.8)
training_returns = bt.iloc[:cutoff]
testing_returns = bt.iloc[cutoff:]

for index in indexes:
    for model in models:
        total_value = f'{index}_total_value_{model}'

        # CAGR uses the true elapsed time of each window; the earlier
        # `end.year - start.year + end.month/12` ignored the start month.
        train_curve = training_returns[total_value]
        final_training_value = train_curve.iloc[-1]
        training_cagr = _cagr_pct(train_curve)
        print(f'Final training value for {index}, {model}: {final_training_value}, CAGR: {training_cagr:.2f}%')

        test_curve = testing_returns[total_value]
        final_testing_value = test_curve.iloc[-1]
        testing_cagr = _cagr_pct(test_curve)
        print(f'Final testing value for {index}, {model}: {final_testing_value}, CAGR: {testing_cagr:.2f}%')

        # ---- Training metrics ----
        train_sharpe, train_mdd = sharpe_and_mdd(training_returns, total_value, col_rf="ten_yr")
        print(f"Training Sharpe: {train_sharpe:.2f}, Max Drawdown: {train_mdd:.2%}")

        # ---- Testing metrics ----
        test_sharpe, test_mdd = sharpe_and_mdd(testing_returns, total_value, col_rf="ten_yr")
        print(f"Testing Sharpe: {test_sharpe:.2f}, Max Drawdown: {test_mdd:.2%}")

        print('\n')

Trading strategy 2: Trading using Bollinger Bands and a moving average

In [None]:
# ---- helpers ----
def forward_log_returns(price: pd.Series, horizons=(1,3,5,10)) -> pd.DataFrame:
    """Forward log returns ``log P_{t+h} - log P_t`` for each ``h`` in ``horizons``.

    The frame is indexed like ``price`` with one column per horizon; the last
    ``h`` rows of column ``h`` are NaN.
    """
    log_price = np.log(price.astype(float))
    by_horizon = {}
    for h in horizons:
        by_horizon[h] = log_price.shift(-h) - log_price
    return pd.DataFrame(by_horizon, index=price.index)

def simple_from_log(x: pd.Series) -> pd.Series:
    """Convert log returns to simple returns, ``exp(x) - 1``."""
    gross_return = np.exp(x)
    return gross_return - 1.0

def rf_over_h(annual_rf: float, H: int, periods_per_year=252) -> float:
    """Compound the annual rate ``annual_rf`` (a decimal, e.g. 0.05) over ``H`` periods."""
    year_fraction = H/periods_per_year
    growth = (1.0 + annual_rf) ** year_fraction
    return growth - 1.0

def kelly_empirical(mean_R: float, var_R: float, rf_H: float,
                    frac: float=0.25, cap: float=1.0, eps: float=1e-12) -> float:
    """Fractional Kelly size from an empirical mean and variance.

    Full Kelly is ``(mean_R - rf_H) / var_R`` with the variance floored at
    ``eps``; the result is scaled by ``frac`` and clipped to ``[-cap, cap]``.
    """
    floored_var = max(var_R, eps)
    full_kelly = (mean_R - rf_H) / floored_var
    scaled = frac * full_kelly
    return float(np.clip(scaled, -cap, cap))

# ---- 1) build empirical horizon stats per (index, model) and signal ----
def build_signal_horizon_stats(pred, indexes, models, horizons=(1,3,5,10), min_signals=20):
    """Choose, per (index, model) and cross signal, the best holding horizon.

    For every horizon the simple forward returns following each cross event
    are collected.  Horizons with fewer than ``min_signals`` usable events are
    ignored.  For upward crosses the horizon with the highest mean return is
    kept; for downward crosses the one with the lowest mean return.

    Returns ``stats[(idx, mdl)] = {"crossU": {...}, "crossL": {...}}`` where
    each inner dict has keys ``H``, ``mean_R``, ``var_R`` (sample variance)
    and ``n``.  ``H`` is None when no horizon had enough events.
    """
    def _pick_horizon(simple_fwd, events, want_highest):
        # Scan horizons in order and keep the first strictly better candidate
        chosen = {"H": None, "mean_R": np.nan, "var_R": np.nan, "n": 0}
        for H in horizons:
            sample = simple_fwd[H].where(events).dropna()
            if len(sample) < min_signals:
                continue
            avg = float(sample.mean())
            spread = float(sample.var(ddof=1))
            if chosen["H"] is None:
                is_better = True
            elif want_highest:
                is_better = avg > chosen["mean_R"]
            else:
                is_better = avg < chosen["mean_R"]
            if is_better:
                chosen = {"H": H, "mean_R": avg, "var_R": spread, "n": int(len(sample))}
        return chosen

    stats = {}
    for idx in indexes:
        log_fwd = forward_log_returns(pred[f"{idx}_Close"], horizons=horizons)
        simple_fwd = log_fwd.apply(simple_from_log)

        for mdl in models:
            up_events = pred[f"{idx}_cross_above_upper_{mdl}"].astype(bool)
            down_events = pred[f"{idx}_cross_below_lower_{mdl}"].astype(bool)
            stats[(idx, mdl)] = {
                "crossU": _pick_horizon(simple_fwd, up_events, True),
                "crossL": _pick_horizon(simple_fwd, down_events, False),
            }
    return stats

# ---- 2) backtest: enter on cross, lock position H days, size with Kelly(mean_R_H, var_R_H) ----
def backtest_cross_empirical_kelly(pred, indexes, models, stats,
                                   rf_col="ten_yr", starting_cash=10_000,
                                   enter_next_bar=True, kelly_frac=0.25, kelly_cap=1.0):
    """Backtest band-cross entries sized with an empirical fractional Kelly.

    For each (index, model) the strategy is flat until a cross signal fires.
    It then takes a long (upper cross) or short (lower cross) exposure sized
    by ``kelly_empirical`` from the entry in ``stats`` and keeps it for that
    entry's horizon ``H`` bars.  Equity compounds as
    ``exp(position * daily_log_return)``.

    Parameters
    ----------
    pred : frame with ``{idx}_log_ret`` and the per-model cross columns; a
        missing column is treated as "no signal" / zero return.
    stats : output of ``build_signal_horizon_stats``.
    rf_col : column holding the annual risk-free rate; a missing column or
        NaN value is treated as 0.
    starting_cash : initial equity of every (index, model) book.
    enter_next_bar : when True, a signal on bar ``t-1`` is acted on at bar
        ``t``.  The first bar has no previous bar, so nothing is entered.
    kelly_frac, kelly_cap : forwarded to ``kelly_empirical``.

    Returns
    -------
    A copy of ``pred`` with ``{idx}_pos_{mdl}`` and ``{idx}_equity_{mdl}``
    columns added.
    """
    bt = pred.copy()
    keys = [(idx, mdl) for idx in indexes for mdl in models]
    for idx, mdl in keys:
        bt[f"{idx}_pos_{mdl}"] = 0.0
        bt[f"{idx}_equity_{mdl}"] = np.nan

    eq = {key: starting_cash for key in keys}
    hold = {key: 0 for key in keys}
    pos_prev = {key: 0.0 for key in keys}   # position carried from the previous bar

    # Column membership cannot change inside the loop, so resolve it once
    available = set(bt.columns)
    has_rf = rf_col in available

    dates = bt.index
    for i, t in enumerate(dates):
        if enter_next_bar:
            # Act on the previous bar's signal; the first bar has none
            t_sig = dates[i-1] if i > 0 else None
        else:
            t_sig = t

        # NOTE(review): the rate is used as a decimal here, whereas sharpe_and_mdd
        # divides the same column by 100 — confirm its units.
        ann_rf_t = 0.0
        if has_rf and pd.notna(bt.loc[t, rf_col]):
            ann_rf_t = float(bt.loc[t, rf_col])

        for idx, mdl in keys:
            key = (idx, mdl)
            pos = pos_prev[key]

            # countdown & exit
            if hold[key] > 0:
                hold[key] -= 1
                if hold[key] == 0:
                    pos = 0.0

            # enter if flat and we have a signal + stats
            if pos == 0.0 and t_sig is not None:
                up_col = f"{idx}_cross_above_upper_{mdl}"
                down_col = f"{idx}_cross_below_lower_{mdl}"
                crossU = bool(bt.loc[t_sig, up_col]) if up_col in available else False
                crossL = bool(bt.loc[t_sig, down_col]) if down_col in available else False

                # NOTE(review): abs() keeps the signal's direction even when the
                # Kelly fraction is negative (no empirical edge) — confirm intent.
                if crossU:
                    s = stats[key]["crossU"]
                    if s["H"] is not None and s["n"] > 0 and s["var_R"] > 0:
                        rf_H = rf_over_h(ann_rf_t, s["H"])
                        f = kelly_empirical(s["mean_R"], s["var_R"], rf_H, kelly_frac, kelly_cap)
                        pos = +abs(f)
                        hold[key] = int(s["H"])

                elif crossL:
                    s = stats[key]["crossL"]
                    if s["H"] is not None and s["n"] > 0 and s["var_R"] > 0:
                        rf_H = rf_over_h(ann_rf_t, s["H"])
                        # for shorts the expected PnL is -mean_R; the variance is unchanged
                        f = kelly_empirical(-s["mean_R"], s["var_R"], rf_H, kelly_frac, kelly_cap)
                        pos = -abs(f)
                        hold[key] = int(s["H"])

            pos_prev[key] = pos
            bt.loc[t, f"{idx}_pos_{mdl}"] = pos

            # apply PnL with daily log return (missing or NaN return counts as 0)
            ret_col = f"{idx}_log_ret"
            r_t = 0.0
            if ret_col in available and pd.notna(bt.loc[t, ret_col]):
                r_t = float(bt.loc[t, ret_col])
            eq[key] = eq[key] * np.exp(pos * r_t)
            bt.loc[t, f"{idx}_equity_{mdl}"] = eq[key]
    return bt

# ---- 3) run it ----
# NOTE(review): sig_stats is estimated on the same full `predictions` frame that is
# then backtested, so horizon choice and Kelly inputs are in-sample — confirm intended.
# This also rebinds the global `bt` used by earlier cells.
horizons = (1,3,5,10)
sig_stats = build_signal_horizon_stats(predictions, indexes, models, horizons=horizons, min_signals=20)
bt = backtest_cross_empirical_kelly(predictions, indexes, models, sig_stats,
                                    rf_col="ten_yr",
                                    starting_cash=10_000,
                                    enter_next_bar=True,
                                    kelly_frac=0.25,  # fractional Kelly
                                    kelly_cap=1.0)    # leverage cap

# ---- 4) quick reporting ----
def sharpe_and_mdd_from_equity(eq: pd.Series, periods_per_year=252):
    """Annualised Sharpe ratio (no risk-free adjustment) and max drawdown.

    The Sharpe ratio is computed from the log returns of ``eq`` and is NaN
    unless their standard deviation is positive.  Both values are returned as
    plain floats.
    """
    log_rets = np.log(eq).diff().dropna()
    dispersion = log_rets.std()
    if dispersion > 0:
        sharpe = (log_rets.mean() / dispersion) * np.sqrt(periods_per_year)
    else:
        sharpe = np.nan
    running_peak = eq.cummax()
    mdd = (eq / running_peak - 1.0).min()
    return float(sharpe), float(mdd)

# One summary line per (index, model): final equity, Sharpe, max drawdown, and the
# chosen horizon H* with its event count and mean forward return for each cross side.
for idx in indexes:
    for mdl in models:
        col = f"{idx}_equity_{mdl}"
        s, m = sharpe_and_mdd_from_equity(bt[col].dropna())
        fv = bt[col].dropna().iloc[-1]
        ss = sig_stats[(idx, mdl)]
        # H prints as None (with n=0 and mean=nan) when no horizon had enough events
        print(f"{idx}/{mdl}: Final ${fv:,.0f} | Sharpe {s:.2f} | MaxDD {m:.1%} | "
              f"H* U={ss['crossU']['H']} (n={ss['crossU']['n']} mean={ss['crossU']['mean_R']:.4f}) "
              f"L={ss['crossL']['H']} (n={ss['crossL']['n']} mean={ss['crossL']['mean_R']:.4f})")


In [None]:
# Chart the cross-strategy equity columns against the baseline; `bt` here is the frame
# returned by backtest_cross_empirical_kelly. NOTE(review): it carries 'baseline_wealth'
# columns only if they were added to `predictions` beforehand — verify.
plot_results(bt, 'baseline_wealth', ['equity'], 'Backtest Performance', 'Wealth', indexes, models)

# MS-GARCH Volatility Model

In [None]:
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
from arch.univariate import ConstantMean, GARCH, StudentsT
import re

Test implementation pulled from GPT (Python doesn't have an MS-GARCH library, only separate implementations with reduced functionality)

In [None]:
class MSGARCH:
    """
    Markov switching means & transitions via statsmodels.MarkovRegression,
    per-state GARCH(1,1) via arch; combine with Gray (1996) approximation.
    """
    def __init__(self, k=2, thresh=0.6, max_iter=1, verbose=False, dist='normal'):
        self.k = int(k)
        self.thresh = float(thresh)  # posterior cutoff for state-specific GARCH fit
        self.max_iter = int(max_iter)
        self.verbose = verbose
        self.dist = dist.lower()
        assert self.dist in ("normal", "t"), "dist must be 'normal' or 't'"
        self.nu_ = None   # will hold per-state dof if dist='t'

    def fit(self, r: pd.Series | np.ndarray):
        r = np.asarray(pd.Series(r).astype(float).dropna())
        T = len(r)
        assert T > 50, "Need enough data"

        # 1) HMM on returns with state-dependent intercept (mean) and variance
        #    (statsmodels allows switching_variance=True, which helps separate regimes)
        mod = MarkovRegression(r, k_regimes=self.k,
                               trend='c',  # intercept per regime
                               switching_variance=True)
        res = mod.fit(disp=False)
        self._mr_mod_ = mod
        self._mr_res_ = res

        # --- 1) Extract regime-specific means (intercepts) ---
        names = np.asarray(res.model.param_names)
        vals  = np.asarray(res.params)

        mask = (np.char.find(names, 'intercept') >= 0)
        if not mask.any():
            mask = (np.char.find(names, 'const') >= 0)

        intercept_names = names[mask]
        intercept_vals  = vals[mask]

        pairs = []
        for n, v in zip(intercept_names, intercept_vals):
            m = re.search(r'\[(\d+)\]', n)
            idx = int(m.group(1)) if m else 10**9
            pairs.append((idx, float(v)))
        pairs.sort(key=lambda t: t[0])
        mu = np.array([p[1] for p in pairs])  # length K

        # --- 2) Transition matrix ---
        P_left = mod.regime_transition_matrix(res.params)  # columns sum to 1
        P = P_left.T                                      # rows sum to 1, matches our code convention

        # --- 3) Smoothed posteriors ---
        gamma_obj = res.smoothed_marginal_probabilities
        if hasattr(gamma_obj, "to_numpy"):
            gamma = gamma_obj.to_numpy()
        else:
            gamma = np.asarray(gamma_obj)                     # shape (T, K)

        # 2) Fit GARCH(1,1) separately for each regime using high-probability times
        self._arch_results_ = []
        garch_params = []
        nus = []
        for k in range(self.k):
            mask = (gamma[:, k] == gamma.max(1)) & (gamma[:, k] >= self.thresh)
            idx = np.where(mask)[0]
            if len(idx) < 30:
                idx = np.argsort(gamma[:, k])[-max(30, T // self.k):]
            r_k = r[idx] - mu[k]  # center by state mean

            am = ConstantMean(r_k)
            am.volatility = GARCH(1, 0, 1)
            if self.dist == "t":
                am.distribution = StudentsT()
            res_k = am.fit(disp="off")

            self._arch_results_.append(res_k)
            v = res_k.params
            omega = float(v["omega"])
            alpha = float(v["alpha[1]"])
            beta  = float(v["beta[1]"])

            # (optional) safety clamp on persistence
            s = max(alpha + beta, 1e-6)
            if s >= 0.998:
                shrink = (s / 0.98)
                alpha /= shrink; beta /= shrink

            garch_params.append((omega, alpha, beta))

            if self.dist == "t":
                # ARCH names this 'nu'
                nu = float(v["nu"])
                # ensure nu > 2 so variance exists
                nu = max(nu, 2.01)
                nus.append(nu)

        garch_params = np.array(garch_params)
        omega = garch_params[:, 0]; alpha = garch_params[:, 1]; beta = garch_params[:, 2]
        self.omega_, self.alpha_, self.beta_ = omega, alpha, beta
        self.nu_ = np.array(nus) if self.dist == "t" else None

        # 3) Gray recursion using gamma as pi_t
        h, m_mix, h_mix = self._gray_recursion(r, mu, omega, alpha, beta, gamma)

        # (Optional) one extra iteration: re-center residuals with mixture mean and refit GARCH
        for it in range(self.max_iter):
            if self.max_iter <= 1: break
            # reselect by posterior again (same gamma) but use residuals r - mu_k
            new_params = []
            for k in range(self.k):
                mask = (gamma[:, k] == gamma.max(1)) & (gamma[:, k] >= self.thresh)
                idx = np.where(mask)[0]
                if len(idx) < 30:
                    idx = np.argsort(gamma[:, k])[-max(30, T // self.k):]
                r_k = r[idx] - mu[k]
                am = ConstantMean(r_k)
                am.volatility = GARCH(1, 0, 1)
                res_k = am.fit(disp='off')
                self._arch_results_[k] = res_k
                v = res_k.params
                omega = float(v['omega']); alpha = float(v['alpha[1]']); beta = float(v['beta[1]'])
                s = max(alpha + beta, 1e-6)
                if s >= 0.998:
                    shrink = (s / 0.98)
                    alpha /= shrink; beta /= shrink
                new_params.append((omega, alpha, beta))
            new_params = np.array(new_params)
            omega, alpha, beta = new_params[:,0], new_params[:,1], new_params[:,2]
            h, m_mix, h_mix = self._gray_recursion(r, mu, omega, alpha, beta, gamma)

        # Store
        self.mu_ = mu
        self.P_ = P
        self.gamma_ = gamma
        self.h_ = h                 # (T,K) per-state conditional variances
        self.h_mix_ = h_mix         # (T,) mixture variance
        self.m_mix_ = m_mix         # (T,) mixture mean
        self.omega_ = omega; self.alpha_ = alpha; self.beta_ = beta
        self.r_ = r
        return self

    def _gray_recursion(self, r, mu, omega, alpha, beta, pi):
        T = len(r); K = len(mu)
        h = np.zeros((T, K))
        m_mix = np.zeros(T)
        H_mix = np.zeros(T)

        # initialize with unconditional per state
        den = np.maximum(1.0 - (alpha + beta), 1e-3)
        h0 = omega / den
        h[0] = np.maximum(h0, 1e-8)

        m_mix[0] = (pi[0] @ mu)
        H_mix[0] = (pi[0] @ h[0])

        for t in range(1, T):
            m_mix[t] = pi[t-1] @ mu
            H_mix[t] = pi[t-1] @ h[t-1]
            innov2 = (r[t-1] - m_mix[t])**2
            h[t] = omega + alpha * innov2 + beta * H_mix[t]
            h[t] = np.maximum(h[t], 1e-12)

        h_mix = (pi * h).sum(1)
        return h, m_mix, h_mix

    # One-step-ahead (no new data)
    def predict_next(self):
        r_T = self.r_[-1]
        m_T = self.m_mix_[-1]
        H_T = self.h_mix_[-1]
        pi_T = self.gamma_[-1]
        mu   = self.mu_
        omega, alpha, beta = self.omega_, self.alpha_, self.beta_
        h_next = omega + alpha * (r_T - m_T)**2 + beta * H_T
        h_next = np.maximum(h_next, 1e-12)
        pi_next = pi_T @ self.P_
        mean_next = float(pi_next @ mu)
        var_next  = float(pi_next @ h_next)
        return {"pi_next": pi_next, "h_next_per_state": h_next,
                "mean_next": mean_next, "var_next": var_next}

    def _loglik_state(self, r_new, mu_k, h_k, nu_k=None):
        """
        Log-likelihood of r_new under state k with mean mu_k, variance h_k,
        using Normal or Student-t (df=nu_k) innovations.
        """
        h = max(h_k, 1e-12)
        x = r_new - mu_k
        if self.dist == "normal":
            # N(mu, h)
            return -0.5 * (math.log(2*math.pi*h) + (x*x)/h)
        else:
            # Student-t with df=nu, using *standard* t scaled so that Var = h
            # Standard t(df) with scale s has pdf: log Γ((ν+1)/2) - log(√(νπ) s Γ(ν/2))
            #                                  - (ν+1)/2 * log(1 + ((x)/s)^2 / ν)
            nu = max(float(nu_k), 2.01)
            # choose scale so that Var(X)=h -> s = sqrt(h * ν/(ν-2))
            s = math.sqrt(h * nu/(nu-2.0))
            z2 = (x/s)**2
            return (
                math.lgamma((nu+1)/2.0)
                - math.lgamma(nu/2.0)
                - 0.5*math.log(nu*math.pi) - math.log(s)
                - 0.5*(nu+1.0)*math.log(1.0 + z2/nu)
            )

    # Online filter step (fixed params)
    def filter_update(self, r_new):
        r_new = float(r_new)
        fc = self.predict_next()
        h_next = fc["h_next_per_state"]     # per-state conditional variances for the new time
        pi_pred = fc["pi_next"]
        mu = self.mu_

        # per-state log-likelihood
        loglik = np.empty(self.k)
        for k in range(self.k):
            nu_k = None if self.dist == "normal" else self.nu_[k]
            loglik[k] = self._loglik_state(r_new, mu[k], h_next[k], nu_k=nu_k)

        # stabilize
        loglik -= loglik.max()
        lik = np.exp(loglik)

        pi_post = pi_pred * lik
        pi_post = pi_post / pi_post.sum()

        # append updated series
        self.r_ = np.append(self.r_, r_new)
        self.gamma_ = np.vstack([self.gamma_, pi_post])

        mix_var = float(pi_post @ h_next)
        mix_mean = float(pi_post @ mu)
        self.h_mix_ = np.append(self.h_mix_, mix_var)
        self.m_mix_ = np.append(self.m_mix_, mix_mean)

        return {"pi_post": pi_post, "mix_mean": mix_mean, "mix_var": mix_var}


In [None]:
# Work on a copy so the shared vol_data frame is left untouched
ms_garch_data = vol_data.copy()

# split_data is defined elsewhere; it returns the (train, test) partitions
training_data, testing_data = split_data(ms_garch_data)

# Last expression of the cell: display the training partition
training_data

In [None]:
# One fitted 3-regime, Student-t MS-GARCH model per index, keyed by index name
garches = {}


for index in indexes:
    # exp() undoes the log in the '<index>_log_squared_residual' column.
    # NOTE(review): by its name this is a squared residual, yet the backtest cell
    # later feeds the same model plain log returns via filter_update — confirm the
    # fitted series and the filtered series are meant to be on the same scale.
    r = np.exp(training_data[f'{index}_log_squared_residual'])
    garches[index] = MSGARCH(k=3, thresh=0.8, max_iter=1, verbose=True, dist='t')
    garches[index].fit(r)

In [None]:
# Create backtest df copy
bt = predictions.copy()

# Set initial conditions
starting_cash = 10000
bt_data = {}

# Model
model_name = 'MS_GARCH'

# When True, returns are read from the '<index>_F_log_ret' (futures) columns
trade_futures = False
futures_columns = '_F' if trade_futures else ''

# ETF ticker whose '<ticker>_div' column is read for each index
div_dict = {'S&P': 'SPY', 'NASDAQ': 'QQQ', 'DJIA': 'DIA'}

for index in indexes:
    bt_data[index] = {}

    # Float columns: the loop below writes fractional dollar amounts, and
    # writing floats into integer columns is deprecated in pandas.
    bt[f'{index}_portfolio_value_{model_name}'] = 0.0
    bt[f'{index}_cash_{model_name}'] = 0.0
    # Single positional write on the frame; the chained form
    # `bt[col].iloc[0] = ...` can end up modifying a temporary copy.
    bt.iloc[0, bt.columns.get_loc(f'{index}_cash_{model_name}')] = starting_cash
    bt[f'{index}_signal_{model_name}'] = 0

    bt_data[index][model_name] = {
        'last_value': 0,
        'last_cash': starting_cash
    }

# Starts at second day
for idx, row in bt.iloc[1:].iterrows():
    for index in indexes:
        last_value = bt_data[index][model_name]['last_value']
        last_cash = bt_data[index][model_name]['last_cash']

        # close, rf, div and mu only feed the commented-out sizing alternatives below.
        # NOTE(review): rf uses ten_yr as a decimal rate, while sharpe_and_mdd divides
        # the same column by 100 — confirm the column's units.
        close = row[f'{index}_Close']
        rf = (1+row['ten_yr'])**(1/252)-1 # Daily

        div_name = f'{div_dict[index]}_div'
        div = (1+row[div_name])**(1/252)-1 # Daily

        # Forecast first, then let the filter absorb this row's realised return
        fc = garches[index].predict_next()
        garches[index].filter_update(row[f'{index}{futures_columns}_log_ret'])

        #mu = (fc['mean_next']/100) / close - 1 # Use garch prediction for returns
        mu = row[f'{index}_next_ret_pred_enet'] / close - 1 # Use our regression model prediction for returns
        #mu = rf - div # Use risk free assumption
        vol = np.sqrt(fc['var_next'])

        investment = 0

        # Update portfolio value: mark yesterday's position to today's return
        current_value = last_value * np.exp(row[f'{index}{futures_columns}_log_ret'])

        # Calculate total equity
        total_equity = current_value + last_cash

        # Get bet sizing with Merton-Kelly
        #target_pos = .5 * get_merton_kelly_size(mu, rf, vol) * total_equity # Daily rf
        #target_pos = max(-total_equity, min(target_pos, total_equity)) # Make sure we don't exceed equity

        # Get bet sizing with vol target (10% annualised, converted to daily)
        target_pos = get_vol_target_sizing(.1/np.sqrt(252), vol) * total_equity
        if np.isnan(target_pos):
            # No usable vol forecast: keep the current position.  Without this
            # guard the min()/max() clamp returns -total_equity for a NaN
            # target (every comparison with NaN is False), i.e. a full short.
            target_pos = current_value
        else:
            target_pos = max(-total_equity, min(target_pos, total_equity)) # Make sure we don't exceed equity

        # Bet sizing based on size of potential increase/decrease, signed
        #target_pos = (mu - rf)/abs(mu - rf) * total_equity # super leverage
        #target_pos = max(-total_equity, min(target_pos, total_equity)) # Make sure we don't exceed equity

        # Execute trade / Update investment amount to reach target position
        investment = target_pos - current_value

        # Update cash
        last_cash = bt_data[index][model_name]['last_cash'] = last_cash - investment
        bt.loc[idx, f'{index}_cash_{model_name}'] = last_cash

        # Store portfolio value as last value for calculation
        bt_data[index][model_name]['last_value'] = current_value + investment
        bt.loc[idx, f'{index}_portfolio_value_{model_name}'] = bt_data[index][model_name]['last_value']

        # Set signal for plotting later (+1 bought, -1 sold, 0 unchanged)
        if investment > 0:
            bt.loc[idx, f'{index}_signal_{model_name}'] = 1
        elif investment < 0:
            bt.loc[idx, f'{index}_signal_{model_name}'] = -1


for index in indexes:
    bt[f'{index}_total_value_{model_name}'] = bt[f'{index}_portfolio_value_{model_name}'] + bt[f'{index}_cash_{model_name}']
    final_value = bt[f'{index}_total_value_{model_name}'].iloc[-1]
    print(f'Final value for {index}, {model_name}: {final_value:.2f}')
    print('\n')

In [None]:
# Buy-and-hold benchmark: $10,000 compounded at each index's daily log return
for index in indexes:
    bt[f'{index}_baseline'] = np.exp(bt[f'{index}_log_ret'].cumsum()) * 10000

# Line colour per model name
colors = { 'MS_GARCH': 'red' }

# --- Plotting Code ---

# Initialize a figure with a row for each index
fig = make_subplots(
    rows=len(indexes),
    cols=1,
    subplot_titles=[f'{index} Model Performance' for index in indexes],
    shared_xaxes=True # Link the x-axes
)

# Enumerate through indexes to get the row number (i)
for i, index in enumerate(indexes):
    # Add the baseline trace for the current index
    fig.add_trace(go.Scatter(
        x=bt.index,
        y=bt[f'{index}_baseline'],
        mode='lines',
        name=f'{index} Baseline',
        legendgroup=f'group{i}', # Group legend items by subplot
        line=dict(color='blue', width=1)
    ), row=i + 1, col=1) # row is 1-indexed

    # Add the main performance line for the model
    fig.add_trace(go.Scatter(
        x=bt.index,
        y=bt[f'{index}_total_value_{model_name}'],
        mode='lines',
        name=f'{index} {model_name}',
        legendgroup=f'group{i}',
        line=dict(color=colors[model_name], width=1.5, dash='dot')
    ), row=i + 1, col=1)

    # --- Signal series ---

    # Total portfolio value on the dates where a buy (+1) or sell (-1) was recorded.
    # Rows with any other signal value are dropped from the selection, not set to NaN.
    # These two series are only consumed by the marker-plotting code that is disabled below.
    buy_signals_y = bt.loc[bt[f'{index}_signal_{model_name}'] == 1, f'{index}_total_value_{model_name}']
    sell_signals_y = bt.loc[bt[f'{index}_signal_{model_name}'] == -1, f'{index}_total_value_{model_name}']

""" These are too crowded
        # Add BUY signal markers
        fig.add_trace(go.Scatter(
            x=buy_signals_y.index,
            y=buy_signals_y,
            mode='markers',
            name=f'Buy Signal',
            legendgroup=f'group{i}',
            marker=dict(size=10, symbol='triangle-up', color='green'),
            showlegend=False # Hide from legend to avoid clutter
        ), row=i + 1, col=1)

        # Add SELL signal markers
        fig.add_trace(go.Scatter(
            x=sell_signals_y.index,
            y=sell_signals_y,
            mode='markers',
            name=f'Sell Signal',
            legendgroup=f'group{i}',
            marker=dict(size=10, symbol='triangle-down', color='red'),
            showlegend=False # Hide from legend to avoid clutter
        ), row=i + 1, col=1)
        """

# --- Update the Layout ---
fig.update_layout(
    title_text='Backtest Performance: Model Comparison by Index',
    title_x=0.5, # Center the title
    legend_title='Metrics',
    height=800 # Adjust height to make plots more readable
)

# --- Display the Chart ---
fig.show()

def _cagr_pct(curve):
    """Return the compound annual growth rate of an equity curve, in percent.

    The holding period is the calendar time between the first and last index
    labels of *curve*, measured in days and converted at 365.25 days per
    year, so the month and day of both endpoints are taken into account.

    Args:
        curve: Series of portfolio values. Its index labels must support
            subtraction yielding an object with a ``.days`` attribute
            (e.g. pandas Timestamps).

    Returns:
        The annualized growth rate in percent, or NaN when the curve spans
        no calendar time.
    """
    elapsed_years = (curve.index[-1] - curve.index[0]).days / 365.25
    if elapsed_years <= 0:
        # A zero-length window has no meaningful annualized rate.
        return float('nan')
    growth = curve.iloc[-1] / curve.iloc[0]
    return 100 * (growth ** (1 / elapsed_years) - 1)


# The first 80% of the backtest rows are reported as the training window,
# the remaining rows as the testing window.
cutoff = math.floor(len(bt)*.8)
training_returns = bt.iloc[:cutoff]
testing_returns = bt.iloc[cutoff:]

for index in indexes:
    total_value = f'{index}_total_value_{model_name}'

    # Equity curves for each window (names kept from the original cell)
    train_curve = training_returns[total_value]
    test_curve = testing_returns[total_value]

    final_training_value = train_curve.iloc[-1]
    training_cagr = _cagr_pct(train_curve)
    print(f'Final training value for {index}, {model_name}: {final_training_value}, CAGR: {training_cagr:.2f}%')

    final_testing_value = test_curve.iloc[-1]
    testing_cagr = _cagr_pct(test_curve)
    print(f'Final testing value for {index}, {model_name}: {final_testing_value}, CAGR: {testing_cagr:.2f}%')

    # ---- Training metrics ----
    # sharpe_and_mdd is defined elsewhere in the notebook; it receives the
    # window's frame, the total-value column name and the "ten_yr" column name.
    train_sharpe, train_mdd = sharpe_and_mdd(training_returns, total_value, col_rf="ten_yr")
    print(f"Training Sharpe: {train_sharpe:.2f}, Max Drawdown: {train_mdd:.2%}")

    # ---- Testing metrics ----
    test_sharpe, test_mdd = sharpe_and_mdd(testing_returns, total_value, col_rf="ten_yr")
    print(f"Testing Sharpe: {test_sharpe:.2f}, Max Drawdown: {test_mdd:.2%}")

    print('\n')
