
# 🧪 Alpha Strategies Lab

_Date generated: 2025-09-03_

This notebook is a **research workbench** to prototype and evaluate multiple **cross‑sectional alpha strategies** across regions and assets.

**What’s inside**
- Data loader (local CSV or synthetic fallback)
- Signal library: Momentum, Mean‑Reversion, Volatility, Carry (term‑structure proxy), Composite
- Regime filter (volatility / trend state)
- Portfolio construction: rank‑based long/short, z‑score sizing, beta‑neutral & dollar‑neutral options
- Costs: commission + slippage, each a flat bps charge on daily turnover (a simple stand‑in for a spread × participation model)
- Backtest engine (vectorized), risk targeting
- Metrics: CAGR, Sharpe, Sortino, max DD, turnover, hit rate
- Rolling stats & walk‑forward evaluation


## 0) Parameters

In [None]:

# Notebook-wide configuration. Later cells read these names directly, and several
# functions capture them as default argument values when their cell is executed.

# Data sources
PATH_PRICES = "data/prices.csv"     # wide: date, TICK1, TICK2, ...
FREQ = "B"                          # business days

# Strategy knobs
LOOKBACK_MOM = 60                   # momentum lookback (days)
LOOKBACK_MR  = 5                    # mean reversion lookback
VOL_WINDOW   = 20                   # realized vol window
CARRY_WINDOWS = (20, 60)            # "term structure" proxy windows, unpacked as (w1, w2) into sig_carry
N_LONG = 10                          # number of longs
N_SHORT = 10                         # number of shorts
TARGET_VOL_ANNUAL = 0.10            # risk targeting at portfolio level

# Costs
COMMISSION_BPS = 0.2                # 0.2 bp per notional (example)
SLIPPAGE_BPS   = 1.0                # 1 bp slippage per turnover unit

# Backtest range (None -> full)
# Applied by label-slicing the price frame right after it is loaded.
START_DATE = None
END_DATE   = None


## 1) Setup & Helpers

In [None]:

import os, math, itertools, warnings
from typing import Dict, List, Tuple, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Render floats with thousands separators and six decimals in notebook output.
pd.options.display.float_format = "{:,.6f}".format
# NOTE(review): this silences every warning for the whole session, which also
# hides pandas deprecation warnings raised by the cells below.
warnings.filterwarnings("ignore")

def ann_factor(freq='D'):
    """Return the number of periods per year for a frequency code.

    'B' and 'D' give 252, any code starting with 'W' gives 52, and every
    other code falls through to 12.
    """
    if freq in ('B', 'D'):
        return 252
    if freq.startswith('W'):
        return 52
    return 12

def pct_change_safe(df: pd.DataFrame) -> pd.DataFrame:
    """Row-over-row percentage change with +/-inf replaced by NaN.

    Infinite values appear when the previous value is zero; they are mapped
    to NaN so downstream code can treat them as missing.
    """
    changes = df.pct_change()
    infinities = [np.inf, -np.inf]
    return changes.replace(infinities, np.nan)

def zscore(df: pd.DataFrame, win: int) -> pd.DataFrame:
    """Rolling z-score of each column over a trailing window of `win` rows.

    The rolling standard deviation uses ddof=1; windows with zero dispersion
    yield NaN rather than a division by zero.
    """
    window = df.rolling(win)
    center = window.mean()
    spread = window.std(ddof=1).replace(0, np.nan)
    return (df - center) / spread

def winsorize(s: pd.Series, p=0.01) -> pd.Series:
    """Clip a series to its own [p, 1-p] quantile range."""
    lower = s.quantile(p)
    upper = s.quantile(1 - p)
    return s.clip(lower=lower, upper=upper)

def drawdown_curve(nav: pd.Series) -> pd.Series:
    """Fractional drawdown of `nav` from its running maximum.

    Values are 0 at new highs and negative below them.
    """
    running_peak = nav.cummax()
    relative_to_peak = nav / running_peak
    return relative_to_peak - 1.0


## 2) Load Data (local or synthetic)

In [None]:

def load_prices(path=PATH_PRICES, n_assets=60, n_days=800, seed=7) -> pd.DataFrame:
    """Load a wide price table from CSV, or synthesize one if the file is missing.

    When `path` exists it is read with a parsed 'date' column, which becomes
    the sorted index. Otherwise prices are simulated from a one-market,
    five-sector factor model plus idiosyncratic noise, starting at 100 on
    business dates from 2022-01-01; `seed` makes the simulation reproducible.
    """
    if os.path.exists(path):
        raw = pd.read_csv(path, parse_dates=['date'])
        return raw.set_index('date').sort_index()

    # Synthetic fallback. The draw order (market, sectors, idiosyncratic)
    # determines the random stream and must stay fixed for reproducibility.
    rng = np.random.default_rng(seed)
    dates = pd.bdate_range("2022-01-01", periods=n_days, freq=FREQ)
    n_obs = len(dates)
    market = rng.normal(0.0003, 0.009, size=n_obs)
    sectors = rng.normal(0.0002, 0.006, size=(n_obs, 5))
    idio = rng.normal(0, 0.01, size=(n_obs, n_assets))

    px = np.zeros((n_obs, n_assets))
    names = []
    for j in range(n_assets):
        # Assets are assigned to the five sectors round-robin.
        asset_ret = market + 0.5 * sectors[:, j % 5] + 0.5 * idio[:, j]
        px[:, j] = 100 * np.cumprod(1 + asset_ret)
        names.append(f"A{j:03d}")
    return pd.DataFrame(px, index=dates, columns=names)

# Load prices (CSV if present, otherwise synthetic) and restrict them to the
# configured date range; a None bound leaves that side open.
prices = load_prices()
if START_DATE: prices = prices.loc[START_DATE:]
if END_DATE:   prices = prices.loc[:END_DATE]
# Simple returns; the first row and any other missing value become a 0% return.
returns = pct_change_safe(prices).fillna(0.0)
# Cell output: preview of both frames.
prices.head(), returns.head()


## 3) Signal Library

In [None]:

def sig_momentum(prices: pd.DataFrame, lb: int = LOOKBACK_MOM) -> pd.DataFrame:
    """Price momentum: compounded return over the lookback, skipping the latest day.

    For each date the trailing window of `lb` daily returns is taken, the most
    recent return is dropped (to reduce short-term reversal contamination) and
    the remaining `lb - 1` returns are compounded. The first `lb - 1` rows are
    NaN because the window is not yet full.

    Args:
        prices: Wide price frame (rows = dates, columns = assets).
        lb: Window length in rows.

    Returns:
        Frame of the same shape holding the compounded return per asset.
    """
    # Missing returns (including the first row) are treated as 0%.
    ret = prices.pct_change().fillna(0.0)
    # Roll over the raw returns and add 1 exactly once inside the product.
    # Rolling over (1 + ret) and adding 1 again would compound (2 + r) terms.
    # raw=True hands the lambda a NumPy array, so x[:-1] is a positional slice.
    mom = ret.rolling(lb).apply(lambda x: np.prod(1 + x[:-1]) - 1, raw=True)
    return mom

def sig_mean_reversion(prices: pd.DataFrame, lb: int = LOOKBACK_MR) -> pd.DataFrame:
    """Short-term reversal signal: the negated `lb`-row percentage change.

    Recent losers receive a high score and recent winners a low one.
    """
    trailing_return = prices.pct_change(periods=lb)
    return -trailing_return

def sig_volatility(returns: pd.DataFrame, win: int = VOL_WINDOW) -> pd.DataFrame:
    """Low-volatility signal: negated rolling standard deviation of returns.

    The sign flip means calmer assets score higher (a defensive tilt).
    """
    realized_vol = returns.rolling(window=win).std(ddof=1)
    return -realized_vol

def sig_carry(prices: pd.DataFrame, w1=20, w2=60) -> pd.DataFrame:
    """Term-structure proxy: slope of the short moving average minus the long one.

    Each slope is the percentage change of a `w`-row moving average over the
    same `w` rows; the signal is slope(w1) - slope(w2).
    """
    def ma_slope(window):
        # Percentage change of the moving average across its own window length.
        return prices.rolling(window).mean().pct_change(window)

    return ma_slope(w1) - ma_slope(w2)

def sig_composite(prices: pd.DataFrame, returns: pd.DataFrame) -> pd.DataFrame:
    """Blend the four base signals after cross-sectional rank normalization.

    Each signal is converted per date to a percentile rank across assets and
    rescaled as 2 * (rank - 0.5), so values lie in (-1, 1]. The blend weights
    are 40% momentum and 20% each for mean reversion, low volatility and
    carry.
    """
    def cs_rank(df):
        # Row-wise percentile rank, rescaled around zero.
        return df.rank(axis=1, pct=True).subtract(0.5) * 2

    raw_mom = sig_momentum(prices)
    raw_rev = sig_mean_reversion(prices)
    raw_vol = sig_volatility(returns)
    raw_carry = sig_carry(prices, *CARRY_WINDOWS)

    blended = 0.4 * cs_rank(raw_mom)
    blended = blended + 0.2 * cs_rank(raw_rev)
    blended = blended + 0.2 * cs_rank(raw_vol)
    blended = blended + 0.2 * cs_rank(raw_carry)
    return blended


## 4) Regime Filter

In [None]:

def regime_filter(returns: pd.DataFrame, win=60) -> pd.Series:
    """Flag 'risk_on' dates: positive market trend and non-extreme volatility.

    The market proxy is the equal-weight average return across columns. A
    date is risk-on when the `win`-row rolling mean of that proxy is positive
    and its rolling volatility is below the 80th percentile of the rolling
    volatility series. Dates without a full window are flagged False.

    NOTE: the volatility threshold is a quantile over the whole sample, so
    the flag on any date depends on the full history passed in.
    """
    market = returns.mean(axis=1)
    window = market.rolling(win)
    trending_up = window.mean() > 0
    market_vol = window.std(ddof=1)
    calm = market_vol < market_vol.quantile(0.8)
    risk_on = (trending_up & calm).reindex(returns.index).fillna(False)
    return risk_on.rename("risk_on")

# Boolean risk-on flag per date, using the default 60-row window.
regime = regime_filter(returns)
# Cell output: the most recent flags.
regime.tail()


## 5) Portfolio Construction

In [None]:

def build_portfolio(signal: pd.DataFrame,
                    returns: pd.DataFrame,
                    n_long=N_LONG, n_short=N_SHORT,
                    beta_neutral=True):
    """Turn a cross-sectional signal into daily long/short weights.

    Per date, the `n_long` highest-scoring assets get +1/n_long and the
    `n_short` lowest-scoring assets get -1/n_short. Dates with fewer than
    n_long + n_short non-missing scores stay at zero weight.

    When `beta_neutral` is True the row's mean weight is subtracted from
    every asset, so the row sums to zero; the market proxy implied here is
    the equal-weight portfolio. Each non-empty row is finally rescaled to a
    gross exposure (sum of absolute weights) of 1.

    `returns` is accepted for signature compatibility and is not read.
    """
    min_names = n_long + n_short
    w = pd.DataFrame(0.0, index=signal.index, columns=signal.columns)
    for dt, scores in signal.iterrows():
        ranked = scores.dropna()
        if len(ranked) >= min_names:
            top = ranked.nlargest(n_long).index
            bottom = ranked.nsmallest(n_short).index
            # Shorts are written after longs, so an asset picked by both
            # (possible with tied scores) ends up short.
            w.loc[dt, top] = 1.0 / n_long
            w.loc[dt, bottom] = -1.0 / n_short
            if beta_neutral:
                w.loc[dt] = w.loc[dt] - w.loc[dt].mean()
            gross = w.loc[dt].abs().sum()
            if gross > 0:
                w.loc[dt] = w.loc[dt] / gross
    return w

def risk_target_weights(w: pd.DataFrame, returns: pd.DataFrame, target_vol_ann=TARGET_VOL_ANNUAL, win=60):
    """Scale weights so the strategy's trailing volatility matches a target.

    The unscaled strategy return is computed from the previous row's weights,
    its `win`-row rolling volatility is annualized with sqrt(252), and each
    row of `w` is multiplied by target / realized volatility.

    Args:
        w: Unscaled weights (rows = dates, columns = assets).
        returns: Asset returns aligned with `w`.
        target_vol_ann: Target annualized volatility.
        win: Rolling window (rows) for the realized-volatility estimate.

    Returns:
        Scaled weights. The multiplier is capped to [0, 5] and is 0 wherever
        realized volatility is unavailable (warm-up) or exactly zero.
    """
    strat_ret = (returns * w.shift(1)).sum(axis=1)  # unscaled strategy return series
    vol = strat_ret.rolling(win).std(ddof=1) * np.sqrt(252)
    scale = (target_vol_ann / (vol.replace(0, np.nan))).clip(0, 5.0).fillna(0.0)
    # .ffill() replaces fillna(method="ffill"), which recent pandas deprecates.
    scale = scale.reindex(w.index).ffill().fillna(0.0)
    return w.mul(scale, axis=0)


## 6) Backtest Engine (with costs)

In [None]:

def backtest(weights: pd.DataFrame, returns: pd.DataFrame,
             commission_bps=COMMISSION_BPS, slippage_bps=SLIPPAGE_BPS) -> pd.Series:
    """Vectorized backtest returning the net-of-cost strategy return per date.

    Gross return on a date is the previous row's weights times that date's
    asset returns. Costs are the day's turnover (sum of absolute weight
    changes) times the combined commission and slippage rate in basis
    points. The first row carries no turnover because its weight change is
    undefined and is skipped by the sum.
    """
    cost_rate = (commission_bps + slippage_bps) / 10000.0
    held = weights.shift(1)
    gross = (held * returns).sum(axis=1)
    turnover = (weights - held).abs().sum(axis=1)
    net = gross - turnover * cost_rate
    return net.fillna(0.0)

def metrics(r: pd.Series) -> dict:
    """Summarize a daily return series with annualized performance statistics.

    Returns a dict with keys CAGR, Vol, Sharpe, Sortino, MaxDD and HitRate.
    Annualization assumes 252 periods per year. Ratios are NaN when their
    denominator is zero or undefined. The Sortino denominator is the
    standard deviation (ddof=1) of the negative returns only.
    """
    n_obs = len(r)
    ann = np.sqrt(252)

    if n_obs > 0:
        cagr = (1 + r).prod() ** (252 / n_obs) - 1.0
    else:
        cagr = np.nan

    if r.std() > 0:
        vol = r.std(ddof=1) * ann
        sharpe = (r.mean() / r.std(ddof=1)) * ann
    else:
        vol = np.nan
        sharpe = np.nan

    losses = r[r < 0]
    downside = losses.std(ddof=1) * ann if len(losses) > 0 else np.nan
    # A NaN downside fails the comparison and falls through to NaN.
    sortino = (r.mean() * 252) / downside if downside > 0 else np.nan

    nav = (1 + r).cumprod()
    max_dd = drawdown_curve(nav).min()
    hit = (r > 0).mean()
    return {"CAGR": cagr, "Vol": vol, "Sharpe": sharpe, "Sortino": sortino, "MaxDD": max_dd, "HitRate": hit}


## 7) Run Strategies

In [None]:

# One signal frame per strategy, all computed on the full price/return history.
signals = {
    "Momentum": sig_momentum(prices, LOOKBACK_MOM),
    "MeanRev": sig_mean_reversion(prices, LOOKBACK_MR),
    "LowVol": sig_volatility(returns, VOL_WINDOW),
    "CarryTS": sig_carry(prices, *CARRY_WINDOWS),
    "Composite": sig_composite(prices, returns),
}

# Same pipeline for every strategy: rank-based weights -> volatility
# targeting -> net-of-cost daily returns, stored by strategy name.
results = {}
for name, sig in signals.items():
    w = build_portfolio(sig, returns, N_LONG, N_SHORT, beta_neutral=True)
    w = risk_target_weights(w, returns, TARGET_VOL_ANNUAL)
    r = backtest(w, returns)
    results[name] = r

# Cell output: descriptive statistics of daily net returns, one row per strategy.
pd.DataFrame({k: v.describe() for k, v in results.items()}).T.head()


### Equity Curves (Net of Costs)

In [None]:

# Growth of 1 unit for each strategy, compounding daily net returns.
plt.figure(figsize=(10,4))
for k, r in results.items():
    (1+r).cumprod().plot(label=k, alpha=0.9)
plt.title("Strategy Equity Curves")
plt.legend()
plt.tight_layout()
plt.show()


### Rolling Sharpe (126d)

In [None]:

# Rolling Sharpe ratio per strategy: rolling mean / rolling std (ddof=1),
# annualized with sqrt(252).
roll_win = 126
plt.figure(figsize=(10,3.5))
for k, r in results.items():
    rs = r.rolling(roll_win).mean() / r.rolling(roll_win).std(ddof=1)
    (rs * np.sqrt(252)).plot(label=k, alpha=0.8)
# Zero line as a visual reference.
plt.axhline(0, color='k', lw=1)
plt.title(f"Rolling Sharpe ({roll_win}d)")
plt.legend()
plt.tight_layout()
plt.show()


## 8) Regime Analysis

In [None]:

# Boolean mask aligned to the return dates; dates missing from `regime` count as risk-off.
reg = regime.reindex(returns.index).fillna(False)
summary_reg = {}
for k, r in results.items():
    # Split each strategy's daily returns by regime.
    r_on  = r[reg]
    r_off = r[~reg]
    # Annualized Sharpe and hit rate per regime; Sharpe is NaN when the
    # subset has no dispersion (or is too short for a standard deviation).
    summary_reg[k] = {
        "Sharpe_risk_on": (r_on.mean()/r_on.std())*np.sqrt(252) if r_on.std()>0 else np.nan,
        "Sharpe_risk_off": (r_off.mean()/r_off.std())*np.sqrt(252) if r_off.std()>0 else np.nan,
        "Hit_on": (r_on>0).mean(),
        "Hit_off": (r_off>0).mean(),
    }
# Cell output: one row per strategy.
pd.DataFrame(summary_reg).T


## 9) Walk‑Forward Evaluation

In [None]:

def walk_forward(signal_fn, params_list: List[dict], prices: pd.DataFrame, returns: pd.DataFrame,
                 train=252*2, test=252):
    """Walk-forward evaluation: pick parameters in-sample, score them out-of-sample.

    The history is cut into consecutive folds. In each fold every parameter
    set in `params_list` is backtested on the `train` rows before the fold
    and the one with the highest annualized Sharpe is kept (the first set is
    used when none yields a finite Sharpe). The chosen set is then run over
    train + test rows and only the test-window returns are kept. This way
    the rolling signal and the volatility scaling are already warmed up on
    the first out-of-sample day instead of restarting from empty windows.
    All inputs to a given day's weight are rolling, backward-looking
    quantities.

    Args:
        signal_fn: Callable taking a price frame (or, if that raises
            TypeError, a return frame) plus keyword parameters and returning
            a signal frame.
        params_list: Candidate keyword-argument dicts for `signal_fn`.
        prices: Wide price frame; its index defines the fold boundaries.
        returns: Asset returns aligned with `prices`.
        train: Number of rows in each training window.
        test: Number of rows in each test window.

    Returns:
        Concatenated out-of-sample net returns, or an empty float Series when
        the history is too short for a single fold.

    Raises:
        ValueError: If `params_list` is empty.
    """
    if not params_list:
        raise ValueError("params_list must contain at least one parameter set")

    def _net_returns(window, params):
        # Signal -> weights -> volatility targeting -> net returns on `window`.
        try:
            sig = signal_fn(prices.loc[window], **params)
        except TypeError:
            # Same signature probe as before: retry with returns as input.
            sig = signal_fn(returns.loc[window], **params)
        window_ret = returns.loc[window]
        weights = build_portfolio(sig, window_ret)
        weights = risk_target_weights(weights, window_ret, TARGET_VOL_ANNUAL)
        return backtest(weights, window_ret)

    idx = prices.index
    perf = []
    cursor = train
    while cursor + test <= len(idx):
        tr = slice(idx[cursor - train], idx[cursor - 1])
        te = slice(idx[cursor], idx[cursor + test - 1])

        # In-sample selection; fall back to the first candidate so that
        # `best` is never None when no Sharpe is computable.
        best = params_list[0]
        best_sharpe = -1e9
        for p in params_list:
            r = _net_returns(tr, p)
            shp = (r.mean() / r.std()) * np.sqrt(252) if r.std() > 0 else -1e9
            if shp > best_sharpe:
                best_sharpe, best = shp, p

        # Out-of-sample: run over train + test for warm-up, keep test rows only.
        span = slice(tr.start, te.stop)
        perf.append(_net_returns(span, best).loc[te])
        cursor += test

    if perf:
        return pd.concat(perf).sort_index()
    return pd.Series(dtype=float)

# Candidate momentum lookbacks, each passed to the signal as keyword `lb`.
param_grid = [{"lb": x} for x in [40, 60, 90, 120]]
wf_mom = walk_forward(lambda prices, lb: sig_momentum(prices, lb=lb), param_grid, prices, returns)

# Equity curve built from the stitched out-of-sample returns.
plt.figure(figsize=(10,3.5))
(1 + wf_mom).cumprod().plot()
plt.title("Walk‑Forward — Momentum")
plt.tight_layout()
plt.show()

# Headline out-of-sample statistics; every entry is NaN when no fold was produced.
metrics_wf = {
    "CAGR": (1+wf_mom).prod()**(252/len(wf_mom)) - 1 if len(wf_mom)>0 else np.nan, # type: ignore
    "Sharpe": (wf_mom.mean()/wf_mom.std())*np.sqrt(252) if wf_mom.std()>0 else np.nan, # type: ignore
    "MaxDD": drawdown_curve((1+wf_mom).cumprod()).min() if len(wf_mom)>0 else np.nan # type: ignore
}
# Cell output: the metrics dict.
metrics_wf


## 10) Summary Metrics

In [None]:

# Full-sample performance metrics per strategy.
summary = {}
for k, r in results.items():
    summary[k] = metrics(r)
# Cell output: one row per strategy, best Sharpe first.
pd.DataFrame(summary).T.sort_values("Sharpe", ascending=False)
