In [34]:
from fredapi import Fred
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import TimeSeriesSplit
from google.colab import userdata
import yfinance as yf
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

FRED_API_KEY = userdata.get("FRED_API_KEY")
fred = Fred(api_key= FRED_API_KEY)

series = {
    "cpi": "CPIAUCSL",
    "fed_funds": "FEDFUNDS",
    "unemployment": "UNRATE",
    "yield_spread": "T10Y2Y",
    "industrial_production": "INDPRO"
}

macro = pd.DataFrame({
    name: fred.get_series(code)
    for name, code in series.items()
})

macro_m = pd.DataFrame(index=macro.resample("ME").mean().index)

macro_m["cpi"] = macro["cpi"].resample("ME").last()
macro_m["fed_funds"] = macro["fed_funds"].resample("ME").last()
macro_m["unemployment"] = macro["unemployment"].resample("ME").last()
macro_m["industrial_production"] = macro["industrial_production"].resample("ME").last()
macro_m["yield_spread"] = macro["yield_spread"].resample("ME").mean()
macro_m = macro_m.loc["2000-01-31":].dropna()

macro_m.head()


Unnamed: 0,cpi,fed_funds,unemployment,industrial_production,yield_spread
2000-01-31,169.3,5.45,4.0,91.538,0.221
2000-02-29,170.0,5.73,4.1,91.8239,-0.091
2000-03-31,171.0,5.85,4.0,92.1504,-0.271739
2000-04-30,170.9,6.02,3.8,92.6989,-0.413158
2000-05-31,171.2,6.27,4.0,92.9499,-0.369091


In [35]:


tickers = [
    "XLF","XLK","XLV","XLY","XLP","XLE",
    "XLI","XLB","XLU","XLRE","XLC"
]

prices = yf.download(
    tickers,
    start="2000-01-01",
    auto_adjust=True
)["Close"]

monthly_prices = prices.resample("ME").last()

sector_returns = np.log(monthly_prices / monthly_prices.shift(1))
sector_returns = sector_returns.dropna(how="all")
sector_returns.head()

[*********************100%***********************]  11 of 11 completed


Ticker,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2000-02-29,-0.105881,,-0.043256,-0.11322,-0.056753,0.099907,-0.124446,,-0.129582,-0.067622,-0.057327
2000-03-31,0.096122,,0.11727,0.167735,0.130332,0.080559,0.036904,,0.103673,0.086132,0.132358
2000-04-30,-0.033785,,-0.015037,0.009613,0.014343,-0.096342,0.051661,,0.063405,-0.011786,-0.021042
2000-05-31,-0.032065,,0.111026,0.022075,-0.004228,-0.10979,0.0689,,-0.002196,-0.02717,-0.055491
2000-06-30,-0.089391,,-0.056587,-0.049717,-0.040126,0.094907,0.055068,,-0.034558,0.002116,-0.055621


In [36]:


macro_features = pd.DataFrame(index=macro_m.index)

macro_features["cpi_yoy"] = macro_m["cpi"].pct_change(12)

macro_features["fed_funds_change"] = macro_m["fed_funds"].diff()

macro_features["unemployment_change"] = macro_m["unemployment"].diff()

macro_features["yield_spread"] = macro_m["yield_spread"]

macro_features["industrial_prod_yoy"] = macro_m["industrial_production"].pct_change(12)

macro_features = macro_features.dropna()

macro_features.head()


Unnamed: 0,cpi_yoy,fed_funds_change,unemployment_change,yield_spread,industrial_prod_yoy
2001-01-31,0.037212,-0.42,0.3,0.400952,0.003976
2001-02-28,0.035294,-0.49,0.0,0.442105,-0.005668
2001-03-31,0.029825,-0.18,0.1,0.543182,-0.011223
2001-04-30,0.032183,-0.51,0.1,0.907,-0.020602
2001-05-31,0.035631,-0.59,-0.1,1.131364,-0.027912


In [37]:
macro = macro_features.copy()

lags = [1, 3, 6, 12]
lagged = []

for lag in lags:
    lagged_df = macro.shift(lag)
    lagged_df.columns = [f"{c}_lag{lag}" for c in macro.columns]
    lagged.append(lagged_df)

macro_lagged = pd.concat([macro] + lagged, axis=1)
macro_lagged = macro_lagged.dropna()

macro_lagged.head()

Unnamed: 0,cpi_yoy,fed_funds_change,unemployment_change,yield_spread,industrial_prod_yoy,cpi_yoy_lag1,fed_funds_change_lag1,unemployment_change_lag1,yield_spread_lag1,industrial_prod_yoy_lag1,...,cpi_yoy_lag6,fed_funds_change_lag6,unemployment_change_lag6,yield_spread_lag6,industrial_prod_yoy_lag6,cpi_yoy_lag12,fed_funds_change_lag12,unemployment_change_lag12,yield_spread_lag12,industrial_prod_yoy_lag12
2002-01-31,0.011959,-0.09,0.0,2.007619,-0.036826,0.016037,-0.27,0.2,1.9805,-0.047748,...,0.027215,-0.2,0.1,1.197619,-0.037735,0.037212,-0.42,0.3,0.400952,0.003976
2002-02-28,0.011364,0.01,0.0,1.896316,-0.0304,0.011959,-0.09,0.0,2.007619,-0.036826,...,0.027215,-0.12,0.3,1.213913,-0.036215,0.035294,-0.49,0.0,0.442105,-0.005668
2002-03-31,0.013629,-0.01,0.0,1.7265,-0.021375,0.011364,0.01,0.0,1.896316,-0.0304,...,0.025922,-0.58,0.1,1.612941,-0.045502,0.029825,-0.18,0.1,0.543182,-0.011223
2002-04-30,0.01644,0.02,0.2,1.788182,-0.013053,0.013629,-0.01,0.0,1.7265,-0.021375,...,0.021277,-0.58,0.3,1.840909,-0.044405,0.032183,-0.51,0.1,0.907,-0.020602
2002-05-31,0.012408,0.0,-0.1,1.900455,-0.004333,0.01644,0.02,0.2,1.788182,-0.013053,...,0.018944,-0.4,0.2,1.869,-0.050449,0.035631,-0.59,-0.1,1.131364,-0.027912


In [38]:
sector_returns = sector_returns.copy()
sector_returns.index.name = "Date"

returns_long = (
    sector_returns
    .reset_index()
    .melt(id_vars="Date", var_name="sector", value_name="return")
)

In [39]:
macro_lagged = macro_lagged.copy()
macro_lagged.index.name = "Date"

dataset = (
    returns_long
    .merge(macro_lagged.reset_index(), on="Date", how="inner")
    .dropna(subset=["return"])
    .sort_values(["sector", "Date"])
)

dataset.head()

Unnamed: 0,Date,sector,return,cpi_yoy,fed_funds_change,unemployment_change,yield_spread,industrial_prod_yoy,cpi_yoy_lag1,fed_funds_change_lag1,...,cpi_yoy_lag6,fed_funds_change_lag6,unemployment_change_lag6,yield_spread_lag6,industrial_prod_yoy_lag6,cpi_yoy_lag12,fed_funds_change_lag12,unemployment_change_lag12,yield_spread_lag12,industrial_prod_yoy_lag12
0,2002-01-31,XLB,0.023075,0.011959,-0.09,0.0,2.007619,-0.036826,0.016037,-0.27,...,0.027215,-0.2,0.1,1.197619,-0.037735,0.037212,-0.42,0.3,0.400952,0.003976
1,2002-02-28,XLB,0.051567,0.011364,0.01,0.0,1.896316,-0.0304,0.011959,-0.09,...,0.027215,-0.12,0.3,1.213913,-0.036215,0.035294,-0.49,0.0,0.442105,-0.005668
2,2002-03-31,XLB,0.030287,0.013629,-0.01,0.0,1.7265,-0.021375,0.011364,0.01,...,0.025922,-0.58,0.1,1.612941,-0.045502,0.029825,-0.18,0.1,0.543182,-0.011223
3,2002-04-30,XLB,-0.048389,0.01644,0.02,0.2,1.788182,-0.013053,0.013629,-0.01,...,0.021277,-0.58,0.3,1.840909,-0.044405,0.032183,-0.51,0.1,0.907,-0.020602
4,2002-05-31,XLB,0.048389,0.012408,0.0,-0.1,1.900455,-0.004333,0.01644,0.02,...,0.018944,-0.4,0.2,1.869,-0.050449,0.035631,-0.59,-0.1,1.131364,-0.027912


In [40]:
feature_cols = [c for c in dataset.columns if c not in ["Date", "sector", "return"]]

In [41]:


def ts_cv_r2(X, y, alpha=10.0, splits=5):
    tscv = TimeSeriesSplit(n_splits=splits)
    scores = []
    model = Ridge(alpha=alpha)

    for train_idx, test_idx in tscv.split(X):
        model.fit(X.iloc[train_idx], y.iloc[train_idx])
        scores.append(model.score(X.iloc[test_idx], y.iloc[test_idx]))

    return float(np.mean(scores))

In [42]:
results = []
coefficients = {}

for sector in sorted(dataset["sector"].unique()):
    df = dataset[dataset["sector"] == sector]

    X = df[feature_cols]
    y = df["return"]

    cv_r2 = ts_cv_r2(X, y, alpha=10.0)

    model = Ridge(alpha=10.0)
    model.fit(X, y)

    coefficients[sector] = (
        pd.Series(model.coef_, index=feature_cols)
        .sort_values(key=np.abs, ascending=False)
    )

    results.append({
        "sector": sector,
        "observations": len(df),
        "cv_r2": cv_r2
    })

results_df = pd.DataFrame(results).sort_values("cv_r2", ascending=False)
results_df


Unnamed: 0,sector,observations,cv_r2
7,XLRE,120,-0.01064
1,XLC,88,-0.05556
4,XLI,286,-0.07166
2,XLE,286,-0.075125
0,XLB,286,-0.097072
10,XLY,286,-0.104762
9,XLV,286,-0.111273
6,XLP,286,-0.120145
5,XLK,286,-0.121295
8,XLU,286,-0.128579


In [43]:
for sec in results_df["sector"]:
    print(f"\n===== {sec} =====")
    display(coefficients[sec].head(6))



===== XLRE =====


Unnamed: 0,0
yield_spread_lag1,0.017291
fed_funds_change,0.011655
yield_spread_lag12,-0.010919
fed_funds_change_lag1,-0.00929
fed_funds_change_lag6,0.006965
unemployment_change,0.005115



===== XLC =====


Unnamed: 0,0
yield_spread_lag6,-0.015329
yield_spread_lag12,-0.014944
fed_funds_change_lag1,-0.012782
fed_funds_change_lag6,0.012667
yield_spread_lag1,0.011075
fed_funds_change,0.010195



===== XLI =====


Unnamed: 0,0
fed_funds_change,0.023549
fed_funds_change_lag3,0.018334
fed_funds_change_lag12,0.013853
yield_spread_lag3,-0.010359
yield_spread_lag1,0.010083
yield_spread_lag6,-0.006549



===== XLE =====


Unnamed: 0,0
fed_funds_change,0.039355
fed_funds_change_lag1,-0.02592
unemployment_change,0.01663
unemployment_change_lag6,-0.010344
fed_funds_change_lag3,0.009022
industrial_prod_yoy_lag12,-0.00601



===== XLB =====


Unnamed: 0,0
fed_funds_change,0.019798
yield_spread_lag6,-0.007792
unemployment_change,0.00638
industrial_prod_yoy_lag12,-0.006342
fed_funds_change_lag3,0.006029
fed_funds_change_lag1,-0.00477



===== XLY =====


Unnamed: 0,0
fed_funds_change,0.018186
yield_spread_lag1,0.01311
fed_funds_change_lag12,0.011425
fed_funds_change_lag3,0.009043
yield_spread_lag6,-0.008419
unemployment_change,0.008388



===== XLV =====


Unnamed: 0,0
fed_funds_change,0.008523
unemployment_change,0.00713
yield_spread_lag1,0.006525
fed_funds_change_lag12,0.005952
yield_spread_lag6,-0.00592
fed_funds_change_lag3,0.004295



===== XLP =====


Unnamed: 0,0
fed_funds_change,0.010202
fed_funds_change_lag3,0.008127
yield_spread_lag1,0.008025
fed_funds_change_lag1,-0.007576
fed_funds_change_lag12,0.005674
yield_spread_lag6,-0.004631



===== XLK =====


Unnamed: 0,0
fed_funds_change_lag12,0.015031
yield_spread_lag6,-0.014503
fed_funds_change,0.01169
fed_funds_change_lag6,0.010186
yield_spread_lag12,0.006621
industrial_prod_yoy_lag12,-0.006319



===== XLU =====


Unnamed: 0,0
yield_spread_lag1,0.015925
fed_funds_change,0.015123
fed_funds_change_lag12,0.013779
yield_spread,-0.011673
fed_funds_change_lag1,-0.009268
fed_funds_change_lag3,0.00883



===== XLF =====


Unnamed: 0,0
fed_funds_change,0.034284
fed_funds_change_lag3,0.029077
yield_spread_lag3,-0.018732
fed_funds_change_lag12,0.01553
yield_spread,0.011668
industrial_prod_yoy_lag12,-0.008555


In [44]:
feature_cols = [c for c in dataset.columns if c not in ["Date", "sector", "return"]]

models = {}

for sec in sorted(dataset["sector"].unique()):
    df = dataset[dataset["sector"] == sec].sort_values("Date")
    X = df[feature_cols]
    y = df["return"]

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("ridge", Ridge(alpha=10.0))
    ])
    pipe.fit(X, y)
    models[sec] = pipe

list(models.keys())[:5]

['XLB', 'XLC', 'XLE', 'XLF', 'XLI']

In [45]:
H = 12  # months ahead
last_date = macro_features.index.max()
future_dates = pd.date_range(last_date + pd.offsets.MonthEnd(1), periods=H, freq="ME")

# Baseline: keep macro features flat at last observed value (simple, defensible)
baseline_path = pd.DataFrame(
    np.tile(macro_features.iloc[-1].values, (H, 1)),
    index=future_dates,
    columns=macro_features.columns
)

def make_scenario(base: pd.DataFrame, name: str):
    sc = base.copy()
    sc.attrs["name"] = name
    return sc


In [46]:
rate_shock = make_scenario(baseline_path, "Rate Shock (+100bps over 6m)")
rate_shock.loc[rate_shock.index[:6], "fed_funds_change"] += (1.00 / 6)  # 1.00 = 100 bps

# 2) Recession: unemployment rises, production falls, curve inverts
recession = make_scenario(baseline_path, "Recession")
recession["unemployment_change"] += 0.10  # +0.10 pp each month (adjust if you want)
recession["industrial_prod_yoy"] += -0.02 # -2% YoY drag
recession["yield_spread"] += -0.75        # more inverted curve

# 3) Stagflation: inflation up, growth down, curve flatter
stagflation = make_scenario(baseline_path, "Stagflation")
stagflation["cpi_yoy"] += 0.02            # +2% YoY inflation pressure
stagflation["industrial_prod_yoy"] += -0.015
stagflation["yield_spread"] += -0.25

In [47]:
lags = [1, 3, 6, 12]

def build_lagged_from_history_and_future(history: pd.DataFrame, future: pd.DataFrame):
    full = pd.concat([history, future], axis=0)

    parts = [full]
    for lag in lags:
        tmp = full.shift(lag)
        tmp.columns = [f"{c}_lag{lag}" for c in full.columns]
        parts.append(tmp)

    full_lagged = pd.concat(parts, axis=1)
    # Return only the future period rows, dropping any rows with missing lags
    out = full_lagged.loc[future.index].dropna()
    return out

In [48]:
def predict_scenario_returns(scenario_future_features: pd.DataFrame):
    X_future = build_lagged_from_history_and_future(macro_features, scenario_future_features)

    # Make sure feature order matches training
    X_future = X_future[feature_cols]

    preds = pd.DataFrame(index=X_future.index)
    for sec, pipe in models.items():
        preds[sec] = pipe.predict(X_future)

    return preds

pred_rate = predict_scenario_returns(rate_shock)
pred_recession = predict_scenario_returns(recession)
pred_stag = predict_scenario_returns(stagflation)

pred_rate.head()

Unnamed: 0,XLB,XLC,XLE,XLF,XLI,XLK,XLP,XLRE,XLU,XLV,XLY
2025-12-31,0.006258,0.035419,0.035544,-0.007832,0.002249,-0.003086,0.008848,0.026179,0.009114,0.002512,-0.002045
2026-01-31,-0.002207,0.01217,0.009875,-0.023909,-0.01118,-0.007532,-0.000703,0.008574,-0.003128,0.001842,-0.009932
2026-02-28,-0.009506,-0.004883,-0.001698,-0.049987,-0.027613,-0.014925,-0.008187,-0.001787,-0.009427,-0.002011,-0.018652
2026-03-31,-0.003635,0.002142,0.001543,-0.025226,-0.011748,-0.003693,-0.001294,0.0022,0.000933,0.001947,-0.009115
2026-04-30,-0.001884,-0.006849,0.001828,-0.021789,-0.009047,-0.004515,-0.00092,-0.002148,0.002854,0.001193,-0.007046


In [49]:
def summarize(pred: pd.DataFrame, name: str):
    cum_log = pred.sum(axis=0)
    cum_simple = np.exp(cum_log) - 1
    out = pd.DataFrame({
        "scenario": name,
        "cum_12m_return": cum_simple
    }).sort_values("cum_12m_return")
    return out

summary = pd.concat([
    summarize(pred_rate, rate_shock.attrs["name"]),
    summarize(pred_recession, recession.attrs["name"]),
    summarize(pred_stag, stagflation.attrs["name"])
])

summary

Unnamed: 0,scenario,cum_12m_return
XLF,Rate Shock (+100bps over 6m),-0.33014
XLI,Rate Shock (+100bps over 6m),-0.17922
XLY,Rate Shock (+100bps over 6m),-0.158599
XLK,Rate Shock (+100bps over 6m),-0.134218
XLC,Rate Shock (+100bps over 6m),-0.083012
XLB,Rate Shock (+100bps over 6m),-0.080774
XLRE,Rate Shock (+100bps over 6m),-0.055782
XLP,Rate Shock (+100bps over 6m),-0.036602
XLU,Rate Shock (+100bps over 6m),-0.032447
XLV,Rate Shock (+100bps over 6m),-0.01829


In [50]:
# Example equal-weight portfolio across available sectors
weights = pd.Series(1.0, index=pred_rate.columns)
weights = weights / weights.sum()

def portfolio_path(pred: pd.DataFrame, w: pd.Series):
    w = w.reindex(pred.columns).fillna(0)
    port_log = pred.mul(w, axis=1).sum(axis=1)
    port_simple = np.exp(port_log.cumsum()) - 1
    return port_simple

port_rate = portfolio_path(pred_rate, weights)
port_recession = portfolio_path(pred_recession, weights)
port_stag = portfolio_path(pred_stag, weights)

pd.DataFrame({
    "Rate Shock": port_rate,
    "Recession": port_recession,
    "Stagflation": port_stag
}).tail()

Unnamed: 0,Rate Shock,Recession,Stagflation
2026-07-31,-0.043153,-0.099409,-0.147116
2026-08-31,-0.052308,-0.11231,-0.16815
2026-09-30,-0.066616,-0.12462,-0.188288
2026-10-31,-0.082821,-0.138744,-0.209759
2026-11-30,-0.102358,-0.156038,-0.233748
