In [4]:
%pip install -U nbformat

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install plotly

Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install pandas_datareader

Note: you may need to restart the kernel to use updated packages.


In [7]:
import pandas as pd
import numpy as np 
from pandas_datareader import data as web
import yfinance as yf

import plotly.io as pio
pio.renderers.default = "notebook_connected"  # good default for Jupyter
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

In [8]:
START = "2000-01-01"

FRED = {
    # Growth
    "INDPRO":   "Industrial Production",
    "PAYEMS":   "Nonfarm Payrolls",
    "UNRATE":   "Unemployment Rate",
    "ICSA":     "Initial Claims",
    "RSAFS":    "Retail Sales",

    # ISM PMI (NAPM) removed from FRED so replace with free survey proxies 
    "GACDISA066MSFRBNY": "NY Fed Empire: General Business Conditions (diffusion)",
    "GACDFSA066MSFRBPHI": "Philly Fed: Current General Activity (diffusion)",

    # Inflation
    "CPILFESL": "Core CPI",
    "PCEPILFE": "Core PCE",
    "CES0500000003": "Avg Hourly Earnings",

    # Liquidity / financial conditions / credit
    "WALCL":    "Fed Balance Sheet",
    "RRPONTSYD":"ON RRP",
    "WTREGEN":  "Treasury General Account (TGA)",
    "NFCI":     "Chicago Fed NFCI",
    "BAMLH0A0HYM2": "HY OAS",
}

BUCKETS = {
    "growth":   [
        "INDPRO","PAYEMS","UNRATE","ICSA","RSAFS",
        "GACDISA066MSFRBNY","GACDFSA066MSFRBPHI"
    ],
    "inflation":["CPILFESL","PCEPILFE","CES0500000003"],
    "liquidity":["WALCL","RRPONTSYD","WTREGEN","NFCI","BAMLH0A0HYM2"],
}

# +1 means "higher value = better growth / more inflation / easier liquidity"
# -1 means invert (higher value = worse growth or tighter liquidity)
SIGN = {
    # Growth
    "INDPRO": +1,
    "PAYEMS": +1,
    "UNRATE": -1,
    "ICSA":   -1,
    "RSAFS":  +1,
    "GACDISA066MSFRBNY": +1,
    "GACDFSA066MSFRBPHI": +1,

    # Inflation
    "CPILFESL": +1,
    "PCEPILFE": +1,
    "CES0500000003": +1,

    # Liquidity / conditions / credit
    "WALCL": +1,
    "RRPONTSYD": -1,
    "WTREGEN":  -1,
    "NFCI":     -1,
    "BAMLH0A0HYM2": -1,
}

ASSETS = ["SPY","TLT","GLD","UUP","HYG"]

In [9]:
def fetch_fred(codes, start=START):
    df = pd.DataFrame()
    for code in codes:
        s = web.DataReader(code, "fred", start)[code]
        df[code] = s
    return df

raw = fetch_fred(list(FRED.keys()), START)

# Put everything on a monthly grid (month-end "state")
m = raw.resample("ME").last()
m.tail()

Unnamed: 0_level_0,INDPRO,PAYEMS,UNRATE,ICSA,RSAFS,GACDISA066MSFRBNY,GACDFSA066MSFRBPHI,CPILFESL,PCEPILFE,CES0500000003,WALCL,RRPONTSYD,WTREGEN,NFCI,BAMLH0A0HYM2
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-07-31,101.8738,159511,4.2,,727727.0,5.5,15.9,328.656,126.426,36.43,,245.53,,,2.91
2025-08-31,101.6019,159485,4.3,,731700.0,11.9,-0.3,329.793,126.707,36.58,,97.426,,-0.52147,3.13
2025-09-30,101.6729,159593,4.4,,732444.0,-8.7,23.2,330.542,126.955,36.65,,,,,2.84
2025-10-31,101.616,159488,,,732633.0,10.7,-12.8,,,36.81,6587119.0,10.179,805139.0,,2.81
2025-11-30,101.7935,159552,4.6,229000.0,,18.7,-1.7,331.068,,36.86,,,,,


In [10]:
# 0) Defensive cleaning: Ensure no duplicate index issues before resampling
raw = raw.sort_index()
raw = raw[~raw.index.duplicated(keep="last")]

# 1) Daily calendar grid
raw_d = raw.asfreq("D")

# 2) Carry the last known "state" forward (Fixes Weekends & Holidays)
raw_d = raw_d.ffill()

# 3) Handle Pre-History for RRP
# Essential for calculating "Net Liquidity" (Fed Bal Sheet - TGA - RRP) back to 2000.
# Without this, your liquidity formula will be NaN for 13 years.
if "RRPONTSYD" in raw_d.columns:
    raw_d["RRPONTSYD"] = raw_d["RRPONTSYD"].fillna(0.0)

# 4) Month-end snapshot
m = raw_d.resample("ME").last()

m

Unnamed: 0_level_0,INDPRO,PAYEMS,UNRATE,ICSA,RSAFS,GACDISA066MSFRBNY,GACDFSA066MSFRBPHI,CPILFESL,PCEPILFE,CES0500000003,WALCL,RRPONTSYD,WTREGEN,NFCI,BAMLH0A0HYM2
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2000-01-31,91.5380,131011.0,4.0,286000.0,261545.0,,11.2,179.300,74.306,,,0.000,,,
2000-02-29,91.8239,131121.0,4.1,286000.0,265686.0,,14.5,179.400,74.415,,,0.000,,,4.87
2000-03-31,92.1504,131604.0,4.0,286000.0,269019.0,,19.4,180.000,74.568,,,0.000,,,5.11
2000-04-30,92.6989,131883.0,3.8,266000.0,264067.0,,10.0,180.300,74.617,,,0.000,,,5.11
2000-05-31,92.9499,132105.0,4.0,266000.0,265992.0,,13.9,180.700,74.697,,,0.000,,,5.83
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-07-31,101.8738,159511.0,4.2,224000.0,727727.0,5.5,15.9,328.656,126.426,36.43,6852491.0,245.530,719873.0,-0.45124,2.91
2025-08-31,101.6019,159485.0,4.3,224000.0,731700.0,11.9,-0.3,329.793,126.707,36.58,6852491.0,97.426,719873.0,-0.52147,3.13
2025-09-30,101.6729,159593.0,4.4,224000.0,732444.0,-8.7,23.2,330.542,126.955,36.65,6852491.0,97.426,719873.0,-0.52147,2.84
2025-10-31,101.6160,159488.0,4.4,224000.0,732633.0,10.7,-12.8,330.542,126.955,36.81,6587119.0,10.179,805139.0,-0.52147,2.81


In [11]:
missing_pct = (m.isna().mean() * 100).sort_values(ascending=False).round(2)
missing_pct

CES0500000003         23.79
WALCL                 11.58
WTREGEN               11.58
GACDISA066MSFRBNY      5.79
NFCI                   2.57
BAMLH0A0HYM2           0.32
INDPRO                 0.00
PAYEMS                 0.00
UNRATE                 0.00
ICSA                   0.00
RSAFS                  0.00
GACDFSA066MSFRBPHI     0.00
CPILFESL               0.00
PCEPILFE               0.00
RRPONTSYD              0.00
dtype: float64

In [12]:
# Signals & Rolling Z scores (robust to flat series thayt early RRP)
def yoy(x):
    return x.pct_change(12) * 100

def delta_3m(x):
    return x - x.shift(3)

def rolling_z(x, window=120, min_periods=60):
    mu = x.rolling(window, min_periods=min_periods).mean()
    sd = x.rolling(window, min_periods=min_periods).std(ddof=0)
    sd = sd.replace(0, np.nan)  # IMPORTANT: as this prevents inf when series is flat (e.g., RRP=0 for years)
    return (x - mu) / sd

# Transform rules (simple + sensible for V1)
USE_YOY = {
    "INDPRO", "PAYEMS", "RSAFS",
    "CPILFESL", "PCEPILFE", "CES0500000003",
    "WALCL"  # optional; if you prefer level/change instead, remove it
}

USE_LEVEL = {
    "GACDISA066MSFRBNY", "GACDFSA066MSFRBPHI",   # surveys are already stationary-ish
    "NFCI", "BAMLH0A0HYM2",                      # conditions/spreads: level is meaningful
    "RRPONTSYD", "WTREGEN",                      # stocks / balances
}

# everything else defaults to 3M change (UNRATE, ICSA)
signals = pd.DataFrame(index=m.index)
zscores = pd.DataFrame(index=m.index)

for code in m.columns:
    x = m[code]

    if code in USE_YOY:
        sig = yoy(x)
    elif code in USE_LEVEL:
        sig = x
    else:
        sig = delta_3m(x)

    # Apply sign convention so "higher = better growth / more inflation / easier liquidity"
    if code in SIGN:
        sig = SIGN[code] * sig

    signals[code] = sig
    zscores[code] = rolling_z(sig, window=120, min_periods=60)

signals.tail(), zscores.tail()

(              INDPRO    PAYEMS  UNRATE    ICSA     RSAFS  GACDISA066MSFRBNY  \
 DATE                                                                          
 2025-07-31  1.898561  0.954412    -0.0    -0.0  4.134309                5.5   
 2025-08-31  1.165976  0.892620    -0.1    -0.0  4.972606               11.9   
 2025-09-30  1.868079  0.807888    -0.3    -0.0  4.180185               -8.7   
 2025-10-31  2.157948  0.713573    -0.2    -0.0  3.469007               10.7   
 2025-11-30  2.518821  0.588202    -0.3 -5000.0  2.876942               18.7   
 
             GACDFSA066MSFRBPHI  CPILFESL  PCEPILFE  CES0500000003      WALCL  \
 DATE                                                                           
 2025-07-31                15.9  3.048603  2.859793       3.877958  -6.926788   
 2025-08-31                -0.3  3.112191  2.906731       3.831961  -6.926788   
 2025-09-30                23.2  3.025543  2.825879       3.736202  -6.926788   
 2025-10-31               -12.8  

In [13]:
def composite_z(df_z, members, min_k=None):
    members = [c for c in members if c in df_z.columns]
    sub = df_z[members]

    if min_k is None:
        # require at least half the bucket, minimum 2
        min_k = max(2, len(members)//2)

    count = sub.count(axis=1)
    score = sub.mean(axis=1, skipna=True)
    return score.where(count >= min_k)

scores = pd.DataFrame(index=zscores.index)
scores["growth"]    = composite_z(zscores, BUCKETS["growth"])
scores["inflation"] = composite_z(zscores, BUCKETS["inflation"])
scores["liquidity"] = composite_z(zscores, BUCKETS["liquidity"])

scores.tail()

Unnamed: 0_level_0,growth,inflation,liquidity
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-07-31,0.100671,0.086121,0.147614
2025-08-31,0.007867,0.089369,0.224328
2025-09-30,4.8e-05,0.013027,0.271791
2025-10-31,-0.117011,-0.128036,0.232748
2025-11-30,0.016441,-0.26312,0.233527


In [14]:
def regime_label(g, i):
    if pd.isna(g) or pd.isna(i):
        return np.nan
    if g >= 0 and i >= 0: return "Reflation (G↑ I↑)"
    if g >= 0 and i <  0: return "Goldilocks (G↑ I↓)"
    if g <  0 and i >= 0: return "Stagflation (G↓ I↑)"
    return "Deflation (G↓ I↓)"

regime = pd.Series(
    [regime_label(g, i) for g, i in zip(scores["growth"], scores["inflation"])],
    index=scores.index,
    name="regime"
)

regime.value_counts(dropna=True)

regime
Reflation (G↑ I↑)      78
Stagflation (G↓ I↑)    74
Goldilocks (G↑ I↓)     60
Deflation (G↓ I↓)      28
Name: count, dtype: int64

In [17]:
import plotly.io as pio
import plotly.graph_objects as go
pio.renderers.default = "vscode"

fig = go.Figure()
for c in ["growth", "inflation", "liquidity"]:
    fig.add_trace(go.Scatter(x=scores.index, y=scores[c], mode="lines", name=f"{c.title()} (z)"))

fig.add_hline(y=0)
fig.update_layout(
    title="Macro Tape — Growth / Inflation / Liquidity (rolling z composites)",
    hovermode="x unified",
    height=420
)
fig.show()

In [None]:
px_m = yf.download(ASSETS, start=START, auto_adjust=True, progress=False)["Close"].resample("M").last()
ret_1m = px_m.pct_change(1) * 100

df = pd.concat([regime, ret_1m], axis=1).dropna()
display(df.groupby("regime")[ASSETS].mean().round(2))


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



Unnamed: 0_level_0,SPY,TLT,GLD,UUP,HYG
regime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Deflation (G↓ I↓),0.78,-0.07,1.17,-0.27,1.12
Goldilocks (G↑ I↓),1.07,0.68,0.25,0.11,0.5
Reflation (G↑ I↑),0.76,0.44,0.75,0.4,0.21
Stagflation (G↓ I↑),1.07,0.14,1.5,0.16,0.34


In [21]:
import plotly.express as px

df_scatter = (
    scores.assign(regime=regime)
    .dropna(subset=["growth","inflation","regime"])
    .reset_index()          # creates a DATE column (based on your screenshot)
)

fig = px.scatter(
    df_scatter,
    x="growth", y="inflation",
    color="regime",
    hover_data=["DATE"],
    title="Growth vs Inflation — Regime Map"
)
fig.add_hline(y=0); fig.add_vline(x=0)
fig.show()

In [22]:
import yfinance as yf
import plotly.express as px

px_m = yf.download(ASSETS, start=START, auto_adjust=True, progress=False)["Close"].resample("M").last()
ret_1m = px_m.pct_change(1) * 100
ret_3m = px_m.pct_change(3) * 100

df = pd.concat([regime, ret_1m.add_prefix("1m_"), ret_3m.add_prefix("3m_")], axis=1).dropna()

by_regime_1m = df.groupby("regime")[[f"1m_{a}" for a in ASSETS]].mean().round(2)
fig = px.imshow(by_regime_1m.values, x=by_regime_1m.columns, y=by_regime_1m.index,
                text_auto=True, aspect="auto", title="Avg 1M Returns by Regime (%)")
fig.show()


'M' is deprecated and will be removed in a future version, please use 'ME' instead.



In [23]:
import plotly.express as px

last = zscores.dropna(how="all").index[-1]

drivers = (
    zscores.loc[last].dropna()
    .rename("z").to_frame()
    .assign(name=lambda d: d.index.map(lambda c: FRED.get(c, c)))
    .sort_values("z", ascending=False)
    .reset_index().rename(columns={"index":"code"})
)

fig = px.bar(drivers.head(12), x="z", y="name", orientation="h",
             title=f"Top +Z Drivers (as of {last.date()})")
fig.show()

fig = px.bar(drivers.tail(12), x="z", y="name", orientation="h",
             title=f"Top -Z Drivers (as of {last.date()})")
fig.show()

In [24]:
regime.value_counts()

regime
Reflation (G↑ I↑)      78
Stagflation (G↓ I↑)    74
Goldilocks (G↑ I↓)     60
Deflation (G↓ I↓)      28
Name: count, dtype: int64

In [25]:
df_reg = pd.DataFrame({"regime": regime, "g": scores["growth"], "i": scores["inflation"]}).dropna()
df_reg[df_reg["regime"].str.contains("Deflation")].tail(24)

Unnamed: 0_level_0,regime,g,i
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-01-31,Deflation (G↓ I↓),-3.236839,-2.157273
2009-02-28,Deflation (G↓ I↓),-2.842057,-1.946574
2009-03-31,Deflation (G↓ I↓),-2.738507,-2.036042
2009-04-30,Deflation (G↓ I↓),-2.350955,-1.5917
2009-05-31,Deflation (G↓ I↓),-2.096718,-1.761619
2009-06-30,Deflation (G↓ I↓),-1.735614,-1.969044
2009-07-31,Deflation (G↓ I↓),-1.48756,-2.210992
2009-08-31,Deflation (G↓ I↓),-1.175283,-2.212638
2009-09-30,Deflation (G↓ I↓),-0.937003,-2.018727
2009-10-31,Deflation (G↓ I↓),-0.787985,-1.18129
