In [1]:
from pathlib import Path
import pandas as pd, numpy as np

DATA_DIR = Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

ohlcv_path = DATA_DIR / "ohlcv.csv"  # expected from masterframes_ingestion.ipynb
df = pd.read_csv(ohlcv_path)
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values(["ticker","date"])

# --- helpers ---
def rsi14(close):
    delta = close.diff()
    up = delta.clip(lower=0.0)
    dn = -delta.clip(upper=0.0)
    roll_up = up.ewm(alpha=1/14, adjust=False).mean()
    roll_dn = dn.ewm(alpha=1/14, adjust=False).mean()
    rs = roll_up / roll_dn.replace(0, np.nan)
    return 100 - (100/(1+rs))

def macd(close, fast=12, slow=26, signal=9):
    ema_fast = close.ewm(span=fast, adjust=False).mean()
    ema_slow = close.ewm(span=slow, adjust=False).mean()
    line = ema_fast - ema_slow
    sig  = line.ewm(span=signal, adjust=False).mean()
    hist = line - sig
    return line, sig, hist

def rolling_vol(ret, win):
    return ret.rolling(win).std() * np.sqrt(252)

# daily returns
df["ret"] = df.groupby("ticker")["close"].pct_change()

# compute per ticker
out = []
tickers = df["ticker"].unique().tolist()
has_spy = "SPY" in tickers
spy_ret = None

if has_spy:
    spy = df[df["ticker"]=="SPY"][["date","ret"]].rename(columns={"ret":"ret_spy"})
    spy_ret = spy.set_index("date")["ret_spy"]

for tk, g in df.groupby("ticker", sort=False):
    g = g.sort_values("date").set_index("date")
    close = g["close"]

    rec = pd.DataFrame(index=g.index)
    rec["ticker"] = tk
    rec["sma20"] = close.rolling(20).mean()
    rec["sma50"] = close.rolling(50).mean()
    rec["ema20"] = close.ewm(span=20, adjust=False).mean()
    rec["ema50"] = close.ewm(span=50, adjust=False).mean()
    rec["rsi14"] = rsi14(close)

    line, sig, hist = macd(close)
    rec["macd_line"] = line
    rec["macd_signal"] = sig
    rec["macd_hist"] = hist

    rec["vol20"] = rolling_vol(g["ret"], 20)
    rec["vol60"] = rolling_vol(g["ret"], 60)

    if has_spy and tk != "SPY":
        joined = pd.concat([g["ret"], spy_ret], axis=1).dropna()
        joined.columns = ["ret_tk","ret_spy"]
        corr60 = joined["ret_tk"].rolling(60).corr(joined["ret_spy"])
        rec["rollcorr60_spy"] = corr60.reindex(rec.index)
    else:
        rec["rollcorr60_spy"] = np.nan

    rec = rec.reset_index().rename(columns={"index":"date"})
    out.append(rec)

wide = pd.concat(out, ignore_index=True)
# long-form (ticker, date, metric, value)
long = wide.melt(id_vars=["ticker","date"], var_name="metric", value_name="value")

# persist ONE canonical table (long form)
long_path = DATA_DIR / "indicators.csv"
long.to_csv(long_path, index=False)

# (optional) speed companion
parq_path = DATA_DIR / "indicators_wide.parquet"
wide.to_parquet(parq_path, index=False)

long_path, parq_path

ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.