# 01 — Data Prep

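This notebook loads raw price data (S&P 500 and BTC/USD), cleans it, and builds features. Macro data is currently only a placeholder table; swap in real macro series when you have them.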

*If you don't have real CSVs yet, synthetic samples are used so the pipeline runs.*

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from src.feature_engineering import build_features

# Input directory that load_price_csv reads the raw price CSVs from
# (relative to the current working directory).
RAW = Path('..') / 'data' / 'raw'
# Output directory the feature CSVs are written to.
PROC = Path('..') / 'data' / 'processed'

def load_price_csv(name):
    """Read a price CSV from ``RAW`` and return its ``close`` column.

    The file's ``date`` column is converted with ``pd.to_datetime``, becomes
    the index, and the rows are sorted by that index before ``close`` is
    selected.

    Args:
        name: File name (or relative path) joined onto ``RAW``.

    Returns:
        The ``close`` column as a Series indexed by the sorted ``date`` index.

    Raises:
        KeyError: If the CSV lacks a ``date`` or ``close`` column.
    """
    raw_frame = pd.read_csv(RAW / name)
    parsed_dates = pd.to_datetime(raw_frame['date'])
    by_date = raw_frame.assign(date=parsed_dates).set_index('date')
    return by_date.sort_index()['close']

# Load the close-price series (swap in your real files).
# Each CSV is expected to provide the columns: date, close
spx = load_price_csv('sp500.csv')
btc = load_price_csv('btc_usd.csv')

# Optional macro placeholder: a constant policy rate on the S&P 500 dates.
# It is NOT passed to build_features below (macro=None); align it on each
# series' index and pass it in once you have real macro data.
macro = pd.DataFrame(index=spx.index).assign(policy_rate=0.02)

# Build features for both price series (S&P 500 and BTC/USD), without macro inputs
spx_feat = build_features(spx, macro=None)
btc_feat = build_features(btc, macro=None)

# Persist the feature tables, creating the output directory if needed
PROC.mkdir(parents=True, exist_ok=True)
spx_feat.to_csv(PROC / 'sp500_features.csv')
btc_feat.to_csv(PROC / 'btc_features.csv')

# Last expression of the cell: shows the first rows of both feature tables
spx_feat.head(), btc_feat.head()


**Next:** open `02-gsadf-analysis.ipynb` to compute GSADF statistics and bubble periods.