In [35]:
# 📦 Imports
from fredapi import Fred
import pandas as pd
import os
from scipy.stats import zscore

# 🔧 Setup
# The FRED key is taken from the FRED_API_KEY environment variable so a real
# credential never has to live in this file; the original placeholder is kept
# as the fallback when the variable is not set.
API_KEY = os.environ.get("FRED_API_KEY", "YOUR API KEY HERE")
fred = Fred(api_key=API_KEY)
start_date = "2000-01-01"  # passed to FRED as observation_start
end_date = "2025-01-01"    # passed to FRED as observation_end
freq = "ME"  # Monthly end

# ✅ FRED feature mapping
# Maps FRED series ID -> output column name. Insertion order is the column
# order of the saved CSV, so keep new entries in the position they should
# appear in the file.
#
# NOTE: in the BLS CPI item structure SEHF01 is "Electricity" and SEHF02 is
# "Utility (piped) gas service"; water/sewer is SEHG01. The electricity and
# water columns previously pointed at SEHF02 and SEHF01 respectively, i.e. at
# piped gas and electricity.
features = {
    "DCOILWTICO": "layman__oil_price",
    "GASREGW": "layman__gas_price",
    "MORTGAGE30US": "layman__mortgage_rate",
    "CSUSHPINSA": "layman__housing_prices",
    "CUUR0000SAF11": "layman__grocery_index",
    "RSAFS": "layman__retail_sales",
    "CUUR0000SEHF01": "layman__electricity_cost_index",  # electricity CPI
    "CUUR0000SETA02": "layman__used_car_prices",
    "FEDMINNFRWG": "layman__minimum_wage_trend",      # ✅ NEW
    "CUUR0000SEHG01": "layman__water_bill_index",     # water/sewer CPI
    "CPIAUCSL": "linchpin__cpi_inflation",            # needed for real wage calc
    "UNRATE": "linchpin__unemployment_rate",          # for proxy
    "FEDFUNDS": "linchpin__federal_funds_rate",       # for proxy
}

# 🗓️ Shared calendar: an empty frame indexed by every `freq` period between
# start_date and end_date, which each downloaded series is aligned to.
date_index = pd.date_range(start_date, end_date, freq=freq)
df = pd.DataFrame(index=date_index)

# 📥 Pull FRED data
# Each series is collapsed to one value per `freq` period and then aligned to
# df's index on assignment. Downloading is best-effort: a failed pull still
# gets its column (all missing) so the output schema does not depend on which
# requests succeeded.
for code, alias in features.items():
    try:
        series = fred.get_series(code, observation_start=start_date, observation_end=end_date)
        # Drop missing observations first so each period's value is the last
        # *reported* figure inside that period, instead of whichever row
        # happens to sit on/before the period-end label (which may be NaN).
        # The trailing ffill carries the previous value into periods that
        # have no observation at all.
        series = series.dropna().resample(freq).last().ffill()
        df[alias] = series
        print(f"✅ Loaded {alias}")
    except Exception as e:
        # Broad catch is deliberate (network/API/parse errors all mean "skip
        # this series"). Float NaN rather than pd.NA keeps the column numeric.
        df[alias] = float("nan")
        print(f"❌ Failed to load {alias}: {e}")

# # 🧠 Engineer wage-related signals if minimum wage data loaded
# if df["layman__minimum_wage_trend"].notna().sum() > 0:
#     try:
#         # Real minimum wage (inflation-adjusted)
#         df["layman__real_minimum_wage"] = df["layman__minimum_wage_trend"] / df["linchpin__cpi_inflation"]

#         # Wage pressure index (z-score based)
#         df["layman__wage_pressure_index"] = (
#             zscore(df["linchpin__cpi_inflation"].pct_change(fill_method=None)) * 0.4 -
#             zscore(df["linchpin__unemployment_rate"].diff()) * 0.3 +
#             zscore(df["linchpin__federal_funds_rate"].diff()) * 0.3
#         )

#         # Wage change momentum
#         df["min_wage_momentum"] = df["layman__minimum_wage_trend"].pct_change(fill_method=None)

#         # Residual responsiveness gap
#         df["layman__wage_response_gap"] = df["layman__wage_pressure_index"] - df["min_wage_momentum"]

#         print("✅ Engineered wage-related proxy features")
#     except Exception as e:
#         print(f"⚠️ Skipped wage proxy engineering: {e}")
# else:
#     print("⚠️ Skipping wage signal proxies — no usable minimum wage data")

# 💾 Make sure the output folder exists before anything is written
os.makedirs("layman_batches", exist_ok=True)

# 🧼 Final formatting: forward-fill remaining gaps and label the index column
df = df.ffill()
df.index.name = "date"

# 💾 Save batch
df.to_csv("layman_batches/Batch_Layman_01.csv")
print("✅ Saved Batch_Layman_01.csv")

✅ Loaded layman__oil_price
✅ Loaded layman__gas_price
✅ Loaded layman__mortgage_rate
✅ Loaded layman__housing_prices
✅ Loaded layman__grocery_index
✅ Loaded layman__retail_sales
✅ Loaded layman__electricity_cost_index
✅ Loaded layman__used_car_prices
✅ Loaded layman__minimum_wage_trend
✅ Loaded layman__water_bill_index
✅ Loaded linchpin__cpi_inflation
✅ Loaded linchpin__unemployment_rate
✅ Loaded linchpin__federal_funds_rate
✅ Saved Batch_Layman_01.csv
