# In [7]:
# 📓 Notebook: Real Economic Data Loader for synthetic_signals.csv Format

import pandas as pd
import json
import os
from fredapi import Fred

# --- Config ---
# The FRED API key is read from the FRED_API_KEY environment variable when it
# is set, so the real key does not have to be stored in the notebook. The
# original placeholder is kept as the fallback value.
API_KEY = os.environ.get("FRED_API_KEY", "YOUR API KEY HERE")

# Observation window passed to every series request (ISO dates).
START_DATE = "2000-01-01"
END_DATE = "2024-01-01"

# Folder that receives the generated CSV and metadata JSON.
OUTPUT_FOLDER = "sim_data"

# Shared FRED client used by the loader below.
fred = Fred(api_key=API_KEY)

# --- Example Concept-Mapped Signals ---
# One row per signal: (FRED series id, concept pool, output column alias).
# NOTE(review): CPIAUCSL is published by FRED as a price *index level*, not a
# percentage rate, even though its alias is "inflation_rate" -- confirm that
# downstream consumers expect the raw index before relying on the name.
_FRED_SIGNAL_ROWS = (
    ("GDP", "linchpins", "gross_domestic_product"),
    ("CPIAUCSL", "layman_friendly", "inflation_rate"),
    ("UNRATE", "linchpins", "unemployment_rate"),
    ("FEDFUNDS", "linchpins", "federal_funds_rate"),
    ("UMCSENT", "layman_friendly", "consumer_sentiment"),
    # Add more signals here...
)

# Mapping of series id -> metadata dict; this is also what gets dumped to the
# metadata JSON, so the inner key order (source, concept_pool, alias) matters.
real_signal_map = {
    series_id: {"source": "FRED", "concept_pool": pool, "alias": alias}
    for series_id, pool, alias in _FRED_SIGNAL_ROWS
}

# --- Function: Load + Format Signals ---
def load_and_format_signals(signal_map, start, end, client=None):
    """Fetch every series in *signal_map* and return one monthly DataFrame.

    Args:
        signal_map: Mapping of series id -> metadata dict. Only the
            ``"alias"`` entry is read here; it becomes the column name.
        start: Passed through as ``observation_start`` to ``get_series``.
        end: Passed through as ``observation_end`` to ``get_series``.
        client: Optional object exposing ``get_series(series_id,
            observation_start=..., observation_end=...)``. Defaults to the
            module-level ``fred`` client.

    Returns:
        A DataFrame indexed by month-end ``date`` with one column per
        successfully loaded alias, holding the mean of the observations in
        each month. Months where every column is missing are dropped. If no
        series could be loaded, an empty DataFrame with an empty ``date``
        index is returned.
    """
    source = fred if client is None else client

    fetched = {}
    for series_id, meta in signal_map.items():
        try:
            fetched[meta["alias"]] = source.get_series(
                series_id, observation_start=start, observation_end=end
            )
        except Exception as e:
            # Deliberately best-effort: one failing series is reported and
            # skipped so the remaining ones are still loaded.
            print(f"Error loading {series_id}: {e}")

    if not fetched:
        # Nothing loaded: resampling a frame without a DatetimeIndex would
        # raise, so hand back an explicitly empty result instead.
        return pd.DataFrame(index=pd.DatetimeIndex([], name="date"))

    # Outer-join all series on the union of their dates. Assigning columns
    # one by one into an empty frame would pin the index to the first series
    # (e.g. a quarterly one) and silently discard the other dates.
    df = pd.concat(fetched, axis=1)
    df.index.name = "date"

    # An offset object is used instead of the "M" string alias, which newer
    # pandas releases deprecate; the month-end binning is the same.
    return df.resample(pd.offsets.MonthEnd()).mean().dropna(how="all")

# --- Generate & Save ---
# Pull the configured signals for the configured date window.
real_df = load_and_format_signals(real_signal_map, START_DATE, END_DATE)

# Make sure the destination folder exists before writing into it.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

signals_csv_path = os.path.join(OUTPUT_FOLDER, "real_signals.csv")
metadata_json_path = os.path.join(OUTPUT_FOLDER, "real_signal_metadata.json")

# Signal values go to CSV; the signal map itself is stored next to it as JSON.
real_df.to_csv(signals_csv_path)

with open(metadata_json_path, "w") as metadata_file:
    json.dump(real_signal_map, metadata_file, indent=2)

print(f"✅ Saved real_signals.csv and real_signal_metadata.json to {OUTPUT_FOLDER}")

# Output: ✅ Saved real_signals.csv and real_signal_metadata.json to sim_data


# Notebook warning output (truncated), echoing the source line:
#   df = df.resample("M").mean().dropna(how="all")  # Ensure regular monthly format
