In [1]:
from pathlib import Path
from datetime import datetime
import pandas as pd


In [2]:
# LOCKED: MUST MATCH 00_download_fomc_statements_fraser.py
# Directory that is scanned for raw `*_fomc_statement.txt` files.
# NOTE(review): this is a relative path, so it resolves against the current
# working directory of the kernel/process. Confirm the notebook is launched
# from the same base directory the downloader writes under; otherwise the
# scan finds nothing and an empty canonical parquet is written.
STATEMENT_RAW_DIR = Path("data/spine_us/us_sentiment/raw/fomc_statements")

# Destination parquet file for the canonical statement table (also relative
# to the current working directory).
CANONICAL_OUT = Path(
    "data/spine_us/us_sentiment/canonical/us_sentiment_statement_canonical.parquet"
)


In [3]:
def parse_meeting_date(fp: Path):
    """
    Extract the meeting date encoded at the start of a statement filename.

    Expected names, for example:
      2014-06-18_fomc_statement.txt
      2020-03-15_fomc_statement.txt

    Everything before the first underscore of the file name is parsed as
    YYYY-MM-DD and returned as a ``datetime.date``. A ``ValueError`` is
    raised when that prefix is not a date in that format.
    """
    date_prefix, _, _ = fp.name.partition("_")  # e.g. '2014-06-18'
    parsed = datetime.strptime(date_prefix, "%Y-%m-%d")
    return parsed.date()



In [4]:
def main():
    """
    Build the canonical FOMC-statement table from the raw text files.

    Scans STATEMENT_RAW_DIR for ``*_fomc_statement.txt`` files, reads each
    one, skips files that are empty after stripping whitespace, and writes one
    row per remaining file (meeting_date, source, path, text) to CANONICAL_OUT
    as parquet, sorted by meeting_date.

    When no usable statements are found (no matching files, or every matching
    file is blank), an empty frame with the same four columns is written so
    that the output schema does not depend on the input.

    Raises:
        ValueError: propagated from parse_meeting_date when a matching
            filename does not start with a YYYY-MM-DD date.
    """
    columns = ["meeting_date", "source", "path", "text"]

    # Creates the raw directory if it is missing, so the glob below runs
    # against an existing (possibly empty) directory.
    STATEMENT_RAW_DIR.mkdir(parents=True, exist_ok=True)

    print(f"[FedSpeak] Looking for statements in: {STATEMENT_RAW_DIR.resolve()}")
    files = sorted(STATEMENT_RAW_DIR.glob("*_fomc_statement.txt"))
    print(f"[FedSpeak] Found {len(files)} raw statement files.")

    records = []
    for fp in files:
        # errors="ignore" silently drops bytes that are not valid UTF-8.
        text = fp.read_text(encoding="utf-8", errors="ignore").strip()
        if not text:
            # Blank / whitespace-only files carry no statement; skip them.
            continue

        records.append(
            {
                "meeting_date": parse_meeting_date(fp),
                "source": "fomc_statement",
                "path": str(fp),
                "text": text,
            }
        )

    # Passing columns= explicitly keeps the four expected columns even when
    # `records` is empty. Without it, a directory containing only blank files
    # produced a column-less frame and sort_values("meeting_date") raised
    # KeyError.
    df = (
        pd.DataFrame(records, columns=columns)
        .sort_values("meeting_date")
        .reset_index(drop=True)
    )

    if not files:
        print("[FedSpeak] âš  No statement files found; writing empty canonical parquet.")
    else:
        print(f"[FedSpeak] âœ… Ingested {len(df)} statement files into canonical.")

    CANONICAL_OUT.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(CANONICAL_OUT, index=False)
    print(f"[FedSpeak] âœ… Saved canonical FOMC statements to {CANONICAL_OUT}")


if __name__ == "__main__":
    main()

[FedSpeak] Looking for statements in: C:\Users\Rand Sobczak Jr\_rts\3_AI\the_Spine\notebooks\us_sentiment\ingest\data\spine_us\us_sentiment\raw\fomc_statements
[FedSpeak] Found 0 raw statement files.
[FedSpeak] âš  No statement files found; writing empty canonical parquet.
[FedSpeak] âœ… Saved canonical FOMC statements to data\spine_us\us_sentiment\canonical\us_sentiment_statement_canonical.parquet
