In [None]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from statsmodels.tsa.api import VAR
#from gk_replication_final import GKReplicationFinal

In [None]:
# SECURITY: a key committed in a notebook is readable by anyone with access to
# the file. The FRASER_API_KEY environment variable takes precedence; the
# literal is kept only as a fallback so existing runs keep working.
# TODO: rotate this key and remove the literal.
FRASER_API_KEY = os.environ.get("FRASER_API_KEY") or "750c288e35e67b6a0d2bf5ceb93d5744"

In [None]:
def scalar_or_first(x):
    """Collapse a sequence-like value to its first element.

    Lists, tuples and NumPy arrays yield their first item, or ``None`` when
    they are empty. Every other value is handed back unchanged.
    """
    if isinstance(x, (list, tuple)):
        if not x:
            return None
        return x[0]

    try:
        import numpy as np
    except ImportError:
        # Without NumPy there is no ndarray type to unwrap.
        return x

    if isinstance(x, np.ndarray):
        return None if x.size == 0 else x[0]
    return x

In [None]:
import requests

BASE_URL = "https://fraser.stlouisfed.org/api"

# SECURITY: prefer the FRASER_API_KEY environment variable over a key stored in
# the notebook. The literal remains only as a fallback so existing runs keep
# working. TODO: rotate this key and remove the literal.
API_KEY = os.environ.get("FRASER_API_KEY") or "750c288e35e67b6a0d2bf5ceb93d5744"

# One shared session so every request sends the API key header.
session = requests.Session()
session.headers.update({"X-API-Key": API_KEY})

def fetch_author_records(author_id, role="creator", per_page=100, timeout=30):
    """Download every record FRASER lists for one author, following pagination.

    Parameters
    ----------
    author_id : int
        Author identifier, interpolated into ``/author/{author_id}/records``.
    role : str, default "creator"
        Sent as the ``role`` query parameter.
    per_page : int, default 100
        Sent as the ``limit`` query parameter.
    timeout : float or tuple, default 30
        Passed to ``session.get`` so a stalled connection raises instead of
        blocking the notebook indefinitely.

    Returns
    -------
    list
        The ``records`` entries of all fetched pages, in page order. If the
        server answers with HTTP 500, the records gathered so far are returned.

    Raises
    ------
    requests.HTTPError
        For any HTTP error status other than 500.
    """
    url = f"{BASE_URL}/author/{author_id}/records"
    all_records = []
    page = 1

    while True:
        params = {
            "format": "json",
            "page": page,
            "limit": per_page,
            "role": role,
            "fields": "originInfo!genre!recordInfo!titleInfo",
        }
        resp = session.get(url, params=params, timeout=timeout)

        try:
            resp.raise_for_status()
        except requests.HTTPError as e:
            status = resp.status_code
            print(f"HTTP error for author {author_id} on page {page}: {e} (status {status})")
            if status == 500:
                print("FRASER returned 500 (Internal Server Error). "
                      "Internal error on their end.")
                # Best effort: keep whatever earlier pages already delivered.
                break
            raise

        data = resp.json()
        records = data.get("records") or []
        # None means the response carried no total. Defaulting to 0 would make
        # the "reached total" check true immediately and stop after page 1.
        total = data.get("total")

        all_records.extend(records)

        shown_total = total if total is not None else "unknown"
        print(f"page {page}: got {len(records)} records, total so far {len(all_records)} / {shown_total}")

        if not records:
            break
        if total is not None and len(all_records) >= total:
            break

        page += 1

    return all_records
def records_to_df(records, speaker_name, speaker_role):
    """Flatten raw FRASER record dicts into one row per record.

    Parameters
    ----------
    records : list of dict, or a single dict
        Raw records as returned by the API. The code reads ``titleInfo``,
        ``originInfo.sortDate``, ``originInfo.dateIssued``,
        ``recordInfo.recordIdentifier`` and ``genre``; any of them may be absent.
    speaker_name : str
        Written to the ``speaker`` column of every row.
    speaker_role : str
        Written to the ``speaker_role`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns: date, date_raw, title, subtitle, title_part_number, speaker,
        speaker_role, fraser_record_id, genre, source. ``date`` is ``date_raw``
        parsed with ``errors="coerce"``, so unparseable values become NaT.
        Empty input yields an empty frame with the same columns.
    """
    columns = [
        "date", "date_raw", "title", "subtitle", "title_part_number",
        "speaker", "speaker_role", "fraser_record_id", "genre", "source"
    ]

    if not records:
        return pd.DataFrame(columns=columns)

    records = [records] if isinstance(records, dict) else list(records)
    df_raw = pd.json_normalize(records)

    def extract_from_titleinfo(obj, key):
        # titleInfo may be a list of dicts (first entry wins) or a single dict.
        if isinstance(obj, list) and obj:
            obj = obj[0]
        if isinstance(obj, dict):
            return obj.get(key)
        return None

    def first_or_scalar(x):
        # Unwrap list/tuple/ndarray values to their first element (None if empty).
        if isinstance(x, (list, tuple)):
            return x[0] if x else None
        if isinstance(x, np.ndarray):
            return x[0] if x.size > 0 else None
        return x

    def unwrapped_column(name):
        # Returns None when json_normalize produced no such column.
        col = df_raw.get(name)
        return None if col is None else col.apply(first_or_scalar)

    sort_date = unwrapped_column("originInfo.sortDate")
    date_issued = unwrapped_column("originInfo.dateIssued")

    # Prefer sortDate; fall back to dateIssued row by row.
    if sort_date is not None:
        date_raw = sort_date
        if date_issued is not None:
            date_raw = date_raw.fillna(date_issued)
    else:
        date_raw = date_issued

    # Read titleInfo from the raw records rather than df_raw: json_normalize
    # flattens dict values into "titleInfo.<key>" columns, which would make the
    # dict case of extract_from_titleinfo unreachable.
    title_src = pd.Series(
        [rec.get("titleInfo") if isinstance(rec, dict) else None for rec in records],
        index=df_raw.index,
        dtype=object,
    )
    title = title_src.apply(lambda x: extract_from_titleinfo(x, "title"))
    subtitle = title_src.apply(lambda x: extract_from_titleinfo(x, "subTitle"))
    title_part_number = title_src.apply(lambda x: extract_from_titleinfo(x, "titlePartNumber"))

    rid = unwrapped_column("recordInfo.recordIdentifier")
    genre = unwrapped_column("genre")

    # Passing the index explicitly keeps construction valid even when every
    # value below is a scalar/None (pandas rejects all-scalar dicts otherwise).
    df = pd.DataFrame(
        {
            "date_raw": date_raw,
            "title": title,
            "subtitle": subtitle,
            "title_part_number": title_part_number,
            "speaker": speaker_name,
            "speaker_role": speaker_role,
            "fraser_record_id": rid,
            "genre": genre,
            "source": "FRASER",
        },
        index=df_raw.index,
    )

    df["date"] = pd.to_datetime(df["date_raw"], errors="coerce")

    return df[columns]


# Hard-coded roster of Fed chairs and vice chairs with their FRASER author ids.
# Each entry becomes {"author_id", "name", "role"}; trailing comments give the term.
CHAIR_VC_AUTHORS = [
    {"author_id": author_id, "name": name, "role": role}
    for role, roster in (
        # ---------- CHAIRS ----------
        ("Chair", (
            (611, "Thomas B. McCabe"),               # 1948–1951
            (13, "William McChesney Martin Jr."),    # 1951–1970
            (20, "Arthur F. Burns"),                 # 1970–1978
            (24, "G. William Miller"),               # 1978–1979
            (23, "Paul A. Volcker"),                 # 1979–1987
            (21, "Alan Greenspan"),                  # 1987–2006
            (66, "Ben S. Bernanke"),                 # 2006–2014
            (2380, "Janet L. Yellen"),               # 2014–2018
            (2670, "Jerome H. Powell"),              # 2018–
        )),
        # ---------- VICE CHAIRS ----------
        ("Vice Chair", (
            (56, "C. Canby Balderston"),             # 1955–1966
            (279, "J. L. Robertson"),                # 1966–1973
            (2374, "George W. Mitchell"),            # 1973–1976
            (2399, "Stephen S. Gardner"),            # 1976–1978
            (2372, "Frederick H. Schultz"),          # 1979–1982
            (2394, "Preston Martin"),                # 1982–1986
            (2388, "Manuel H. Johnson"),             # 1986–1990
            (2366, "David W. Mullins Jr."),          # 1991–1994
            (2361, "Alan S. Blinder"),               # 1994–1996
            (2362, "Alice M. Rivlin"),               # 1996–1999
            (2396, "Roger W. Ferguson Jr."),         # 1999–2006
            (400, "Donald L. Kohn"),                 # 2006–2010
            (2380, "Janet L. Yellen"),               # 2010–2014
            (4067, "Stanley Fischer"),               # 2014–2017
            (10431, "Richard H. Clarida"),           # 2018–2022
            (4066, "Lael Brainard"),                 # 2022–2023
        )),
    )
    for author_id, name in roster
]

# Fetch every listed office holder's records, parse them, and merge the
# results into one de-duplicated, date-sorted table.
all_dfs = []
# author_id -> raw records, so an author listed under two roles
# (author_id 2380 appears as both Chair and Vice Chair) is downloaded once.
records_by_author = {}

for meta in CHAIR_VC_AUTHORS:
    aid = meta["author_id"]
    name = meta["name"]
    role = meta["role"]

    print(f"\n=== Fetching FRASER records for {name} ({role}), author_id={aid} ===")
    if aid not in records_by_author:
        records_by_author[aid] = fetch_author_records(aid, role="creator")
    else:
        print(f"reusing {len(records_by_author[aid])} records already fetched for author_id={aid}")
    df_person = records_to_df(records_by_author[aid], name, role)

    print(f"{name}: {len(df_person)} records after parsing")
    all_dfs.append(df_person)

# Leave out empty frames (e.g. an author whose fetch returned nothing):
# pandas deprecates concatenating empty entries.
non_empty_dfs = [frame for frame in all_dfs if not frame.empty]

if non_empty_dfs:
    speeches_chair_vc = pd.concat(non_empty_dfs, ignore_index=True)
else:
    speeches_chair_vc = pd.DataFrame(
        columns=[
            "date", "date_raw", "title", "subtitle", "title_part_number",
            "speaker", "speaker_role", "fraser_record_id", "genre", "source"
        ]
    )

speeches_chair_vc["date"] = pd.to_datetime(speeches_chair_vc["date"], errors="coerce")

# Keep the first occurrence of each record id, but never collapse rows whose id
# is missing: drop_duplicates would treat all missing ids as one duplicate group.
# NOTE(review): for an author listed under two roles, the first listed role
# (Chair) is the label that survives for all of their records — confirm this
# is intended.
is_first_or_unidentified = (
    speeches_chair_vc["fraser_record_id"].isna()
    | ~speeches_chair_vc.duplicated(subset=["fraser_record_id"])
)

speeches_chair_vc = (
    speeches_chair_vc[is_first_or_unidentified]
    .sort_values("date")
    .reset_index(drop=True)
)

print("\nFinal DF head:")
print(speeches_chair_vc.head())
print("Shape:", speeches_chair_vc.shape)


=== Fetching FRASER records for Thomas B. McCabe (Chair), author_id=611 ===
page 1: got 44 records, total so far 44 / 44
Thomas B. McCabe: 44 records after parsing

=== Fetching FRASER records for William McChesney Martin Jr. (Chair), author_id=13 ===
page 1: got 100 records, total so far 100 / 158
page 2: got 58 records, total so far 158 / 158
William McChesney Martin Jr.: 158 records after parsing

=== Fetching FRASER records for Arthur F. Burns (Chair), author_id=20 ===
page 1: got 33 records, total so far 33 / 33
Arthur F. Burns: 33 records after parsing

=== Fetching FRASER records for G. William Miller (Chair), author_id=24 ===
page 1: got 71 records, total so far 71 / 71
G. William Miller: 71 records after parsing

=== Fetching FRASER records for Paul A. Volcker (Chair), author_id=23 ===
page 1: got 67 records, total so far 67 / 67
Paul A. Volcker: 67 records after parsing

=== Fetching FRASER records for Alan Greenspan (Chair), author_id=21 ===
HTTP error for author 21 on page

  speeches_chair_vc = pd.concat(all_dfs, ignore_index=True)
