In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from time import sleep

data = []

def fetch_beigebook(url, year, month):
    try:
        r = requests.get(url, timeout=10)
        if r.status_code != 200:
            return False
        
        soup = BeautifulSoup(r.text, "html.parser")
        paragraphs = []
        
        for p in soup.find_all("p"):
            parents = [parent.name for parent in p.parents]
            if not any(tag in parents for tag in ["header", "footer", "nav"]):
                text = p.get_text(strip=True)
                if text:
                    paragraphs.append(text)
        
        if paragraphs:
            combined = " ".join(paragraphs)
            data.append({
                "year": year,
                "month": month,
                "url": url,
                "text": combined
            })
            print(f"Fetched {year}-{month}, {len(paragraphs)} paragraphs")
        
        sleep(0.5)
        return True
    
    except Exception as e:
        print(f"Failed to fetch {url}: {e}")
        return False


# 1996 - 2010
for year in range(1996, 2011):
    for month in range(1, 13):
        for day in range(1, 32):
            url = f"https://www.federalreserve.gov/fomc/beigebook/{year}/{year}{month:02d}{day:02d}/default.htm"
            fetch_beigebook(url, year, month)


# 2011 - 2016
for year in range(2011, 2017):
    for month in range(1, 13):
        url = f"https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook{year}{month:02d}.htm?summary"
        fetch_beigebook(url, year, month)


# 2017 - 2025
for year in range(2017, 2026):
    for month in range(1, 13):
        url = f"https://www.federalreserve.gov/monetarypolicy/beigebook{year}{month:02d}-summary.htm"
        fetch_beigebook(url, year, month)


# Save to CSV
df = pd.DataFrame(data)
df.to_csv("beige_book_1996_2025.csv", index=False)
print("Saved beige_book_1996_2025.csv")


Fetched 1996-10, 26 paragraphs
Fetched 1996-12, 29 paragraphs
Fetched 1997-1, 22 paragraphs
Fetched 1997-3, 24 paragraphs
Fetched 1997-5, 23 paragraphs
Fetched 1997-6, 31 paragraphs
Fetched 1997-8, 27 paragraphs
Fetched 1997-9, 28 paragraphs
Fetched 1997-10, 22 paragraphs
Fetched 1997-12, 25 paragraphs
Fetched 1998-1, 27 paragraphs
Fetched 1998-3, 24 paragraphs
Fetched 1998-5, 26 paragraphs
Fetched 1998-6, 26 paragraphs
Fetched 1998-8, 23 paragraphs
Fetched 1998-9, 25 paragraphs
Fetched 1998-11, 23 paragraphs
Fetched 1998-12, 24 paragraphs
Fetched 1999-1, 27 paragraphs
Fetched 1999-3, 27 paragraphs
Fetched 1999-5, 22 paragraphs
Fetched 1999-6, 27 paragraphs
Fetched 1999-8, 27 paragraphs
Fetched 1999-9, 23 paragraphs
Fetched 1999-11, 23 paragraphs
Fetched 1999-12, 26 paragraphs
Fetched 2000-1, 30 paragraphs
Fetched 2000-3, 20 paragraphs
Fetched 2000-5, 20 paragraphs
Fetched 2000-6, 24 paragraphs
Fetched 2000-8, 30 paragraphs
Fetched 2000-9, 23 paragraphs
Fetched 2000-11, 23 paragraphs
F

In [4]:
print(df.size)

4


In [5]:
print(df["url"])


0    https://www.federalreserve.gov/fomc/beigebook/...
Name: url, dtype: object


In [8]:
print(df["text"].to_string())

0    January 22, 1997SummarySkip to contentSummaryD...


In [9]:
print(df.head())

   year  month                                                url  \
0  1997      1  https://www.federalreserve.gov/fomc/beigebook/...   

                                                text  
0  January 22, 1997SummarySkip to contentSummaryD...  


In [10]:
print(df.loc[0, "text"])

January 22, 1997SummarySkip to contentSummaryDistrictsBostonNew YorkPhiladelphiaClevelandRichmondAtlantaChicagoSt. LouisMinneapolisKansas CityDallasSan FranciscoFull reportPrepared at the Federal Reserve Bank of Atlanta and based on information collected before October 20, 1997.  This document summarizes comments received from businesses and other contacts outside the Federal Reserve and is not a commentary on the views of Federal Reserve officials.Most District reports characterized early autumn's economic activity as moderate to strong in most areas.  Retail sales in most Districts in September were weaker than anticipated, with some pickup in October.  Automobile sales were mostly slower.  Manufacturing activity accelerated or remained at high levels for most Districts, and the outlook was generally positive.   Despite the residential real estate markets' recent uneven performance, activity remains at elevated levels overall, and most commercial real estate markets continue to perfo