In [38]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from time import sleep

data = []

def fetch_beigebook(url, year, month):
    try:
        r = requests.get(url, timeout=10)
        if r.status_code != 200:
            return False
        
        soup = BeautifulSoup(r.text, "html.parser")
        paragraphs = []
        
        for p in soup.find_all("p"):
            parents = [parent.name for parent in p.parents]
            if not any(tag in parents for tag in ["header", "footer", "nav"]):
                text = p.get_text(strip=True)
                if text.lower().startswith("full report"):
                    break
                if text:
                    paragraphs.append(text)
        
        if paragraphs:
            combined = " ".join(paragraphs)
            data.append({
                "year": year,
                "month": month,
                "url": url,
                "text": combined
            })
            print(f"Fetched {year}-{month}, {len(paragraphs)} paragraphs")
        
        sleep(0.5)
        return True
    
    except Exception as e:
        print(f"Failed to fetch {url}: {e}")
        return False


# 1996 - 2010
for year in range(1996, 2011):
    for month in range(1, 13):
        for day in range(1, 32):
            url = f"https://www.federalreserve.gov/fomc/beigebook/{year}/{year}{month:02d}{day:02d}/default.htm"
            fetch_beigebook(url, year, month)


# 2011 - 2016
for year in range(2011, 2017):
    for month in range(1, 13):
        url = f"https://www.federalreserve.gov/monetarypolicy/beigebook/beigebook{year}{month:02d}.htm?summary"
        fetch_beigebook(url, year, month)


# 2017 - 2025
for year in range(2017, 2026):
    for month in range(1, 13):
        url = f"https://www.federalreserve.gov/monetarypolicy/beigebook{year}{month:02d}-summary.htm"
        fetch_beigebook(url, year, month)


# Save to CSV
df = pd.DataFrame(data)
df.to_csv("beige_book_1996_2025.csv", index=False)
print("Saved beige_book_1996_2025.csv")


Fetched 1996-10, 3 paragraphs
Fetched 1996-12, 3 paragraphs
Fetched 1997-1, 3 paragraphs
Fetched 1997-3, 3 paragraphs
Fetched 1997-5, 3 paragraphs
Fetched 1997-6, 3 paragraphs
Fetched 1997-8, 3 paragraphs
Fetched 1997-9, 3 paragraphs
Fetched 1997-10, 3 paragraphs
Fetched 1997-12, 3 paragraphs
Fetched 1998-1, 3 paragraphs
Fetched 1998-3, 3 paragraphs
Fetched 1998-5, 3 paragraphs
Fetched 1998-6, 3 paragraphs
Fetched 1998-8, 3 paragraphs
Fetched 1998-9, 3 paragraphs
Fetched 1998-11, 3 paragraphs
Fetched 1998-12, 3 paragraphs
Fetched 1999-1, 3 paragraphs
Fetched 1999-3, 3 paragraphs
Fetched 1999-5, 3 paragraphs
Fetched 1999-6, 3 paragraphs
Fetched 1999-8, 3 paragraphs
Fetched 1999-9, 3 paragraphs
Fetched 1999-11, 3 paragraphs
Fetched 1999-12, 3 paragraphs
Fetched 2000-1, 3 paragraphs
Fetched 2000-3, 3 paragraphs
Fetched 2000-5, 3 paragraphs
Fetched 2000-6, 3 paragraphs
Fetched 2000-8, 3 paragraphs
Fetched 2000-9, 3 paragraphs
Fetched 2000-11, 3 paragraphs
Fetched 2000-12, 3 paragraphs
Fetc

In [39]:
print(df.size)

924


In [40]:
print(df["url"])


0      https://www.federalreserve.gov/fomc/beigebook/...
1      https://www.federalreserve.gov/fomc/beigebook/...
2      https://www.federalreserve.gov/fomc/beigebook/...
3      https://www.federalreserve.gov/fomc/beigebook/...
4      https://www.federalreserve.gov/fomc/beigebook/...
                             ...                        
226    https://www.federalreserve.gov/monetarypolicy/...
227    https://www.federalreserve.gov/monetarypolicy/...
228    https://www.federalreserve.gov/monetarypolicy/...
229    https://www.federalreserve.gov/monetarypolicy/...
230    https://www.federalreserve.gov/monetarypolicy/...
Name: url, Length: 231, dtype: object


In [41]:
print(df["text"].to_string())

0      October 30, 1996SummarySkip to contentSummaryD...
1      December 4, 1996SummarySkip to contentSummaryD...
2      January 22, 1997SummarySkip to contentSummaryD...
3      March 12, 1997SummarySkip to contentSummaryDis...
4      May 7, 1997SummarySkip to contentSummaryDistri...
5      June 18, 1997SummarySkip to contentSummaryDist...
6      August 6, 1997SummarySkip to contentSummaryDis...
7      September 17, 1997SummarySkip to contentSummar...
8      October 29, 1997SummarySkip to contentSummaryD...
9      December 3, 1997SummarySkip to contentSummaryD...
10     January 21, 1998SummarySkip to contentSummaryD...
11     March 18, 1998SummarySkip to contentSummaryDis...
12     May 6, 1998SummarySkip to contentSummaryDistri...
13     June 17, 1998SummarySkip to contentSummaryDist...
14     August 5, 1998SummarySkip to contentSummaryDis...
15     September 16, 1998SummarySkip to contentSummar...
16     November 4, 1998SummarySkip to contentSummaryD...
17     December 9, 1998SummaryS

In [42]:
print(df.head())

   year  month                                                url  \
0  1996     10  https://www.federalreserve.gov/fomc/beigebook/...   
1  1996     12  https://www.federalreserve.gov/fomc/beigebook/...   
2  1997      1  https://www.federalreserve.gov/fomc/beigebook/...   
3  1997      3  https://www.federalreserve.gov/fomc/beigebook/...   
4  1997      5  https://www.federalreserve.gov/fomc/beigebook/...   

                                                text  
0  October 30, 1996SummarySkip to contentSummaryD...  
1  December 4, 1996SummarySkip to contentSummaryD...  
2  January 22, 1997SummarySkip to contentSummaryD...  
3  March 12, 1997SummarySkip to contentSummaryDis...  
4  May 7, 1997SummarySkip to contentSummaryDistri...  


In [48]:
print(df.loc[230, "text"])

An official website of the United States Government Official websites use .govA.govwebsite belongs to an official government organization in the United States. Secure .gov websites use HTTPSAlock(LockLocked padlock icon) orhttps://means you've safely connected to the .gov website. Share sensitive information only on official, secure websites. Economic activity changed little on balance since the previous report, with three Districts reporting slight to modest growth in activity, five reporting no change, and four noting a slight softening. Overall consumer spending, particularly on retail goods, inched down in recent weeks, although auto sales were boosted in some Districts by strong demand for electric vehicles ahead of the expiration of a federal tax credit at the end of September. Demand for leisure and hospitality services by international travelers fell further over the reporting period, while demand by domestic consumers was largely unchanged. Nevertheless, spending by higher-inc

In [44]:
print(df.loc[0, "url"])

https://www.federalreserve.gov/fomc/beigebook/1996/19961030/default.htm


In [45]:
print(df.columns)

Index(['year', 'month', 'url', 'text'], dtype='object')


In [46]:
import requests
r = requests.get("https://www.federalreserve.gov/fomc/beigebook/1996/19961030/default.htm")
print(r.text[:2000])


 

		

		



	



		

		













<html>

<head>

     <title>FRB: Beige Book - Summary</title>

<link rel="stylesheet" href="/assets/main.css" data-injected="worker-mhclkul1"><script src="/assets/main.js" data-injected="worker-mhclkul1" defer type="module"></script></head>

<body bgcolor="#FFFFFF"><BASE TARGET="_top">

<a name="pagetop"></a>



<a href="/"><img src="/gifjpg/sm_head2.gif" width=275 height=73 alt="The Federal Reserve Board eagle logo links to home page" border="0"></a>

<hr noshade>

<p>

	

	<table border=0 cellpadding=4>

     <tr>

     <td valign=top>

	 

	     <a href="../../1996/"><img src="/gifjpg/Beige2.gif" width=80 height=95 border=0 alt="Beige Book logo links to Beige Book home page for year currently displayed"></a>

	 

     </td>

     

 	

 	<td valign=top width=500>

		<strong>

		<font size=+1>October 30, 1996</font>



			

				<p><br>

				<font size=+3>

				Summary

				</font>

				</strong>

				     </td>

				     </tr>

				

				     <tr