# Beigebook

Commonly known as the Beige Book, this report is published eight times per year. Each Federal Reserve Bank gathers anecdotal information on current economic conditions in its District through reports from Bank and Branch directors and interviews with key business contacts, economists, market experts, and other sources. The Beige Book summarizes this information by District and sector. An overall summary of the twelve district reports is prepared by a designated Federal Reserve Bank on a rotating basis.

In [28]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from bs4.dammit import EncodingDetector
import requests
import datetime as dt

In [29]:
# Read the Beige Book sections written to disk by earlier runs
previous_beigebooks = pd.read_csv('bbooks.csv')

# Distinct release dates already on disk, in order of first appearance;
# the scraping loop further down uses them to skip reports we already have
saved_dates = previous_beigebooks['date'].unique().tolist()

In [30]:
# Index page listing the links to every Beige Book for the current year
base_page = "https://federalreserve.gov/monetarypolicy/beige-book-default.htm"

# Download the index page. The timeout stops a hung connection from blocking
# the run forever, and raise_for_status() surfaces an HTTP error here rather
# than letting an error page be parsed and fail later with an obscure message.
base_html = requests.get(base_page, timeout=30)
base_html.raise_for_status()

# Parse HTML to bs4 object
base_soup = BeautifulSoup(base_html.text, "html.parser")

# The table rows are combined with this value to build full dates later on,
# so read the year from the page itself rather than assuming it.
year_tag = base_soup.find(id='year')
if year_tag is None:
    raise ValueError("Could not find an element with id='year' on " + base_page)

# Strip surrounding whitespace so the value can be passed straight to strptime
link_year = year_tag.text.strip()

In [31]:
# Dates that are already saved, as a set of strings. Built once, outside the
# loop, so each lookup is an exact O(1) match instead of a substring search
# through the printed form of the whole list.
saved_date_keys = {str(saved) for saved in saved_dates}

# Create list to store [date, link] pairs for beige books still to download
current_year_links = []

# Find the table that contains the dates and links
current_link_table = base_soup.find('tbody')

# Each table row holds one release date and the links for that release
beigebook_dates = current_link_table.find_all('tr')

for date_section in beigebook_dates:
    date_cell = date_section.find('td')
    if date_cell is None:
        # Row without a data cell: nothing to parse
        continue

    # The cell holds only month and day; append the year read from the page
    link_date = date_cell.text.strip() + ', ' + link_year.strip()
    formatted_date = dt.datetime.strptime(link_date, '%B %d, %Y').strftime('%Y%m%d')

    # If we already have the beigebook parsed and saved, skip
    if formatted_date in saved_date_keys:
        continue

    # Rows with no "HTML" link are skipped explicitly rather than by
    # swallowing every exception.
    date_html_link = date_section.find('a', string='HTML')
    if date_html_link is None or not date_html_link.get('href'):
        continue

    link_append = 'https://www.federalreserve.gov' + date_html_link['href']
    current_year_links.append([formatted_date, link_append])

# Create DataFrame of dates and links
book_link_table = pd.DataFrame(current_year_links, columns=['date', 'link'])

In [32]:
def fetch_sections(date, link):
    """Download one Beige Book page and split it into labelled sections.

    Every ``<p>`` that contains a ``<br>`` is treated as a section: the
    element parsed immediately before the ``<br>`` is the section title and
    the string immediately after it is the section text. The region is the
    text of the nearest preceding ``<h4>``, or ``'National Summary'`` when
    there is none.

    Args:
        date: Value stored unchanged in the ``date`` column of every row.
        link: URL of the Beige Book HTML page to download.

    Returns:
        pandas.DataFrame with columns ``date``, ``section``, ``text`` and
        ``region``, one row per section found (empty if none are found).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    text_html = requests.get(link, timeout=30)
    # Fail loudly on an error page instead of returning an empty result
    text_html.raise_for_status()
    soupy_book = BeautifulSoup(text_html.text, "html.parser")

    sections = []

    for p in soupy_book.find_all('p'):
        text_break = p.find('br')
        if text_break is None:
            # Paragraph without a line break is not a titled section
            continue

        # previous_element / next_element are the documented names for what
        # the legacy .previous / .next attributes return.
        section = text_break.previous_element
        following = text_break.next_element
        if not isinstance(following, str):
            # Nothing, or another tag, follows the <br>: there is no section
            # text to strip, so skip rather than crash.
            continue
        text = following.strip('\n')

        # There is no previous h4 element, assume it is the National Summary
        heading = text_break.find_previous('h4')
        region = heading.text if heading is not None else 'National Summary'

        sections.append([date, section, text, region])

    return pd.DataFrame(sections, columns=['date', 'section', 'text', 'region'])

In [33]:
# If there are reports available that we haven't already downloaded, parse and save those reports
if len(book_link_table) > 0:
    print('running')

    # Download every new beige book, then combine them in one step.
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    # and also avoids re-copying the accumulated frame on every iteration.
    new_books = [
        fetch_sections(book_date, book_link)
        for book_date, book_link in zip(book_link_table['date'], book_link_table['link'])
    ]
    add_books = pd.concat(new_books, sort=False)

    all_books = pd.concat([previous_beigebooks, add_books], sort=False)

    # Dates loaded from the CSV and freshly scraped dates may differ in type;
    # round-tripping through strptime validates each one and leaves a uniform
    # 'YYYYMMDD' string so the sort below is consistent.
    all_books['date'] = all_books['date'].apply(
        lambda x: dt.datetime.strptime(str(x), '%Y%m%d').strftime('%Y%m%d')
    )
    all_books = all_books.sort_values('date', ascending=False)

    all_books.to_csv('bbooks.csv', index=False)

else:
    # Nothing new to add: the saved data is already complete
    all_books = previous_beigebooks

running


# FOMC_statements

In [None]:
from bs4 import BeautifulSoup
import re
import pandas as pd
from bs4 import BeautifulSoup
from bs4.dammit import EncodingDetector
import requests
import datetime as dt

In [None]:
# Absolute URLs of the FOMC statements listed on this year's press-release page
fomc_statement_links = []

base_url = 'https://www.federalreserve.gov/newsevents/pressreleases/'

# The press-release index is published per calendar year
year_string = str(dt.date.today().year)

resp = requests.get(base_url + year_string + '-press.htm')

soup = BeautifulSoup(resp.content, 'html.parser')

# Raw string so \d reaches the regex engine untouched (a plain string makes it
# an invalid escape sequence), and the dot is escaped so it only matches a
# literal '.' before 'htm'.
statements = soup.find_all(
    'a',
    href=re.compile(r'^/newsevents/pressreleases/monetary\d{8}a\.htm'),
)

# The hrefs are site-relative; prefix the host to make them downloadable
statement_links = [statement.attrs['href'] for statement in statements]
fomc_statement_links.extend(
    "https://www.federalreserve.gov" + link for link in statement_links
)

def _date_from_link(link):
    date = re.findall('[0-9]{8}', link)[0]
    date = "{}/{}/{}".format(date[4:6], date[6:], date[:4])

    return date

In [None]:
# Collect one [date, statement] row per press release and build the frame
# once at the end. DataFrame.append was removed in pandas 2.0, and giving the
# frame explicit columns keeps set_index('date') working when no links exist.
statement_rows = []

for link in fomc_statement_links:

    page = requests.get(link)
    soup2 = BeautifulSoup(page.text, "html.parser")

    # Prefer the paragraphs inside the element with id="article"; when the
    # page has no such element, fall back to every paragraph on the page.
    article = soup2.find(id='article')
    container = article if article is not None else soup2
    paragraphs = container.find_all('p')

    # One statement string per page, paragraphs separated by a blank line
    statement = "\n\n".join(text.get_text().strip() for text in paragraphs)
    date = _date_from_link(link)

    statement_rows.append([date, statement])

statements_df = pd.DataFrame(statement_rows, columns=['date', 'statement'])
statements_df = statements_df.set_index('date')
statement_dates = statements_df.index.tolist()

# Statements saved by earlier runs, indexed by their date string
# (presumably the MM/DD/YYYY form produced by _date_from_link; verify against the CSV)
previous_statements = pd.read_csv('fomcstatements.csv', index_col='date')

# Keep only the freshly scraped statements whose date is not already saved
is_new = ~statements_df.index.isin(previous_statements.index)
keep_dates = statements_df.loc[is_new]

if len(keep_dates) > 0:
    print('Adding statements...')
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement
    fed_statements = pd.concat([previous_statements, keep_dates])

    fed_statements.to_csv('fomcstatements.csv')

else:
    # Nothing new was published: the saved file is already up to date
    fed_statements = previous_statements

In [None]:
# Preview the first rows of the statement table built in the previous cell
fed_statements.head()