In [94]:
import lxml
import lxml.html
import requests
import cssselect
from bs4 import BeautifulSoup
import time
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import re

This notebook is intended to document the text-gathering process for analyzing the federal reserve's use of language.

In [134]:
url = "https://www.federalreserve.gov/monetarypolicy/fomchistorical{}.htm"
base_url = "https://www.federalreserve.gov"
years = range(1936, 2013, 1)

In [102]:
def get_statement_links(year):
    r = requests.get(url.format(year))
    soup = BeautifulSoup(r.text, "html.parser")
    links = soup.find_all("a")
    statements = []
    
    for link in links:
        if link.text == "Statement":
            statements.append(link)
    state_links = []
    
    for link in statements:
        state_links.append(requests.compat.urljoin(base_url, link.get('href')))
    return(state_links)

In [104]:
def get_statement_texts(links):
    dates = []
    texts = []

    for link in links:
        r = requests.get(link)
        soup = BeautifulSoup(r.text, "html.parser")
        dte = soup.find("p", {"class": "article__time"}).contents
        txt = soup.find("div", {"class" : "col-xs-12 col-sm-8 col-md-8"}).get_text()
        txt = re.sub("\n", "", txt)
        dates.append(dte[0])
        texts.append(txt)

    statements_df = pd.DataFrame({"Dates": dates,
                                  "Texts": texts})
    return(statements_df)

'       Information received since the Federal Open Market Committee met in October suggests that economic activity and employment have continued to expand at a moderate pace in recent months, apart from weather-related disruptions. Although the unemployment rate has declined somewhat since the summer, it remains elevated. Household spending has continued to advance, and the housing sector has shown further signs of improvement, but growth in business fixed investment has slowed. Inflation has been running somewhat below the Committeeâ\x80\x99s longer-run objective, apart from temporary variations that largely reflect fluctuations in energy prices. Longer-term inflation expectations have remained stable.           Consistent with its statutory mandate, the Committee seeks to foster maximum employment and price stability. The Committee remains concerned that, without sufficient policy accommodation, economic growth might not be strong enough to generate sustained improvement in labor ma

In [136]:
statements_master = pd.DataFrame(columns = ["Dates", "Texts"])
years = range(2006, 2013)
for year in years:
    print(year)
    temp_df = get_statement_texts(get_statement_links(year))
    statements_master = statements_master.append(temp_df)

2006
2007
2008
2009
2010
2011
2012


In [137]:
statements_master

Unnamed: 0,Dates,Texts
0,"January 31, 2006",The Federal Open Market Committee decided toda...
1,"March 28, 2006",The Federal Open Market Committee decided toda...
2,"May 10, 2006",The Federal Open Market Committee decided toda...
3,"June 29, 2006",The Federal Open Market Committee decided toda...
4,"August 08, 2006",The Federal Open Market Committee decided toda...
5,"September 20, 2006",The Federal Open Market Committee decided toda...
6,"October 25, 2006",The Federal Open Market Committee decided toda...
7,"December 12, 2006",The Federal Open Market Committee decided toda...
0,"January 31, 2007",The Federal Open Market Committee decided toda...
1,"March 21, 2007",The Federal Open Market Committee decided toda...


In [3]:
r = requests.get(url.format("2012"))
soup = BeautifulSoup(r.text, "html.parser")

In [4]:
links = soup.find_all("a")
statements = []
for link in links:
    if link.text == "Statement":
        statements.append(link)

In [5]:
statements

[<a href="/newsevents/pressreleases/monetary20120125a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20120313a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20120425a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20120620a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20120801a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20120913a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20121024a.htm">Statement</a>,
 <a href="/newsevents/pressreleases/monetary20121212a.htm">Statement</a>]

In [6]:
state_links = []
for link in statements:
    state_links.append(requests.compat.urljoin(base_url, link.get('href')))

In [14]:
r = requests.get(state_links[0])
soup = BeautifulSoup(r.text, "html.parser")

In [49]:
?pd.DataFrame.from_records

In [97]:
dates = []
texts = []

for link in state_links:
    r = requests.get(link)
    soup = BeautifulSoup(r.text, "html.parser")
    dte = soup.find("p", {"class": "article__time"}).contents
    txt = soup.find("div", {"class" : "col-xs-12 col-sm-8 col-md-8"}).get_text()
    txt = re.sub("\n", "", txt)
    dates.append(dte[0])
    texts.append(txt)
    
statements_df = pd.DataFrame({"Dates": dates,
                              "Texts": texts})

In [98]:
statements_df

Unnamed: 0,Dates,Texts
0,"January 25, 2012",Information received since the Federal ...
1,"March 13, 2012",Information received since the Federal ...
2,"April 25, 2012",Information received since the Federal ...
3,"June 20, 2012",Information received since the Federal Open Ma...
4,"August 01, 2012",Information received since the Federal ...
5,"September 13, 2012",Information received since the Federal Open Ma...
6,"October 24, 2012",Information received since the Federal ...
7,"December 12, 2012",Information received since the Federal ...


In [55]:
soup.find("p", {"class": "article__time"}).contents

['December 12, 2012']

In [56]:
soup.find("div", {"class" : "col-xs-12 col-sm-8 col-md-8"}).contents

['\n', <p>
        Information received since the Federal Open Market Committee met in October suggests that economic activity and employment have continued to expand at a moderate pace in recent months, apart from weather-related disruptions. Although the unemployment rate has declined somewhat since the summer, it remains elevated. Household spending has continued to advance, and the housing sector has shown further signs of improvement, but growth in business fixed investment has slowed. Inflation has been running somewhat below the Committeeâs longer-run objective, apart from temporary variations that largely reflect fluctuations in energy prices. Longer-term inflation expectations have remained stable.
     </p>, '\n', <p>
        Consistent with its statutory mandate, the Committee seeks to foster maximum employment and price stability. The Committee remains concerned that, without sufficient policy accommodation, economic growth might not be strong enough to generate sustained

In [39]:
state_links[1]

'https://www.federalreserve.gov/newsevents/pressreleases/monetary20120313a.htm'