In [1]:
# Collecting the links to the FOMC statements
from bs4 import BeautifulSoup
import requests
import re
import urllib.request
import os

# Before 1993, the "Record of Policy Actions" was released only some time
# after each meeting (not immediately). Therefore, we are not
# interested in that period.

# From 1994 until now, a statement has been published immediately after
# each meeting; these statements are the target of our research.
l_state = len("monetary")
statement_links = {}

# Again, we split our code into 2 subparts, before and after 2014
for year in range(1994, 2020): # from 1994 - 2019
    if year < 2014:
        base_url = "https://www.federalreserve.gov/monetarypolicy/"
        path = "fomchistorical" + str(year) + ".htm"
        html_doc = requests.get(base_url + path)
        soup = BeautifulSoup(html_doc.content, 'html.parser')
        links = soup.find_all("a", string = "Statement")
        link_base_url = "https://www.federalreserve.gov"
        statement_links[str(year)] = [link_base_url + link["href"] for link in links]
        print("Year Completed: ", year)
    elif year in [2014, 2015]:
        base_url = "https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm"
        html_doc = requests.get(base_url)
        soup = BeautifulSoup(html_doc.content, 'html.parser')
        links = soup.find_all("a", string = "Statement")
        link_base_url = "https://www.federalreserve.gov"
        final_link = []
        for link in links:
            p = re.compile("[^/]*$")
            if p.search(link.get("href")).group()[:(l_state+4)] == 'monetary' + str(year):
                final_link.append(link_base_url + link["href"])
        statement_links[str(year)] = final_link
        print("Year Completed: ", year)
    else:
        # After 2014, since all years are located in only one URL, we try to extract them out
        base_url = "https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm"
        html_doc = requests.get(base_url)
        soup = BeautifulSoup(html_doc.content, 'html.parser')
        links = soup.find_all("a", string = re.compile("PDF.*"))
        link_base_url = "https://www.federalreserve.gov"
        final_link = []
        for link in links:
            p = re.compile("[^/]*$")
            if p.search(link.get("href")).group()[:(l_state+4)] == 'monetary' + str(year):
                final_link.append(link_base_url + link["href"])
        statement_links[str(year)] = final_link
        print("Year Completed: ", year)

Year Completed:  1994
Year Completed:  1995
Year Completed:  1996
Year Completed:  1997
Year Completed:  1998
Year Completed:  1999
Year Completed:  2000
Year Completed:  2001
Year Completed:  2002
Year Completed:  2003
Year Completed:  2004
Year Completed:  2005
Year Completed:  2006
Year Completed:  2007
Year Completed:  2008
Year Completed:  2009
Year Completed:  2010
Year Completed:  2011
Year Completed:  2012
Year Completed:  2013
Year Completed:  2014
Year Completed:  2015
Year Completed:  2016
Year Completed:  2017
Year Completed:  2018
Year Completed:  2019


In [2]:
# Scrape them
for year in statement_links.keys():
    if not os.path.exists("./FOMCstatements/" + year):
        os.makedirs("./FOMCstatements/" + year)
    if int(year) < 2016:
        for link in statement_links[year]:
            p = re.compile(year + "[0-9][0-9][0-9][0-9]")
            name = p.search(str(link))
            response = urllib.request.urlretrieve(str(link), name.group() + ".txt")
            cwd = os.getcwd()
            os.rename(cwd + "/" + name.group() + ".txt", "./FOMCstatements/" + year + "/" + name.group() + ".txt")
        print("Download completed: " + year)
    else:
        for link in statement_links[year]:
            response = urllib.request.urlopen(str(link))
            name = re.search("[^/]*$", str(link))
            with open("./FOMCstatements/" + year + "/" + name.group(), 'wb') as f:
                f.write(response.read())
        print("Download completed" + year)

Download completed: 1994
Download completed: 1995
Download completed: 1996
Download completed: 1997
Download completed: 1998
Download completed: 1999
Download completed: 2000
Download completed: 2001
Download completed: 2002
Download completed: 2003
Download completed: 2004
Download completed: 2005
Download completed: 2006
Download completed: 2007
Download completed: 2008
Download completed: 2009
Download completed: 2010
Download completed: 2011
Download completed: 2012
Download completed: 2013
Download completed: 2014
Download completed: 2015
https://www.federalreserve.gov/monetarypolicy/files/monetary20160127a1.pdf
https://www.federalreserve.gov/monetarypolicy/files/monetary20160316a1.pdf
https://www.federalreserve.gov/monetarypolicy/files/monetary20160427a1.pdf
https://www.federalreserve.gov/monetarypolicy/files/monetary20160615a1.pdf
https://www.federalreserve.gov/monetarypolicy/files/monetary20160727a1.pdf
https://www.federalreserve.gov/monetarypolicy/files/monetary20160921a1.pdf
