In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

URL = "https://www.federalreserve.gov/monetarypolicy/fomcminutes20210616.htm"
page = requests.get(URL)

soup = BeautifulSoup(page.content, "html.parser")

In [2]:
article = soup.find("div", {"id": "article"})
text = article.get_text()

#text = article.text

#letters_only = re.sub("[^a-zA-Z]",  # Search for all non-letters
#                          " ",          # Replace all non-letters with spaces
#                          str(text))
print(text)


Minutes of the Federal Open Market Committee

A joint meeting of the Federal Open Market Committee and the Board of Governors was held by videoconference on Tuesday, June 15, 2021, at 9:00 a.m. and continued on Wednesday, June 16, 2021, at 9:00 a.m.1
PRESENT:
Jerome H. Powell, Chair
John C. Williams, Vice Chair
Thomas I. Barkin
Raphael W. Bostic
Michelle W. Bowman
Lael Brainard
Richard H. Clarida
Mary C. Daly
Charles L. Evans
Randal K. Quarles
Christopher J. Waller
James Bullard, Esther L. George, Naureen Hassan, Loretta J. Mester, and Eric Rosengren, Alternate Members of the Federal Open Market Committee
Patrick Harker, Robert S. Kaplan, and Neel Kashkari, Presidents of the Federal Reserve Banks of Philadelphia, Dallas, and Minneapolis, respectively
James A. Clouse, Secretary
Matthew M. Luecke, Deputy Secretary
Michelle A. Smith, Assistant Secretary
Mark E. Van Der Weide, General Counsel
Michael Held, Deputy General Counsel
Trevor A. Reeve, Economist
Stacey Tevlin, Economist
Beth Ann

In [3]:
import nltk
from pprint import pprint
import string

In [4]:
words = nltk.word_tokenize(text)

In [66]:
stopwords = nltk.corpus.stopwords.words("english")
stopwords.extend(["governors", "board", "federal", "reserve"])
words =[word.lower() for word in words if word.isalpha() if word not in stopwords]

In [67]:
print(words)

['minutes', 'open', 'market', 'committee', 'joint', 'meeting', 'open', 'market', 'committee', 'held', 'videoconference', 'tuesday', 'june', 'continued', 'wednesday', 'june', 'present', 'jerome', 'powell', 'chair', 'john', 'williams', 'vice', 'chair', 'thomas', 'barkin', 'raphael', 'bostic', 'michelle', 'bowman', 'lael', 'brainard', 'richard', 'clarida', 'mary', 'daly', 'charles', 'evans', 'randal', 'quarles', 'christopher', 'waller', 'james', 'bullard', 'esther', 'george', 'naureen', 'hassan', 'loretta', 'mester', 'eric', 'rosengren', 'alternate', 'members', 'open', 'market', 'committee', 'patrick', 'harker', 'robert', 'kaplan', 'neel', 'kashkari', 'presidents', 'banks', 'philadelphia', 'dallas', 'minneapolis', 'respectively', 'james', 'clouse', 'secretary', 'matthew', 'luecke', 'deputy', 'secretary', 'michelle', 'smith', 'assistant', 'secretary', 'mark', 'van', 'der', 'weide', 'general', 'counsel', 'michael', 'held', 'deputy', 'general', 'counsel', 'trevor', 'reeve', 'economist', 'sta

In [70]:
fd = nltk.FreqDist(words)

In [71]:
#fd.most_common(3)
fd.tabulate(3)

participants    inflation       market 
          89           79           58 


In [72]:
bigrams = nltk.collocations.BigramAssocMeasures()
trigrams = nltk.collocations.TrigramAssocMeasures()

In [73]:
bigramFinder = nltk.collocations.BigramCollocationFinder.from_words(words)
trigramFinder = nltk.collocations.TrigramCollocationFinder.from_words(words)

In [74]:
bigram_freq = bigramFinder.ngram_fd.items()

In [75]:
bigramFreqTable = pd.DataFrame(list(bigram_freq), columns=['bigram','freq']).sort_values(by='freq', ascending=False)

In [76]:
bigramFreqTable.head(20).reset_index(drop=True)

Unnamed: 0,bigram,freq
0,"(monetary, policy)",20
1,"(funds, rate)",19
2,"(intermeeting, period)",16
3,"(treasury, securities)",16
4,"(target, range)",14
5,"(open, market)",14
6,"(repo, facility)",14
7,"(several, participants)",14
8,"(inflation, expectations)",13
9,"(fima, repo)",13


In [77]:
bigramFreqTable[:20]

Unnamed: 0,bigram,freq
420,"(monetary, policy)",20
474,"(funds, rate)",19
912,"(intermeeting, period)",16
498,"(treasury, securities)",16
472,"(target, range)",14
1,"(open, market)",14
387,"(repo, facility)",14
620,"(several, participants)",14
1651,"(inflation, expectations)",13
395,"(fima, repo)",13


In [78]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [79]:
#get english stopwords
en_stopwords = set(stopwords.words('english'))

In [80]:
#function to filter for ADJ/NN bigrams
def rightTypes(ngram):
    if '-pron-' in ngram or '' in ngram or ' 'in ngram or 't' in ngram:
        return False
    for word in ngram:
        if word in en_stopwords:
            return False
    acceptable_types = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS')
    second_type = ('NN', 'NNS', 'NNP', 'NNPS')
    tags = nltk.pos_tag(ngram)
    if tags[0][1] in acceptable_types and tags[1][1] in second_type:
        return True
    else:
        return False

In [81]:
filtered_bi = bigramFreqTable[bigramFreqTable.bigram.map(lambda x: rightTypes(x))]

In [82]:
filtered_bi[:20]

Unnamed: 0,bigram,freq
420,"(monetary, policy)",20
474,"(funds, rate)",19
498,"(treasury, securities)",16
472,"(target, range)",14
1,"(open, market)",14
387,"(repo, facility)",14
620,"(several, participants)",14
1651,"(inflation, expectations)",13
395,"(fima, repo)",13
2668,"(economic, outlook)",12


In [57]:
trigram_freq = trigramFinder.ngram_fd.items()

In [58]:
trigramFreqTable = pd.DataFrame(list(trigram_freq), columns=['trigram','freq']).sort_values(by='freq', ascending=False)


In [59]:
trigramFreqTable.head().reset_index(drop=True)


Unnamed: 0,trigram,freq
0,"(federal, funds, rate)",19
1,"(fima, repo, facility)",13
2,"(affairs, board, governors)",12
3,"(monetary, affairs, board)",12
4,"(division, monetary, affairs)",12


In [60]:
trigramFreqTable[:10]

Unnamed: 0,trigram,freq
561,"(federal, funds, rate)",19
480,"(fima, repo, facility)",13
218,"(affairs, board, governors)",12
217,"(monetary, affairs, board)",12
216,"(division, monetary, affairs)",12
3983,"(stance, monetary, policy)",11
206,"(associate, director, division)",8
1629,"(labor, market, conditions)",8
560,"(range, federal, funds)",7
559,"(target, range, federal)",7


In [61]:

def rightTypesTri(ngram):
    if '-pron-' in ngram or '' in ngram or ' 'in ngram or '  ' in ngram or 't' in ngram:
        return False
    for word in ngram:
        if word in en_stopwords:
            return False
    first_type = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS')
    third_type = ('JJ', 'JJR', 'JJS', 'NN', 'NNS', 'NNP', 'NNPS')
    tags = nltk.pos_tag(ngram)
    if tags[0][1] in first_type and tags[2][1] in third_type:
        return True
    else:
        return False

In [62]:
filtered_tri = trigramFreqTable[trigramFreqTable.trigram.map(lambda x: rightTypesTri(x))]


In [63]:
filtered_tri[:10]


Unnamed: 0,trigram,freq
561,"(federal, funds, rate)",19
480,"(fima, repo, facility)",13
218,"(affairs, board, governors)",12
217,"(monetary, affairs, board)",12
216,"(division, monetary, affairs)",12
3983,"(stance, monetary, policy)",11
206,"(associate, director, division)",8
1629,"(labor, market, conditions)",8
560,"(range, federal, funds)",7
559,"(target, range, federal)",7


In [32]:
freq_bi = filtered_bi[:20].bigram.values


In [33]:
freq_tri = filtered_tri[:20].trigram.values


In [34]:
bigramFinder.apply_freq_filter(20)


In [35]:
bigramPMITable = pd.DataFrame(list(bigramFinder.score_ngrams(bigrams.pmi)), columns=['bigram','PMI']).sort_values(by='PMI', ascending=False)


In [36]:
bigramPMITable[:10]


Unnamed: 0,bigram,PMI
0,"(board, governors)",7.012928
1,"(monetary, policy)",6.604571
2,"(federal, reserve)",6.395881
3,"(federal, funds)",6.271954


In [37]:
trigramFinder.apply_freq_filter(20)


In [38]:
trigramPMITable = pd.DataFrame(list(trigramFinder.score_ngrams(trigrams.pmi)), columns=['trigram','PMI']).sort_values(by='PMI', ascending=False)


In [39]:
trigramPMITable[:10]


Unnamed: 0,trigram,PMI
