# Download FOMC Data
Scrape data from the Federal Reserve website and archives

In [None]:
# -*- coding: utf-8 -*-

# ENVIRONMENT CHECK:
# Reports whether the notebook runs on Google Colab and where the running
# interpreter keeps its standard library and site-packages directories.
import sys
import os
import inspect
import site
import pprint
import platform

# Check whether in Colab:
# Detected by testing whether the 'google.colab' module has already been loaded.
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
  print('YES, this is a Google Colaboratory environment.')
else:
  print('NO, this is not a Google Colaboratory environment.')
print(' ')

# Python installation files:
# Directory that contains os.py, i.e. the standard library of this interpreter.
stdlib = os.path.dirname(inspect.getfile(os))
# Ask the running interpreter for its version instead of shelling out to
# `python --version`: the `python` found on the shell PATH is not necessarily
# the interpreter that executes this notebook. Kept as a one-element list so
# that `python_version[0]` still yields a string such as 'Python 3.8.5'.
python_version = ['Python ' + platform.python_version()]
print('Python Standard Library is located in:\n' + stdlib)
print(' ')
print('This environment is using {}'.format(python_version[0]))
print(' ')
print('Local system packages are located in:')
pprint.pprint(site.getsitepackages())
print(' ')
print('Local user packages are located in:\n' + site.getusersitepackages())

# Installed packages:
#!pip list -v
#!pip list --user -v


In [None]:
# Mount Google Drive:
# Only attempted on Colab (IN_COLAB is set by the environment-check cell);
# the google.colab package is imported inside the guard for that reason.
if IN_COLAB:
  from google.colab import drive
  # Mount point is /content/drive; force_remount=True requests a fresh mount
  # even when the drive is already mounted.
  drive.mount('/content/drive', force_remount=True)


In [None]:
# Navigate to project folder:
%cd "/content/drive/MyDrive/Colab Notebooks/proj2/src"
!ls -al

In [None]:
# Define Path Variables:
# Every directory below lives under <project root>/data/; only the project
# root differs between Colab and the local machine. The root is therefore
# chosen once and all other paths are derived from it, so a relocated project
# needs a single edit.
# Paths are built by plain string concatenation (not os.path.join) so that the
# forward slashes and the trailing '/' of every directory are preserved exactly.
if IN_COLAB:
  project_src_dir = '/content/drive/My Drive/Colab Notebooks/proj2/src/'
else:
  project_src_dir = 'C:/Users/theon/GDrive/Colab Notebooks/proj2/src/'

project_data_dir = project_src_dir + 'data/'
market_data_root = project_data_dir + 'MarketData/'

# Market data, one sub-directory per data set:
employment_data_dir = market_data_root + 'Employment/'
cpi_data_dir = market_data_root + 'CPI/'
fed_rates_dir = market_data_root + 'FEDRates/'
fx_rates_dir = market_data_root + 'FXRates/'
gdp_data_dir = market_data_root + 'GDP/'
ism_data_dir = market_data_root + 'ISM/'
sales_data_dir = market_data_root + 'Sales/'
treasury_data_dir = market_data_root + 'Treasury/'

# Remaining data directories:
fomc_dir = project_data_dir + 'FOMC/'
preprocessed_dir = project_data_dir + 'preprocessed/'
train_dir = project_data_dir + 'train_data/'
output_dir = project_data_dir + 'result/'
keyword_lm_dir = project_data_dir + 'LoughranMcDonald/'
glove_dir = project_data_dir + 'GloVe/'
model_dir = project_data_dir + 'models/'

### Packages

In [None]:
# Optional package pinning for Colab. All commands are commented out; to pin
# the environment, uncomment the wanted lines and run the cell.
if IN_COLAB:
  # A block that contains only comments is a syntax error, so keep a no-op
  # statement while every command below is disabled.
  pass

  # Uninstall existing versions:
  #!pip uninstall bs4==0.0.1 -y
  #!pip uninstall textract==1.6.3 -y
  #!pip uninstall numpy==1.19.4 -y
  #!pip uninstall pandas==1.1.4 -y
  #!pip uninstall requests==2.24.0 -y
  #!pip uninstall tqdm==4.51.0 -y
  #!pip uninstall nltk==3.5 -y
  #!pip uninstall quandl==3.5.3 -y
  #!pip uninstall scikit-plot==0.3.7 -y
  #!pip uninstall seaborn==0.11.0 -y
  #!pip uninstall sklearn==0.0 -y
  #!pip uninstall torch==1.7.0 -y
  #!pip uninstall transformers==3.5.0 -y
  #!pip uninstall wordcloud==1.8.0 -y
  #!pip uninstall xgboost==1.2.1 -y

  # Install packages:
  #!pip install bs4==0.0.1
  #!pip install textract==1.6.3
  #!pip install numpy==1.19.4
  #!pip install pandas==1.1.4
  #!pip install requests==2.24.0
  #!pip install tqdm==4.51.0
  #!pip install nltk==3.5
  #!pip install quandl==3.5.3
  #!pip install scikit-plot==0.3.7
  #!pip install seaborn==0.11.0
  #!pip install sklearn==0.0
  #!pip install torch==1.7.0
  #!pip install transformers==3.5.0
  #!pip install wordcloud==1.8.0
  #!pip install xgboost==1.2.1
  # Sends signal 9 to the current process -- presumably to force a runtime
  # restart so the freshly installed versions get loaded (confirm before use).
  #os.kill(os.getpid(), 9)


#### Inspect Packages

In [None]:
# List all installed packages with pip's verbose output (-v), then repeat the
# listing restricted to packages installed in the user site (--user).
!pip list -v
!pip list --user -v


#### Import Packages:

In [None]:
# System:
import sys
import os
import re
from datetime import date
from datetime import datetime

# Computation:
import numpy as np
import pandas as pd
import pickle

# Web Scraping:
import json
from bs4 import BeautifulSoup
from tqdm import tqdm
import requests
import threading
from abc import ABCMeta, abstractmethod
print(sys.stdout.encoding)

# Text Extraction:
# Tika depends on the installed Java version, so use textract instead; the PDFs contain only simple text anyway.
# # Use Tika for PDF parsing
# os.environ['TIKA_SERVER_JAR'] = 'https://repo1.maven.org/maven2/org/apache/tika/tika-server/1.19/tika-server-1.19.jar'
# import tika
# from tika import parser
import textract


### Import Helper Functions:

In [None]:
from fomc_get_data.FomcBase import FomcBase
from fomc_get_data.FomcStatement import FomcStatement
from fomc_get_data.FomcMinutes import FomcMinutes
from fomc_get_data.FomcMeetingScript import FomcMeetingScript
from fomc_get_data.ScrapePressConference import ScrapePressConference
from fomc_get_data.FomcSpeech import FomcSpeech
from fomc_get_data.FomcTestimony import FomcTestimony


## FOMC Data Scraping

### Inspect Helper Functions

In [None]:
# Print the source of each helper module so it can be reviewed in the notebook.
# Paths are relative to the current working directory.
# NOTE(review): fomc_get_data/ScrapePressConference.py is imported above but is
# not printed by any of these cells.
!cat fomc_get_data/FomcBase.py

In [None]:
# Source of the FomcMeetingScript helper.
!cat fomc_get_data/FomcMeetingScript.py

In [None]:
# Source of the FomcMinutes helper.
!cat fomc_get_data/FomcMinutes.py

In [None]:
# Source of the FomcSpeech helper.
!cat fomc_get_data/FomcSpeech.py

In [None]:
# Source of the FomcStatement helper.
!cat fomc_get_data/FomcStatement.py

In [None]:
# Source of the FomcTestimony helper.
!cat fomc_get_data/FomcTestimony.py

In [None]:
# Source of pdf2text.py, located directly in the current working directory.
!cat pdf2text.py

### FOMC Calendar Data

In [None]:
# Inspect script:
# Print the source of FomcGetCalendar.py before running it.
!cat FomcGetCalendar.py

In [None]:
# Execute Script:
# Runs FomcGetCalendar.py in a separate Python process with the single
# argument 1980 (presumably the first year to collect -- confirm in the script).
!python FomcGetCalendar.py 1980
# Alternative invocation that redirects the script's standard output to a file:
#!python FomcGetCalendar.py 1980 > FomcGetCalendar_debug.txt # Save output for debugging

### FOMC Text Data

In [None]:
# Inspect script:
# Print the source of FomcGetData.py before running it.
!cat FomcGetData.py

In [None]:
# Execute Script:
# Runs FomcGetData.py in a separate Python process with the arguments
# 'all' and 1980 (presumably the document type selection and the first year
# to collect -- confirm in the script).
!python FomcGetData.py all 1980
# Alternative invocation that redirects the script's standard output to a file:
#!python FomcGetData.py all 1980 > FomcGetData_debug.txt # Save output for debugging