In [1]:
from bs4 import BeautifulSoup
import requests
import re
import datetime
import pandas as pd

In [2]:
def get_row_values(row_data):
    """Collect the text of each scraped cell, mapping N/A cells to ``None``.

    Args:
        row_data: iterable of elements exposing ``get_text()`` (here, the
            result of a BeautifulSoup ``select`` call).

    Returns:
        list with one entry per element: the element's text, or ``None``
        when that text contains the whitespace-padded ``N/A`` marker.
    """
    # Exact marker searched for; a bare 'N/A' without this padding is kept as text.
    na_marker = '\n\t\t\tN/A\n\t\t'
    cell_texts = (cell.get_text() for cell in row_data)
    return [None if na_marker in text else text for text in cell_texts]

def fetch_treasury_yields(year):
    """Scrape the Treasury yield table cells for one year or the full history.

    Args:
        year: a year (e.g. ``1990``), or the string ``'ALL'`` to request the
            page covering every year.

    Returns:
        dict with keys ``'oddrow'`` and ``'evenrow'``; each value is the flat
        list returned by ``get_row_values`` for the ``.text_view_data`` cells
        found under that row class.

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status.
        ValueError: if the page has no element with class ``t-chart``.
    """
    if year == 'ALL':
        treasury_url = "https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldAll"
    else:
        treasury_url = "https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yieldYear&year=" + "%s" % year

    # Without a timeout requests can wait indefinitely on a stalled server.
    page = requests.get(treasury_url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were data.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    rates_table = soup.find(class_="t-chart")
    if rates_table is None:
        # soup.find returns None when nothing matches; report that explicitly
        # rather than failing later with an AttributeError on None.
        raise ValueError("No element with class 't-chart' found at %s" % treasury_url)

    oddrow_data = get_row_values(rates_table.select(".oddrow .text_view_data"))
    evenrow_data = get_row_values(rates_table.select(".evenrow .text_view_data"))

    return {'oddrow': oddrow_data, 'evenrow': evenrow_data}
    

In [3]:
# Scrape the 1990 page; the result is a dict with 'oddrow' and 'evenrow' cell lists.
test_yields = fetch_treasury_yields(1990)

In [6]:
def is_desc(rates_list):
    """Tell whether the non-missing rates are in descending order.

    ``None`` entries are ignored. Equal neighbours count as ordered, and a
    list with no non-missing values yields ``True``.

    Args:
        rates_list: list of comparable values, possibly containing ``None``.

    Returns:
        bool: ``True`` when the filtered list equals its descending sort.
    """
    present = [rate for rate in rates_list if rate is not None]
    descending = list(present)
    descending.sort(reverse=True)
    return descending == present

def avg_pct_change(rec_list):
    """Average percentage change between consecutive non-missing values.

    ``None`` entries are dropped first, so a change is measured between
    neighbours in the filtered list.

    Args:
        rec_list: list of numbers, possibly containing ``None``.

    Returns:
        The mean of the step-to-step percentage changes rounded to two
        decimals, or ``None`` when fewer than two non-missing values are
        present (there is no step to average).
    """
    # Local import: the notebook only imports `mean` in a later cell, so the
    # function would otherwise fail with NameError if called before that cell.
    from statistics import mean

    values = [value for value in rec_list if value is not None]
    if len(values) < 2:
        # mean() of an empty list raises StatisticsError; report "no data" instead.
        return None

    pct_changes = []
    for previous, current in zip(values, values[1:]):
        if previous == 0:
            # A percentage change from zero is undefined; the next value itself
            # is recorded for this step instead.
            pct_changes.append(current)
        else:
            pct_changes.append((current - previous) / previous * 100)

    return round(mean(pct_changes), 2)

def cvt_to_float(rec_list):
    """Convert every non-``None`` entry to ``float``, keeping ``None`` as is.

    Args:
        rec_list: iterable of values accepted by ``float()``, or ``None``.

    Returns:
        list of the same length with floats in place of the non-``None``
        entries.

    Raises:
        ValueError: if a non-``None`` entry cannot be parsed by ``float()``.
    """
    return [entry if entry is None else float(entry) for entry in rec_list]
            

def process_row_info(in_list):
    """Group a flat list of scraped cells into processed 13-cell rows.

    Each consecutive group of 13 cells is treated as one row: cell 0 is a
    date string in ``%m/%d/%y`` form and cells 1-12 are rate values. Cells
    left over after the last full group of 13 are ignored.

    Args:
        in_list: flat, indexable sequence of cell values (strings or ``None``).

    Returns:
        list of rows; each row is
        ``[date as 'YYYY-MM-DD', 12 floats or None, is_desc flag, avg_pct_change]``.
    """
    rows = []

    for row_number in range(len(in_list) // 13):
        first = row_number * 13
        row = [in_list[position] for position in range(first, first + 13)]

        # Normalise the date to ISO form.
        row[0] = datetime.datetime.strptime(row[0], "%m/%d/%y").strftime("%Y-%m-%d")
        row[1:13] = cvt_to_float(row[1:13])

        # Two derived columns computed from the 12 converted rate values.
        rates = row[1:13]
        row.extend((is_desc(rates), avg_pct_change(rates)))
        rows.append(row)

    return rows

def combined_scrapped_yields(yield_list):
    """Process every cell group of a scrape result into one list of rows.

    Args:
        yield_list: mapping of group name to flat cell list, as returned by
            ``fetch_treasury_yields``.

    Returns:
        The rows from ``process_row_info`` for each group, concatenated in the
        mapping's iteration order. ``None`` when the mapping does not hold
        exactly two entries.
    """
    if len(yield_list) != 2:
        return None

    merged_rows = []
    for cell_group in yield_list.values():
        merged_rows.extend(process_row_info(cell_group))

    return merged_rows

In [None]:
# Scratch check of the percentage-change arithmetic on one hand-typed row.
# `mean` is imported here because the notebook otherwise imports it only in a later cell.
from statistics import mean

#test_rec = test_yields['oddrow'][1:13]
# None marks the missing value, matching what cvt_to_float passes through;
# a string such as 'NA' would make cvt_to_float raise ValueError in float().
test_rec = [2.06,2.01,1.98,1.88,1.72,1.47,1.38,1.35,1.42,1.47,1.77,1.95, None]
test_rec_float = cvt_to_float(test_rec)
print(test_rec_float)
test_rec_drop = [i for i in test_rec_float if i is not None]
print(test_rec_drop)

pct_change_list = []
for x in range(len(test_rec_drop)-1):
    if test_rec_drop[x] == 0:
        # Change from zero is undefined; the next value is recorded instead.
        pct_change_list.append(test_rec_drop[x+1])
    else:
        test_val = (test_rec_drop[x+1] - test_rec_drop[x])/test_rec_drop[x]
        pct_change_list.append(test_val*100)

print(pct_change_list)
round(mean(pct_change_list),2)

In [7]:
# `mean` averages the per-step percentage changes computed for each row.
from statistics import mean
# Turn both scraped cell groups into one list of processed per-date rows.
y = combined_scrapped_yields(test_yields)

In [8]:
# Element 0 of each row is a 'YYYY-MM-DD' string, so a plain string sort is chronological.
processed_yields = sorted(y, key=lambda x: x[0])

In [9]:
# Preview the two earliest rows: date, 12 rate values, is_desc flag, average pct change.
processed_yields[0:2]

[['1990-01-02',
  None,
  None,
  7.83,
  7.89,
  7.81,
  7.87,
  7.9,
  7.87,
  7.98,
  7.94,
  None,
  8.0,
  False,
  0.27],
 ['1990-01-03',
  None,
  None,
  7.89,
  7.94,
  7.85,
  7.94,
  7.96,
  7.92,
  8.04,
  7.99,
  None,
  8.04,
  False,
  0.24]]

In [10]:
import mariadb

In [11]:
import sys

# Instantiate Connection
import os

# Connection settings are read from the environment when present so real
# credentials need not be written into the notebook; the fallbacks are the
# local-development values this notebook has always used.
try:
    conn = mariadb.connect(
        user=os.environ.get("MARIADB_USER", "root"),
        password=os.environ.get("MARIADB_PASSWORD", "root"),
        host=os.environ.get("MARIADB_HOST", "localhost"),
        port=int(os.environ.get("MARIADB_PORT", "3306")))
except mariadb.Error as e:
    print(f"Error connecting to MariaDB Platform: {e}")
    sys.exit(1)

In [12]:
# Instantiate Cursor
# dictionary=True: a later cell reads each fetched row through .keys()/.get(),
# i.e. rows are consumed as dicts. buffered=True is passed to the connector as well.
cur = conn.cursor(buffered=True , dictionary=True)

In [13]:
# One "?" placeholder per element of a processed row, for the parameterised INSERT.
var_string = ', '.join(['?'] * len(processed_yields[0]))
var_string

'?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?'

In [14]:
# The statement text is the same for every row, so build it once outside the loop.
# Values are bound through the "?" placeholders, never interpolated into the SQL.
query_string = 'INSERT INTO us_treasury_yield_rates.test_rates VALUES (%s);' % var_string

# Load all rows in one transaction: committing per row would leave a partial
# load behind if a later row failed, and a re-run would then duplicate those rows.
try:
    for records in processed_yields:
        cur.execute(query_string, records)
except mariadb.Error:
    conn.rollback()
    raise
else:
    conn.commit()

In [None]:
# Read back every row of the yields table.
query = "SELECT * FROM us_treasury_yield_rates.test_rates"

cur.execute(query)

rows = cur.fetchall()

# The connection is intentionally left open here: the GDP insert cell later in
# this notebook still executes through `cur` and commits on `conn`.


In [None]:
# Turn each fetched row (read as a mapping) into a list of its values as strings.
result = [[str(value) for value in row.values()] for row in rows]

print(result[0:2])

In [None]:
import wbgapi as wb

In [None]:
# World Bank indicator code mapped to its human-readable label.
indicators = {'NY.GDP.MKTP.CD':'GDP (current US$)'}

# Request the indicator for 'USA' with time=range(2000, 2020), i.e. 2000 through 2019.
# NOTE(review): df is not read by any later cell visible here — confirm it is still needed.
df = wb.data.DataFrame(indicators, 'USA', time=range(2000, 2020))

In [None]:
# Years 1990 through 2020 for the BEA "Year" query parameter; every year,
# including the last one, is followed by a comma.
year_range = "".join(str(year) + ',' for year in range(1990, 2021))

year_range

In [None]:
# BEA GetData request against the NIPA dataset: table T10105, series A191RC
# ("Gross domestic product"), quarterly frequency, for the years in year_range, as JSON.
# "<API KEY>" is presumably a placeholder to be replaced with a real BEA UserID — TODO confirm.
bea_url = "https://apps.bea.gov/api/data/?&UserID=<API KEY>&method=GetData&DataSetName=NIPA&TableName=T10105&SeriesCode=A191RC&LineDescription=Gross+domestic+product&Frequency=Q&Year=" + year_range + "&ResultFormat=json"
# data in millions

# NOTE(review): page2 is not read by any later cell visible here; the same URL is
# fetched again with urlopen further down — confirm whether this request is needed.
page2 = requests.get(bea_url)


In [None]:
import json
from urllib.request import urlopen
# Open the response as a context manager so the underlying connection is
# closed as soon as the body has been read and parsed.
with urlopen(bea_url) as response:
    data_json = json.loads(response.read())

In [None]:
import locale
# Switch the process locale so locale.atoi reads the numeric strings with
# en_US conventions. This changes the locale for the whole kernel.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

raw_q_gdp = []

for entry in data_json['BEAAPI']['Results']['Data']:
    # Only keep entries in which some field equals the GDP line label.
    if 'Gross domestic product' not in entry.values():
        continue

    # One [TimePeriod, integer DataValue] pair per kept entry
    # (locale.atof would be the float alternative to locale.atoi).
    raw_q_gdp.append([entry['TimePeriod'], locale.atoi(entry['DataValue'])])

In [None]:
# Inspect the collected [TimePeriod, DataValue] pairs.
raw_q_gdp

In [None]:
def assign_dates(qlist):
    """Append a quarter-end calendar date to every row, in place.

    Element 0 of each row is a period label whose first four characters are
    the year and whose last two are the quarter (``'Q1'``..``'Q4'``).

    Args:
        qlist: list of mutable rows (lists); each row gains one element,
            the date string ``'YYYY-MM-DD'``.

    Returns:
        The same ``qlist`` object, after mutation.

    Raises:
        KeyError: if a label does not end in ``Q1``, ``Q2``, ``Q3`` or ``Q4``.
    """
    # Last calendar day of each quarter: March and December have 31 days,
    # June and September have 30.
    quarter_end = {'Q1': '03-31', 'Q2': '06-30', 'Q3': '09-30', 'Q4': '12-31'}

    for row in qlist:
        period = row[0]
        row.append(period[0:4] + "-" + quarter_end[period[-2:]])

    return qlist

In [None]:
# Mutates raw_q_gdp in place: every row gains a date element.
# Re-running this cell appends another date to each row.
assign_dates(raw_q_gdp)

In [None]:
def mil_to_tril(qlist):
    """Append element 1 of every row divided by one million, in place.

    Args:
        qlist: list of mutable rows (lists) whose element 1 is numeric.

    Returns:
        The same ``qlist`` object; each row gains ``row[1] / 10**6`` rounded
        to two decimals as its last element.
    """
    for row in qlist:
        row.append(round(row[1] / 1_000_000, 2))

    return qlist

In [None]:
# Mutates raw_q_gdp in place (each row gains the scaled value) and shows the first row.
# Re-running this cell appends the scaled value again.
mil_to_tril(raw_q_gdp)[0]

In [None]:
# One "?" placeholder per element of a GDP row, for the parameterised INSERT.
var_string2 = ', '.join(['?'] * len(raw_q_gdp[0]))
var_string2

In [None]:
# Requires `cur` and `conn` from the connection cells to still be open when this runs.
# The statement text is the same for every row, so build it once outside the loop.
query_string = 'INSERT INTO us_treasury_yield_rates.test_gdp VALUES (%s);' % var_string2

# Load all rows in one transaction: committing per row would leave a partial
# load behind if a later row failed, and a re-run would then duplicate those rows.
try:
    for records in raw_q_gdp:
        cur.execute(query_string, records)
except mariadb.Error:
    conn.rollback()
    raise
else:
    conn.commit()