In [1]:
from bs4 import BeautifulSoup
import requests
import re
import datetime
import pandas as pd

In [2]:
def get_row_values(row_data):
    """Return the text of every element in ``row_data``, in order.

    Each item must expose a ``get_text()`` method; the value it returns is
    collected into the resulting list.
    """
    cell_texts = []
    for cell in row_data:
        cell_texts.append(cell.get_text())
    return cell_texts

def fetch_treasury_yields(year, timeout=30):
    """Scrape the Treasury yield-curve table for one year or for all years.

    Parameters
    ----------
    year : int or str
        Year to request, or the string ``'ALL'`` to request the full-history
        page.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    dict
        ``{'oddrow': [...], 'evenrow': [...]}``; each list holds the flattened
        cell texts of the ``.oddrow`` / ``.evenrow`` table rows.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    ValueError
        If the page does not contain an element with class ``t-chart``.
    """
    base_url = "https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx"
    if year == 'ALL':
        treasury_url = base_url + "?data=yieldAll"
    else:
        treasury_url = base_url + "?data=yieldYear&year=" + "%s" % year

    # Without a timeout a stalled connection would hang the notebook forever.
    page = requests.get(treasury_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    rates_table = soup.find(class_="t-chart")
    if rates_table is None:
        raise ValueError("No 't-chart' table found at %s" % treasury_url)

    oddrow_data = get_row_values(rates_table.select(".oddrow .text_view_data"))
    evenrow_data = get_row_values(rates_table.select(".evenrow .text_view_data"))

    return {'oddrow': oddrow_data, 'evenrow': evenrow_data}
    

In [3]:
# Scrape the 2021 yield table; note that this performs a live HTTP request.
test_yields = fetch_treasury_yields(2021)

In [4]:
def is_desc(rates_list):
    """Tell whether ``rates_list`` is already in non-increasing order.

    The list is compared against a descending-sorted copy of itself, so equal
    neighbouring values still count as descending.
    """
    descending_copy = list(rates_list)
    descending_copy.sort(reverse=True)
    return descending_copy == rates_list

def avg_pct_change(rec_list):
    """Return the mean element-to-element fractional change, rounded to 2 places.

    The values are loaded into a pandas Series, the change from each element
    to the next is computed with ``pct_change()``, and the mean of those
    changes is rounded to two decimal places.
    """
    step_changes = pd.Series(rec_list).pct_change()
    mean_change = step_changes.mean()
    return round(mean_change, 2)

def process_row_info(in_list, fields_per_row=13):
    """Group a flat list of scraped cell texts into typed records.

    ``in_list`` is consumed in consecutive chunks of ``fields_per_row`` items.
    In each chunk the first item is a date string in ``%m/%d/%y`` format and
    the remaining items are numeric strings.

    Parameters
    ----------
    in_list : list of str
        Flattened cell texts, one table row after another.
    fields_per_row : int, optional
        Number of cells that make up one row (date plus rate columns).
        Defaults to 13, the width this function was originally written for.

    Returns
    -------
    list of list
        One record per complete row: the date reformatted as ``%Y-%m-%d``,
        the rates as floats, then the ``is_desc`` flag and the
        ``avg_pct_change`` value computed over those rates. A trailing
        incomplete row is dropped.

    Raises
    ------
    ValueError
        If ``fields_per_row`` is not positive, if a date does not match
        ``%m/%d/%y``, or if a rate cell cannot be converted to ``float``.
    """
    if fields_per_row < 1:
        raise ValueError("fields_per_row must be a positive integer")

    valMap = []

    # Only walk over complete rows; leftover cells at the end are ignored.
    complete_length = len(in_list) - (len(in_list) % fields_per_row)

    for start in range(0, complete_length, fields_per_row):
        chunk = in_list[start:start + fields_per_row]

        trade_date = datetime.datetime.strptime(chunk[0], "%m/%d/%y").strftime("%Y-%m-%d")
        rates = [float(cell) for cell in chunk[1:]]

        record = [trade_date] + rates
        record.extend((is_desc(rates), avg_pct_change(rates)))
        valMap.append(record)

    return valMap

def combined_scrapped_yields(yield_list):
    """Merge the processed records of a two-entry mapping into one list.

    Every value of ``yield_list`` is passed through ``process_row_info`` and
    the resulting records are concatenated in the mapping's iteration order.
    If ``yield_list`` does not contain exactly two entries, ``None`` is
    returned.
    """
    if len(yield_list) != 2:
        return None

    merged_records = []
    for row_cells in yield_list.values():
        merged_records.extend(process_row_info(row_cells))

    return merged_records

In [5]:
# Parse both scraped row groups (odd and even rows) into one list of typed records.
y = combined_scrapped_yields(test_yields)

In [6]:
# Each record starts with a YYYY-MM-DD date string, which sorts
# chronologically, so ordering on the first field yields date order.
processed_yields = list(y)
processed_yields.sort(key=lambda record: record[0])

In [7]:
# Preview the first two records: date, rate columns, descending flag, mean % change.
processed_yields[0:2]

[['2021-01-04',
  0.09,
  0.09,
  0.09,
  0.09,
  0.1,
  0.11,
  0.16,
  0.36,
  0.64,
  0.93,
  1.46,
  1.66,
  False,
  0.35],
 ['2021-01-05',
  0.08,
  0.09,
  0.09,
  0.09,
  0.1,
  0.13,
  0.17,
  0.38,
  0.66,
  0.96,
  1.49,
  1.7,
  False,
  0.36]]

In [8]:
import mariadb

In [9]:
import os
import sys

# Instantiate Connection
# Connection settings come from the environment so real credentials do not
# have to be stored in the notebook. The fallbacks are the original
# local-development values and should not be used outside a throwaway setup.
try:
    conn = mariadb.connect(
        user=os.environ.get("MARIADB_USER", "root"),
        password=os.environ.get("MARIADB_PASSWORD", "root"),
        host=os.environ.get("MARIADB_HOST", "localhost"),
        port=int(os.environ.get("MARIADB_PORT", "3306")))
except mariadb.Error as e:
    print(f"Error connecting to MariaDB Platform: {e}")
    sys.exit(1)

In [10]:
# Instantiate Cursor
# dictionary=True is relied on further down: the fetched rows are read with
# .keys()/.get() when the query results are post-processed.
cur = conn.cursor(buffered=True , dictionary=True)

In [14]:
# One '?' placeholder per field of a processed record, comma-separated,
# ready to be substituted into the INSERT statement's VALUES clause.
var_string = ', '.join(['?'] * len(processed_yields[0]))
var_string

'?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?'

In [12]:
# The statement text is the same for every row, so build it once up front.
query_string = 'INSERT INTO us_treasury_yield_rates.test_rates VALUES (%s);' % var_string

try:
    for records in processed_yields:
        cur.execute(query_string, records)
    # Commit once at the end so the load is all-or-nothing instead of leaving
    # a partial set of rows behind when a later insert fails.
    conn.commit()
except mariadb.Error:
    # Discard any rows inserted before the failure, then surface the error.
    conn.rollback()
    raise

In [None]:
# Plain string literal: there is nothing to interpolate, so no f-string is needed.
query = "SELECT * FROM us_treasury_yield_rates.test_rates"

try:
    cur.execute(query)
    rows = cur.fetchall()
finally:
    # Release the cursor and the connection even if the query fails.
    cur.close()
    conn.close()


In [None]:
# Turn every fetched row (a dict) into a list holding the string form of each
# of its values, keeping the row's own key order.
result = []

for row in rows:
    result.append([str(value) for value in row.values()])

print(result[0:2])