This notebook corresponds to the cloud function: `update_all_sp_indices`.

In [1]:
import os
# Fall back to the local service-account key only when the environment has not
# already been configured; a value exported before launching Jupyter is no
# longer overwritten by this hard-coded path.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', '../ahmad_creds.json')

In [2]:
import pandas as pd
from google.cloud import bigquery
import urllib.request
import time

Set `TARGET_DATE` to the date (in `YYYY-MM-DD` format) for which the index values are needed.

In [3]:
# Date string in YYYY-MM-DD form; `main` keeps only the rows whose parsed
# `date` equals this value before uploading.
TARGET_DATE = '2023-10-11'

In [None]:
# Scratch path for the downloaded spreadsheet: `get_data` saves each download
# here and `main` reads it back with `pd.read_excel`, so every index reuses
# (and overwrites) the same file.
TEMP_FILENAME = '/tmp/temp.xls'

In [4]:
# Send a browser-like User-Agent with every request. The header passes
# additional information to the server so that the script is allowed to
# download data from it. Installing the opener globally makes all later
# `urllib.request` downloads in this notebook use it.
opener = urllib.request.build_opener()
opener.addheaders = [
    (
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36',
    ),
]
urllib.request.install_opener(opener)

In [5]:
table_headers = ['date', 'ytw']

PROJECT_ID = 'eng-reactor-287421'

# Maps each BigQuery table name (in the `eng-reactor-287421.spBondIndex` group)
# to the unique identifier S&P uses for that index when its data is requested
# from the S&P API. The identifiers were scraped directly from S&P. A dictionary
# is used so that a table is tied to its ID by name rather than by list
# position, which removes any ambiguity.
INDEX_IDS = {
    'sp_15plus_year_national_amt_free_index': 92346704,
    'sp_12_22_year_national_amt_free_index': 946546,
    'sp_7_12_year_national_amt_free_municipal_bond_index_yield': 946545,
    'sp_high_quality_intermediate_managed_amt_free_municipal_bond_index_yield': 92404510,
    'sp_high_quality_short_intermediate_municipal_bond_index_yield': 10001820,
    'sp_high_quality_short_municipal_bond_index_yield': 10001819,
    'sp_muni_high_quality_index_yield': 10001818,
    'sp_long_term_national_amt_free_municipal_bond_index_yield': 946547,
}

# Table names to process, in the insertion order of `INDEX_IDS`.
TABLE_NAMES = list(INDEX_IDS)

In [6]:
def convert_types(df):
    """Return a copy of `df` with `date` parsed and `ytw` scaled by 100.

    The input frame is left untouched, so the function is safe to call on a
    slice of another frame and can be re-run without compounding the scaling.

    Args:
        df: DataFrame with a `date` column holding YYYY-MM-DD values and a
            `ytw` column holding numeric yield-to-worst values.

    Returns:
        A new DataFrame whose `date` column is datetime64 and whose `ytw`
        column is a float column multiplied by 100 (conversion to basis
        points).

    Raises:
        ValueError: if a `date` value does not match YYYY-MM-DD or a `ytw`
            value cannot be interpreted as a number.
    """
    return df.assign(
        date=pd.to_datetime(df['date'], format='%Y-%m-%d'),
        # Values read from a sheet with header rows arrive as `object`; coerce
        # them so the multiplication is numeric rather than element-wise on
        # arbitrary Python objects.
        ytw=pd.to_numeric(df['ytw']) * 100,
    )

In [7]:
def get_schema():
    """Build the BigQuery schema that `upload_data` applies to each load job.

    Returns:
        A list of two `bigquery.SchemaField` objects: `date` typed as DATE and
        `ytw` typed as FLOAT.
    """
    column_types = (('date', 'DATE'), ('ytw', 'FLOAT'))
    return [bigquery.SchemaField(column, kind) for column, kind in column_types]

In [8]:
def get_data(link):
    """Download the resource at `link` and save it to `TEMP_FILENAME`.

    Args:
        link: URL of the file to download.
    """
    urllib.request.urlretrieve(url=link, filename=TEMP_FILENAME)

In [9]:
def upload_data(df, table_id):
    """Append the rows of `df` to the BigQuery table `table_id` and wait for the load job.

    Args:
        df: DataFrame whose columns match the schema from `get_schema`.
        table_id: Destination table identifier passed to
            `load_table_from_dataframe`.
    """
    bq_client = bigquery.Client(project=PROJECT_ID, location='US')
    load_config = bigquery.LoadJobConfig(
        schema=get_schema(),
        write_disposition='WRITE_APPEND',    # add rows; never replace existing ones
    )
    # `.result()` blocks until the load job has finished.
    bq_client.load_table_from_dataframe(df, table_id, job_config=load_config).result()

In [13]:
def main(args):
    """Download each S&P index, keep the `TARGET_DATE` row, and append it to BigQuery.

    For every table in `TABLE_NAMES` the three-year yield-to-worst export is
    downloaded to `TEMP_FILENAME`, trimmed of its header and footer rows,
    converted with `convert_types`, filtered to `TARGET_DATE`, and uploaded
    with `upload_data`. An index with no row for `TARGET_DATE` is reported and
    skipped instead of launching an empty load job.

    Note: `upload_data` appends, so running this twice for the same
    `TARGET_DATE` inserts the same row twice.

    Args:
        args: Unused; accepted so the signature matches the cloud function
            entry point.

    Returns:
        The string 'Run Success' once every index has been processed.
    """
    target_date = pd.Timestamp(TARGET_DATE)

    for table_name in TABLE_NAMES:    # for each index, we download index data and upload
        print(table_name)
        index_id = INDEX_IDS[table_name]

        # The link leads directly to an .xls download for the given `indexId`;
        # the file is saved to TEMP_FILENAME and then parsed with pd.read_excel.
        link = f'https://www.spglobal.com/spdji/en/idsexport/file.xls?hostIdentifier=48190c8c-42c4-46af-8d1a-0cd5db894797&redesignExport=true&languageId=1&selectedModule=YieldToWorstGraphView&selectedSubModule=Graph&yearFlag=threeYearFlag&indexId={index_id}'
        get_data(link)
        raw_sheet = pd.read_excel(TEMP_FILENAME)

        # Give the first two columns meaningful names, then drop the header
        # and tail description rows. Their lengths are fixed: the header uses
        # the first 8 rows and the tail uses the last 4 rows. `.copy()` detaches
        # the slice from the raw sheet so later conversion never writes into a
        # view of it.
        index_values = (
            raw_sheet
            .rename(columns={'Unnamed: 0': 'date', 'Unnamed: 1': 'ytw'})
            .iloc[8:-4]
            .copy()
        )
        index_values = convert_types(index_values)

        # extracting the index values for `TARGET_DATE`
        index_values = index_values[index_values['date'] == target_date]

        if index_values.empty:
            # Nothing published for that date (e.g. a non-trading day or a date
            # outside the exported window): do not start an empty load job.
            print(f'No index value found for {TARGET_DATE}; skipping upload for {table_name}')
        else:
            upload_data(index_values, f'{PROJECT_ID}.spBondIndex.{table_name}')

        # Pause between requests so we do not keep hitting the S&P API;
        # otherwise they may block our access.
        time.sleep(15)

    return 'Run Success'

In [14]:
# `main` never reads its argument, so 'test' is only a placeholder value.
main('test')

sp_15plus_year_national_amt_free_index
          date       ytw
730 2023-10-11  4.654044
