This notebook corresponds to the cloud function: `compute_shape_parameter`.

In [3]:
import os
# Set the credentials environment variable before any Google client is created.
# NOTE(review): 'creds.json' is a relative path, so it is presumably resolved
# against the notebook's working directory -- confirm the key file lives there
# and is never committed alongside the notebook.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'creds.json'

In [4]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from datetime import datetime

from google.cloud import bigquery

In [None]:
# Date whose shape parameter is fitted and uploaded by `main`.
# Midnight timestamp, used to look up rows in the date-indexed frames.
TARGET_DATE = datetime(year=2023, month=5, day=26)

In [5]:
# BigQuery destination for the fitted shape parameter.
PROJECT_ID = 'eng-reactor-287421'
DATASET_NAME = 'yield_curves_v2'
TABLE_ID = f'{PROJECT_ID}.{DATASET_NAME}.shape_parameters'

# Schema of the destination table: one row per date with its fitted L value.
SCHEMA = [
    bigquery.SchemaField('Date', 'DATE'),
    bigquery.SchemaField('L', 'FLOAT'),
]

# Tables in the `spBondIndex` dataset that hold the index yields (`ytw`).
SP_INDEX_TABLES = [
    'sp_12_22_year_national_amt_free_index',
    'sp_15plus_year_national_amt_free_index',
    'sp_7_12_year_national_amt_free_municipal_bond_index_yield',
    'sp_muni_high_quality_index_yield',
    'sp_high_quality_intermediate_managed_amt_free_municipal_bond_index_yield',
    'sp_high_quality_short_intermediate_municipal_bond_index_yield',
    'sp_high_quality_short_municipal_bond_index_yield',
    'sp_long_term_national_amt_free_municipal_bond_index_yield',
]

# Tables in the `spBondIndexMaturities` dataset that hold the weighted average
# maturities. These names are also used as the column labels of BOTH loaded
# frames, so entry i here must describe the same index as entry i of
# SP_INDEX_TABLES -- keep the two lists positionally aligned.
SP_MATURITY_TABLES = [
    'sp_12_22_year_national_amt_free_index',
    'sp_15plus_year_national_amt_free_index',
    'sp_7_12_year_national_amt_free_index',
    'sp_high_quality_index',
    'sp_high_quality_intermediate_managed_amt_free_index',
    'sp_high_quality_short_intermediate_index',
    'sp_high_quality_short_index',
    'sp_long_term_national_amt_free_municipal_bond_index_yield',
]


In [6]:
def upload_data(df, table_id, schema):
    """Append the rows of `df` to the BigQuery table `table_id`.

    Args:
        df: DataFrame whose rows are loaded into the table.
        table_id: Fully qualified destination table id.
        schema: List of `bigquery.SchemaField` passed to the load job config.

    Raises:
        Exception: whatever the load job raises while waiting for completion;
            a failure message is printed before the error is re-raised.
    """
    bq_client = bigquery.Client(project=PROJECT_ID, location='US')
    # WRITE_APPEND: existing rows are kept and the new rows are added after them.
    load_config = bigquery.LoadJobConfig(
        schema=schema,
        write_disposition='WRITE_APPEND',
    )
    load_job = bq_client.load_table_from_dataframe(
        df,
        table_id,
        job_config=load_config,
    )
    try:
        # Block until the load job finishes; errors surface here.
        load_job.result()
        print(f'Upload Successful to {table_id}')
    except Exception as upload_error:
        print(f'Failed to Upload to {table_id}')
        raise upload_error

In [7]:
def load_index_data():
    """Load the yield (`ytw`) history of every table in SP_INDEX_TABLES.

    Each table is read from the `spBondIndex` dataset, its `ytw` values are
    multiplied by 100, and duplicate dates are collapsed to the first row
    returned by the query. The per-table series are then joined on date.

    Returns:
        DataFrame indexed by date (ascending) with one yield column per index.
        Columns are labelled with SP_MATURITY_TABLES (not SP_INDEX_TABLES) so
        they line up with the frame returned by `load_maturity_data`; this
        relies on the two lists being positionally aligned. Gaps are
        forward-filled in time and dates that still have a missing value
        are dropped.
    """
    index_data = []
    for table in SP_INDEX_TABLES:
        query = f'''SELECT * FROM `{PROJECT_ID}.spBondIndex.{table}` order by date desc'''
        df = pd.read_gbq(query, project_id=PROJECT_ID, dialect='standard')
        df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
        df['ytw'] = df['ytw'] * 100
        df = df.drop_duplicates('date').set_index('date')
        # Keep only the yield column: the relabelling below assumes exactly one
        # column per table, which `SELECT *` alone does not guarantee.
        index_data.append(df[['ytw']])

    df = pd.concat(index_data, axis=1)
    df.columns = SP_MATURITY_TABLES
    # The tables are queried newest-first. Sort chronologically so that ffill
    # carries the latest *earlier* observation forward instead of potentially
    # back-filling a gap from a later date.
    df = df.sort_index().ffill(axis=0).dropna()
    return df

In [8]:
def load_maturity_data():
    """Load the weighted average maturity history of every table in SP_MATURITY_TABLES.

    Each table is read from the `spBondIndexMaturities` dataset, duplicate
    effective dates are collapsed to the first row returned by the query, and
    only the `weightedAverageMaturity` column is kept. The per-table series
    are then joined on effective date.

    Returns:
        DataFrame indexed by effective date (ascending) with one column per
        entry of SP_MATURITY_TABLES. Gaps are forward-filled in time and dates
        that still have a missing value are dropped.
    """
    maturity_data = []

    for table in SP_MATURITY_TABLES:
        query = f'SELECT * FROM `{PROJECT_ID}.spBondIndexMaturities.{table}` ORDER BY effectivedate DESC'
        df = pd.read_gbq(query, project_id=PROJECT_ID, dialect='standard')
        df['effectivedate'] = pd.to_datetime(df['effectivedate'], format='%Y-%m-%d')
        df = df.drop_duplicates('effectivedate').set_index('effectivedate')
        maturity_data.append(df[['weightedAverageMaturity']])

    df = pd.concat(maturity_data, axis=1)
    df.columns = SP_MATURITY_TABLES
    # The tables are queried newest-first. Sort chronologically so that ffill
    # carries the latest *earlier* observation forward instead of potentially
    # back-filling a gap from a later date.
    df = df.sort_index().ffill(axis=0).dropna()
    return df

In [9]:
def get_maturity_dict(maturity_df, date):
    """Return the row of `maturity_df` at `date` as a {column label: value} dict.

    Args:
        maturity_df: DataFrame indexed by date, one column per index.
        date: Index label of the row to extract.

    Returns:
        dict mapping each column label to that column's value on `date`.

    Raises:
        KeyError: if `date` is not present in the frame's index.
    """
    row = maturity_df.loc[date].T
    return {label: value for label, value in zip(row.index, row.values)}


def decay_transformation(t, L):
    """Compute L * (1 - exp(-t / L)) / t.

    Args:
        t: Maturity value(s); scalar or array-like supporting elementwise math.
        L: Shape parameter. Must be non-zero, as must `t`, since both appear
            as divisors.

    Returns:
        The transformed value(s), same shape as `t`.
    """
    exp_term = np.exp(-t / L)
    return L * (1 - exp_term) / t


def laguerre_transformation(t, L):
    """Compute L * (1 - exp(-t / L)) / t - exp(-t / L).

    Args:
        t: Maturity value(s); scalar or array-like supporting elementwise math.
        L: Shape parameter. Must be non-zero, as must `t`, since both appear
            as divisors.

    Returns:
        The transformed value(s), same shape as `t`.
    """
    exp_term = np.exp(-t / L)
    decay_term = L * (1 - exp_term) / t
    return decay_term - exp_term


def run_NL_model(summary_df_2_2, L):
    """Fit a ridge regression of `ytw` on the two maturity transformations for a given L.

    Note: the input frame is modified in place -- its `X1` and `X2` columns
    are (over)written with the decay and Laguerre transformations of
    `Weighted_Maturity` for this `L`.

    Args:
        summary_df_2_2: DataFrame with `ytw` and `Weighted_Maturity` columns.
        L: Shape parameter passed to both transformations.

    Returns:
        (model, mae): the fitted `Ridge` estimator and its in-sample mean
        absolute error against `ytw`.
    """
    maturities = summary_df_2_2['Weighted_Maturity']
    summary_df_2_2['X1'] = decay_transformation(maturities, L)
    summary_df_2_2['X2'] = laguerre_transformation(maturities, L)

    # Regressors: constant column plus the two transformed maturities.
    features = sm.add_constant(summary_df_2_2[['X1', 'X2']])
    target = summary_df_2_2['ytw']

    ridge = Ridge(alpha=0.001, random_state=1)
    ridge.fit(features, target)

    # Error is measured on the same rows the model was fitted on.
    fitted = ridge.predict(features)
    return ridge, mean_absolute_error(target, fitted)

In [19]:
def main(args):
    """Fit the shape parameter L for TARGET_DATE and append it to BigQuery.

    Loads the index yields and weighted average maturities, builds a per-index
    summary for TARGET_DATE, grid-searches integer L values for the lowest
    in-sample MAE, and uploads a single (Date, L) row to TABLE_ID.

    Args:
        args: Unused; present so the function keeps its entry-point signature.

    Returns:
        The string 'Upload successful'.

    Raises:
        KeyError: if TARGET_DATE is missing from the loaded index or maturity data.
        Exception: any error raised by the BigQuery upload.
    """
    print(TARGET_DATE)
    index_data = load_index_data()
    maturity_data = load_maturity_data()
    maturity_dict = get_maturity_dict(maturity_data, TARGET_DATE)

    # One row per index: its yield on TARGET_DATE and its weighted maturity.
    summary_df = pd.DataFrame(index_data.loc[TARGET_DATE])
    summary_df.columns = ['ytw']
    summary_df['Weighted_Maturity'] = summary_df.index.map(maturity_dict).astype(float)

    # Tries all integer values from 1 to 49; usually the value of L with the
    # lowest `mae` is around 5, but could get up to 25, so this range comfortably
    # captures all plausible values. L = 0 is skipped: both transformations
    # divide by L, so it is undefined for the model.
    mae_by_L = {}
    for potential_L_value in range(1, 50):
        _, mae = run_NL_model(summary_df, potential_L_value)
        mae_by_L[potential_L_value] = mae

    # Lowest MAE wins; on an exact tie the smallest L is kept.
    best_L = min(mae_by_L, key=mae_by_L.get)

    tau_table = pd.DataFrame({'Date': [pd.to_datetime(TARGET_DATE)],
                              'L': [float(best_L)]})
    upload_data(tau_table, TABLE_ID, SCHEMA)
    return 'Upload successful'

In [20]:
# Run the full pipeline for TARGET_DATE. `main` never reads its argument,
# so 'test' is just a placeholder. This call appends a row to the BigQuery table.
main('test')

2023-05-26 00:00:00
Upload Successful


'Upload successful'