### Compute and Compare Prices


This notebook uses the price function (`get_price`) and the current yield curve function (`get_current_yield_curve`) to compute trade prices from both the ficc yield and the MSRB yield, and then flags the trades with the biggest difference between the two.  

In [1]:
from numpy.core.numeric import NaN
from pickle5 import pickle
import redis
import pandas as pd
import json
import numpy as np
import requests
import subprocess
import redis
import time
import datetime
import holidays
import pandas as pd
from google.cloud import bigquery
from requests.api import request
import sklearn
import copy
from tqdm import tqdm
import os
import json
from dateutil.relativedelta import relativedelta
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
from dateutil import parser


In [2]:
# Tell the Google Cloud client libraries which service-account key file to use.
# NOTE(review): the path is relative, so it resolves against the notebook's working directory — confirm the key file lives there.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "eng-reactor-287421-112eb767e1b3.json"

In [3]:
# BigQuery client used further down to pull reference data for the trades.
bq_client = bigquery.Client()
# Redis connection used by the yield-curve lookup helpers and by get_ref_data.
redis_client = redis.Redis(host='10.146.62.92', port=6379, db=0)
# NOTE(review): PROJECT_ID is not referenced anywhere else in the visible notebook (the query hardcodes the project).
PROJECT_ID = "eng-reactor-287421"

In [4]:
# Lookup from the integer interest-payment-frequency code in the reference data
# to the numeric frequency used by the pricing code (applied in transform_ref_data).
# None means no numeric frequency is defined for that code; 0 selects the
# `frequency == 0` branch of get_price.
# NOTE(review): the values look like coupon payments per year (2, 12, 52, ...) — confirm
# against the reference-data dictionary.
COUPON_FREQUENCY_DICT = {
    0: None,
    1: 2,
    2: 12,
    3: 1,
    4: 52,
    5: 4,
    6: 0.5,
    7: 1/3,
    8: 1/4,
    9: 1/5,
    10: 1/7,
    11: 1/8,
    12: 26,
    13: None,
    14: 360,
    16: 0,
    23: None,
}


In [5]:
# Calendar-day range over which the federal holidays are pre-computed.
dr = pd.date_range(start='2010-01-01', end='2100-01-01')
# NOTE(review): this frame is not read before `df` is reassigned by the CSV load later in the notebook.
df = pd.DataFrame()
df['Date'] = dr

cal = calendar()
# US federal holidays inside the range; get_business_day checks dates against this.
# NOTE: this rebinds `holidays`, shadowing the `holidays` module imported at the top of the notebook.
holidays = cal.holidays(start=dr.min(), end=dr.max())

This module gets the most recent Nelson-Siegel coefficient and
the standard scalar coefficient from Redis Memorystore given a particular datetime. 

Between the hours of 10 and 16 EST on business days, the Nelson-Siegel coefficient will be 
the coefficient for that minute and the standard scalar will be that of the last business day.

In [None]:
def get_business_day(date):
    '''
    Returns the most recent business day on or before the given date.

    Parameters:
    date: a datetime.datetime / pandas Timestamp, a plain datetime.date (treated as midnight
          of that day), or a string that dateutil's parser can interpret.

    Returns:
    - the data start date (2021-07-27 00:00:00) when `date` is earlier than that, because no
      yield curve coefficients were collected before then;
    - otherwise `date` moved back one calendar day at a time (time of day preserved) until it
      is neither a Saturday/Sunday nor a date found in the module-level `holidays`.
    '''

    if not isinstance(date, datetime.date):
        date = parser.parse(date)
    elif not isinstance(date, datetime.datetime):
        # A plain date cannot be compared with the datetime below; promote it to midnight.
        date = datetime.datetime.combine(date, datetime.time.min)

    data_start_date = datetime.datetime.strptime("2021-7-27:0:00:00", "%Y-%m-%d:%H:%M:%S")
    if date < data_start_date:
        return data_start_date

    # weekday() numbers Monday as 0, so the weekend is 5 (Saturday) and 6 (Sunday).
    # Monday must not be in this set, otherwise every Monday is rolled back to Friday.
    weekend_days = (5, 6)
    while date.strftime("%Y%m%d") in holidays or date.weekday() in weekend_days:
        date = date - pd.DateOffset(1)
    return date

def get_last_business_time(date):
    '''
    Clamps a datetime to the most recent moment inside market hours (10:00 to 16:00 inclusive).

    - Inside market hours: the datetime is returned unchanged.
    - Before 10:00: the previous calendar day is passed through get_business_day and the
      result is set to 15:59.
    - After 16:00: the same day is set to 15:59.

    Only hour and minute are replaced; seconds and microseconds are carried over.
    '''
    opening_time = datetime.time(10, 0)
    closing_time = datetime.time(16, 0)
    time_of_day = date.time()

    if opening_time <= time_of_day <= closing_time:
        return date

    if time_of_day < opening_time:
        # Too early for today's curve: fall back to the previous business day.
        date = get_business_day(date - pd.DateOffset(1))

    return date.replace(hour=15, minute=59)

def find_last_minute(date):
    '''
    Returns the latest datetime at or before `date` for which redis holds a key.

    Keys are the datetime formatted as "%Y-%m-%d:%H:%M". The search steps back one
    minute at a time and only stops when a key is found, so it does not terminate if
    redis holds no key at or before `date`.
    '''
    while True:
        if redis_client.exists(date.strftime("%Y-%m-%d:%H:%M")):
            return date
        date = date - pd.Timedelta(minutes=1)

def get_yc_data(date):
    '''
    Loads the yield curve payload stored in redis for the most recent usable minute.

    The datetime is first moved to a business day, then clamped to market hours, then
    walked back to the latest minute that has a redis key. The value stored under that
    key is unpickled and returned.
    '''
    business_day = get_business_day(date)
    business_time = get_last_business_time(business_day)
    lookup_time = find_last_minute(business_time)
    redis_key = lookup_time.strftime("%Y-%m-%d:%H:%M")
    # NOTE: pickle.loads runs whatever the payload encodes, so this relies on the redis store being trusted.
    return pickle.loads(redis_client.get(redis_key))

This module implements the function to get the daily yield curve. We use the value of the yield to calculate the ficc price for the trade. 

In [7]:

# Default shape parameter, chosen from initial hyperparameter tuning. It affects the curvature and slope of the
# Nelson-Siegel curve: higher values generally imply a straighter, more monotonic yield curve (particularly at
# maturities < 1). get_scaled_features reads this module-level value directly.
L = 17 

###Functions to transform maturities into the components in the nelson-siegel model
def decay_transformation(t:np.array, L:float):
    '''
    First Nelson-Siegel feature: L * (1 - exp(-t / L)) / t.

    Parameters:
    t:np.array  maturities (a single float also works)
    L:float     shape parameter

    Returns the feature evaluated elementwise; t == 0 is not special-cased.
    '''
    exp_decay = np.exp(-t/L)
    numerator = L*(1 - exp_decay)
    return numerator/t

def laguerre_transformation(t, L):
    '''
    Second Nelson-Siegel feature: L * (1 - exp(-t / L)) / t - exp(-t / L).

    Parameters:
    t:np.array  maturities (a single float also works)
    L:float     shape parameter

    Returns the feature evaluated elementwise; t == 0 is not special-cased.
    '''
    exp_decay = np.exp(-t/L)
    decay_feature = L*(1 - exp_decay)/t
    return decay_feature - exp_decay


def load_model_parameters(target_date):
    '''
    Fetches the Nelson-Siegel coefficients and the scaler parameters for `target_date`
    from the memory store (via get_yc_data).

    Returns a pair: (value under 'nelson_values', value under 'scalar_values').
    '''
    yc_payload = get_yc_data(target_date)

    # These dictionary keys are fixed at the time the data is uploaded to the memory store.
    return yc_payload['nelson_values'], yc_payload['scalar_values']

### Functions used for prediction
def get_scaled_features(t:np.array, exponential_mean:float, exponential_std:float, laguerre_mean:float, laguerre_std:float):

    '''
    Builds the two standardised Nelson-Siegel features for the given maturities.

    The exponential (decay) and laguerre features are computed with the module-level shape
    parameter L, and each is standardised as (feature - mean) / std. This reproduces by hand
    the sklearn StandardScaler transformation used when the model was trained.

    Parameters:
    t:np.array  maturities (a single float also works)
    exponential_mean:float
    exponential_std:float
    laguerre_mean:float
    laguerre_std:float

    Returns the pair (scaled exponential feature, scaled laguerre feature).
    '''

    raw_exponential = decay_transformation(t, L)
    raw_laguerre = laguerre_transformation(t, L)

    scaled_exponential = (raw_exponential - exponential_mean)/exponential_std
    scaled_laguerre = (raw_laguerre - laguerre_mean)/laguerre_std
    return scaled_exponential, scaled_laguerre

def predict_ytw(t:np.array, const:float , exponential:float , laguerre:float , exponential_mean:float , exponential_std:float , laguerre_mean:float , laguerre_std:float ):
    '''
    Evaluates the Nelson-Siegel model at the given maturities.

    The maturities are turned into scaled features with get_scaled_features, and because the
    model is linear in those features the prediction is simply
    const + exponential * X1 + laguerre * X2.

    Parameters:
    t:np.array  maturities (a single float also works)
    const:float        model intercept
    exponential:float  coefficient of the scaled exponential feature
    laguerre:float     coefficient of the scaled laguerre feature
    exponential_mean:float
    exponential_std:float
    laguerre_mean:float
    laguerre_std:float
    '''

    scaled_exponential, scaled_laguerre = get_scaled_features(
        t, exponential_mean, exponential_std, laguerre_mean, laguerre_std
    )
    exponential_term = exponential*scaled_exponential
    laguerre_term = laguerre*scaled_laguerre
    return const + exponential_term + laguerre_term


def get_current_yield_curve(maturities,target_date):
    '''
    Predicts the yield implied by the Nelson-Siegel curve in force at `target_date`.

    Parameters:
    maturities: the maturity (float) or maturities (np.array) to evaluate the curve at
    target_date: the datetime whose most recent stored curve should be used

    Returns:
    The prediction from predict_ytw on success. If either parameter table comes back empty,
    a dict {'status':'Failed', 'error':<message>, 'result':np.nan} is returned instead, so
    callers should check for a dict before doing arithmetic on the result.

    Any exception raised while loading the parameters propagates to the caller.
    '''

    nelson_siegel_daily_coef, scaler_daily_parameters = load_model_parameters(target_date)

    if len(nelson_siegel_daily_coef)==1:
        const, exponential, laguerre = nelson_siegel_daily_coef.values[0]
    elif len(nelson_siegel_daily_coef)>1:
        # Surface the data-quality problem instead of silently discarding the message.
        print('Multiple rows for target date in nelson_siegel_coef_daily, taking first one. Check bigquery table.')
        const, exponential, laguerre = nelson_siegel_daily_coef.iloc[0, :]
    else:
        return {'status':'Failed', 'error':'Target date not in nelson_siegel_coef_daily', 'result':np.nan}

    if len(scaler_daily_parameters)==1:
        exponential_mean, exponential_std, laguerre_mean, laguerre_std = scaler_daily_parameters.values[0]
    elif len(scaler_daily_parameters)>1:
        print('Multiple rows for target date in standardscaler_parameters_daily, taking first one. Check bigquery table.')
        exponential_mean, exponential_std, laguerre_mean, laguerre_std = scaler_daily_parameters.iloc[0, :]
    else:
        return {'status':'Failed', 'error':'Target date not in standardscaler_parameters_daily', 'result':np.nan}

    # Use the `maturities` argument. Reading a notebook-level `maturity` here would only work
    # when a cell happened to define that global before the call.
    prediction = predict_ytw(maturities, const, exponential, laguerre, exponential_mean, exponential_std, laguerre_mean, laguerre_std)
    return prediction

This module implements the function to get the dollar value for the trade given the reference data and the yield for the trade.

In [8]:
def diff_in_days(end_date, start_date, convention="360/30"):
    '''
    Number of days from start_date to end_date with 30-day months and 360-day years.

    The start day is capped at 30; the end day is capped at 30 only when the (capped)
    start day is 30. Any convention other than "360/30" is reported with a print, and the
    same calculation is still applied.
    '''
    if convention != "360/30": 
        print("unknown convention", convention)

    start_day = min(start_date.day, 30)
    end_day = min(end_date.day, 30) if start_day == 30 else end_date.day

    year_days = (end_date.year - start_date.year) * 360
    month_days = (end_date.month - start_date.month) * 30
    return year_days + month_days + (end_day - start_day)

def get_next_coupon_date(first_coupon_date,settlement_date,time_delta):
    '''
    Returns the first coupon date on or after settlement_date.

    Starts at first_coupon_date and keeps adding time_delta while the candidate is still
    before settlement_date. time_delta must move the date forward, otherwise the loop
    never ends.
    '''
    candidate = first_coupon_date
    while candidate < settlement_date:
        candidate += time_delta
    return candidate

def get_price(cusip, my_prev_coupon_date, first_coupon_date, my_next_coupon_date, end_date,
              settlement_date, dated_date, frequency, ytw, coupon, RV, time_delta):
    '''
    Computes a price from a yield by discounting the cash flows up to end_date.
    All day counts come from diff_in_days (30-day months, 360-day years).

    Parameters:
    cusip: only used in the diagnostic print when accrued days are negative
    my_prev_coupon_date: start of the current interest period (accrual start)
    first_coupon_date: first coupon date of the bond
    my_next_coupon_date: next coupon date at or after settlement
    end_date: redemption date being priced to (call, refund or maturity date)
    settlement_date: settlement date of the trade
    dated_date: dated date of the bond
    frequency: coupon frequency; 0 selects the separate branch with no periodic coupons
    ytw: yield, divided by 100 before use (i.e. treated as a percentage)
    coupon: coupon rate; accrues as coupon * days / 360
            (presumably an annual rate in percent of par — TODO confirm)
    RV: redemption value paid at end_date
    time_delta: length of one coupon period, used to step through coupon dates

    Returns:
    np.inf when end_date is null, otherwise the price rounded to 3 decimals.
    '''
#     if cusip == "34061YAH3": my_prev_coupon_date += relativedelta(weeks = 2)
        
    # No redemption date for this scenario: return +inf so it never wins a "lowest price" comparison.
    if pd.isnull(end_date): return np.inf
    B = 360        # days per year under the day-count convention
    Y = ytw/100    # yield as a decimal
    
    if frequency == 0:
        # No periodic coupons: interest accrues from the dated date and is paid with the
        # redemption value; discounting uses semiannual compounding (periods of B/2 days).
        A = diff_in_days(settlement_date,dated_date)
        accrued = coupon*A/B        
        duration = diff_in_days(end_date,settlement_date)
        periods = duration/(B/2)
        denom = pow(1 + Y/2, periods)
        DIR = diff_in_days(end_date,dated_date)
        base = (RV + coupon*DIR/B) / denom
        P = base - accrued
    else:
        # N = number of coupon dates from my_next_coupon_date through end_date (inclusive),
        # stepping by time_delta; final_coupon_date is the last of them.
        if my_next_coupon_date > end_date: N = 0
        else:
            N = 1
            final_coupon_date = my_next_coupon_date
            while final_coupon_date + time_delta <= end_date:
                N += 1
                final_coupon_date += time_delta            

        # A = accrued days from the previous coupon date to settlement.
        A = diff_in_days(settlement_date,my_prev_coupon_date)
        if A < 0:
            # Diagnostic only: settlement falls before the previous coupon date.
            print(cusip, A, settlement_date,my_prev_coupon_date)
            
        accrued = coupon*A/B
        E = B/frequency           # = number of days in interest payment period 
        # NOTE: this fails for any frequency that does not divide 360 evenly.
        assert E == round(E)
        
        # F = days from settlement to the next coupon date.
        F = diff_in_days(my_next_coupon_date,settlement_date)
        # G = days of interest paid by the next coupon: the stretch from the dated date when
        # it is the first coupon, otherwise one regular period.
        if my_next_coupon_date == first_coupon_date:
            G = diff_in_days(first_coupon_date,dated_date)
        else:
            G = E

        if end_date <= my_next_coupon_date:
            # Redemption happens no later than the next coupon date: discount with simple
            # interest over D days; H is the number of interest days paid at redemption.
            D = diff_in_days(end_date,settlement_date) 
            H = diff_in_days(end_date,my_prev_coupon_date) 
            base = (RV + coupon*H/B) / (1 + (Y/frequency)*D/E)
        else:
            # S1: redemption value plus the interest for the D days after the final coupon,
            # discounted back to settlement.
            D = diff_in_days(end_date,final_coupon_date) 
            S1 = (RV + coupon*D/B) / pow(1 + Y/frequency, F/E + N - 1 + D/E)

            # S2: present value of the coupon payments (the next one, then coupons 2..N).
            S2 = coupon*G/B / pow(1 + Y/frequency, F/E)
            for K in range(2,N+1):
                S2 += coupon*E/B / pow(1 + Y/frequency, F/E + K - 1)
            base = S1 + S2
        # Subtract accrued interest from the discounted value to get the returned price.
        P = base - accrued
    
    return round(P,3)

def compute_price(trade):
    '''
    Prices a single trade from its reference data and yield.

    Parameters:
    trade: a row (e.g. a pandas Series) exposing interest_payment_frequency, maturity_date,
           dated_date, first_coupon_date, settlement_date, is_called, refund_date,
           refund_price, called_redemption_type, next_call_date, next_call_price,
           par_call_date, par_call_price, coupon_rate, cusip and 'yield'.

    Returns:
    (price, calc) where calc names the redemption scenario that produced the price:
    "refunding" for called bonds; otherwise whichever of "next call", "par call" and
    "maturity" gives the lowest price (earlier scenarios win ties).
    '''
    frequency = trade.interest_payment_frequency
    if not frequency >= 0: # includes null frequency
        # Unknown frequency: fall back to 2.
        frequency = 2

    if frequency == 0:
        # No periodic coupons: the whole life of the bond is a single interest period.
        time_delta = 0
        my_next_coupon_date = trade.maturity_date
        my_prev_coupon_date = trade.dated_date
    else:
        time_delta = relativedelta(months = 12/frequency)
        if not trade.first_coupon_date >= trade.dated_date:
            # Also covers a null first coupon date (the comparison is False).
            print("bad first coupon date:", trade.cusip, trade.first_coupon_date)
            my_first_date = trade.dated_date
        else:
            my_first_date = trade.first_coupon_date

        my_next_coupon_date = get_next_coupon_date(my_first_date, trade.settlement_date, time_delta)
        if my_next_coupon_date == trade.first_coupon_date:
            # Still in the first interest period, which starts at the dated date.
            my_prev_coupon_date = trade.dated_date
        else:
            my_prev_coupon_date = my_next_coupon_date - time_delta

    if trade.is_called:
        # Called bond: price to the refund date when known; otherwise to maturity for
        # redemption types 1 and 5, and to the next call date for every other type.
        if pd.isnull(trade.refund_date):
            if trade.called_redemption_type in [1,5]:
                end_date = trade.maturity_date
            else:
                end_date = trade.next_call_date
        else:
            end_date = trade.refund_date

        if end_date < trade.settlement_date:
            print("anomalous refund date:", trade.cusip, "settlement:", trade.settlement_date, "refunding:", trade.refund_date)

        # Redemption value: refund price, else next call price, else 100.
        if not pd.isnull(trade.refund_price):
            par = trade.refund_price
        elif not pd.isnull(trade.next_call_price): 
            par = trade.next_call_price
        else: 
            par = 100
        final = get_price(trade.cusip,my_prev_coupon_date,trade.first_coupon_date,my_next_coupon_date,
                         end_date,trade.settlement_date, trade.dated_date, frequency,
                         trade['yield'], trade.coupon_rate, par, time_delta)
        calc = "refunding"
    else:
        # Not called: price each redemption scenario and keep the lowest price. get_price
        # returns +inf for a scenario whose date is null, so such scenarios never win.
        scenarios = (
            ("next call", trade.next_call_date, trade.next_call_price),
            ("par call", trade.par_call_date, trade.par_call_price),
            ("maturity", trade.maturity_date, 100),
        )
        final, calc = None, None
        for label, end_date, redemption_value in scenarios:
            price = get_price(trade.cusip,my_prev_coupon_date,trade.first_coupon_date,my_next_coupon_date,
                              end_date,trade.settlement_date,trade.dated_date, frequency,
                              trade['yield'],trade.coupon_rate,redemption_value,time_delta)
            # Strict "<" keeps the earlier scenario on ties; the first scenario is always taken.
            if calc is None or price < final:
                final, calc = price, label

    return final, calc

def transform_ref_data(df):
    '''
    Prepares reference data for pricing. The frame is modified in place and also returned.

    - interest_payment_frequency: code replaced by its COUPON_FREQUENCY_DICT value
    - coupon_rate: `coupon` cast to float
    - yield, next_call_price: cast to float
    - deferred: True where the mapped frequency is 0 or the coupon rate is 0
    '''
    df['interest_payment_frequency'] = df['interest_payment_frequency'].map(COUPON_FREQUENCY_DICT)
    df['coupon_rate'] = df['coupon'].astype(float)
    df['yield'] = df['yield'].astype(float)
    # Each comparison needs its own parentheses: `|` binds tighter than `==`, so without
    # them the expression is evaluated as ((frequency == 0) | coupon_rate) == 0.
    df['deferred'] = (df['interest_payment_frequency'] == 0) | (df['coupon_rate'] == 0)

    df['next_call_price'] = df['next_call_price'].astype(float)
    return df

def get_ref_data(cusip):
    '''
    Looks up the reference data stored in redis under the CUSIP.

    Returns the unpickled value wrapped in a DataFrame and transposed, or the string
    "No reference data" when redis has no entry for the CUSIP.
    '''
    payload = redis_client.get(cusip)
    if payload is None:
        return "No reference data"
    # NOTE: pickle.loads runs whatever the payload encodes, so this relies on the redis store being trusted.
    return pd.DataFrame(pickle.loads(payload)).T

def get_cusip_price(df):
    '''
    Prices the first row of a reference-data frame.

    The frame is passed through transform_ref_data and its first row is handed to
    compute_price. Returns the (price, calculation method) pair from compute_price.
    '''
    prepared = transform_ref_data(df)
    first_trade = prepared.iloc[0]
    return compute_price(first_trade)

We created a CSV file containing the CUSIPs that cause the biggest errors in the yield spread model.

In [9]:
# Load the CSV of trades to reprice; the path is relative to the notebook's working directory.
df = pd.read_csv('Greater_than_50.csv')

In [10]:
# Spot-check the rows for a single CUSIP.
df[df.cusip == '876443HJ4']

Unnamed: 0.1,Unnamed: 0,rtrs_control_number,trade_datetime,cusip,my_price,price_delta,msrb_cusip,yield_spread,num_prev_messages,publish_datetime,...,days_to_settle,days_to_maturity,days_to_call,days_to_refund,days_to_par,call_to_maturity,last_seconds_ago,last_yield_spread,last_size,predicted_yield_spreads
286,19943,2021093002731900,2021-09-30 12:00:37.000000000,876443HJ4,101.918,0.0,876443HJ4,-85.892988,0,2021-09-30 12:01:00.000000000,...,4,4.045597,2.173186,2.173186,2.173186,4.039771,14.460695,-78.707688,20.0,-32.944813
336,23754,2021093003157500,2021-09-30 12:27:03.000000000,876443HJ4,101.329,0.0,876443HJ4,56.007012,0,2021-09-30 12:27:36.000000000,...,4,4.045597,2.173186,2.173186,2.173186,4.039771,5.043425,-64.592988,50.0,-40.27726


In [11]:
# Parse every column whose name contains "date" (any casing) into pandas datetimes.
date_cols = [name for name in df.columns if 'DATE' in name.upper()]
df = df.assign(**{name: pd.to_datetime(df[name]) for name in date_cols})
display(df.head())

Unnamed: 0.1,Unnamed: 0,rtrs_control_number,trade_datetime,cusip,my_price,price_delta,msrb_cusip,yield_spread,num_prev_messages,publish_datetime,...,days_to_settle,days_to_maturity,days_to_call,days_to_refund,days_to_par,call_to_maturity,last_seconds_ago,last_yield_spread,last_size,predicted_yield_spreads
0,36,2021100100164100,2021-10-01 08:33:07,88045RYF3,100.67,0.0,88045RYF3,20.811434,0,2021-10-01 08:33:28,...,4,3.879383,1.94939,,1.94939,3.874308,14.724204,313.938511,5.0,-54.60226
1,72,2021100101502500,2021-10-01 10:56:45,37970PFK3,102.537,0.0,37970PFK3,-37.288566,0,2021-10-01 10:57:04,...,4,3.351989,2.62634,,2.62634,3.261739,12.44625,54.313296,25.0,55.36122
2,124,2021100103108100,2021-10-01 12:30:52,00037CVN7,109.042,0.0,00037CVN7,-112.688566,0,2021-10-01 12:30:58,...,4,3.901513,2.823474,,2.823474,3.86368,12.763944,-38.561697,250.0,-60.218784
3,299,2021100102494800,2021-10-01 11:49:09,6496668V9,100.018,0.0,6496668V9,57.311434,0,2021-10-01 11:49:16,...,4,1.447158,0.0,,0.0,0.0,17.439558,-75.907647,20.0,-72.34237
4,486,2021100102665800,2021-10-01 12:00:08,771902GD9,101.524,0.0,771902GD9,32.111434,0,2021-10-01 12:00:38,...,4,3.86611,2.348305,,2.348305,3.852785,5.46806,187.811434,15.0,127.61683


We only have the coefficients for the Nelson-Siegel model from the 27th of July 2021 onwards. We therefore restrict the price calculation to trades that occurred after that date, for greater accuracy in the computation.

In [12]:
# Keep only trades whose trade_date is strictly later than 2021-07-27 00:00 (rows equal to that timestamp are dropped).
test_df = df[df.trade_date > datetime.datetime(2021,7,27)]

### Getting ref data from big query

In [13]:
# First and last trade dates in the filtered trades, as YYYY-MM-DD strings; used to bound the reference-data query.
trade_dates = test_df.trade_date
min_date, max_date = (
    pick(trade_dates).date().strftime('%Y-%m-%d') for pick in (min, max)
)

In [14]:
# Request only the columns the notebook keeps from the query result, rather than every
# column of the view. The date bounds are inclusive and come from the trades being priced.
REF_DATA_QUERY_COLUMNS = (
    'interest_payment_frequency', 'maturity_date', 'dated_date', 'first_coupon_date',
    'next_call_date', 'refund_date', 'settlement_date', 'cusip', 'coupon',
    'next_call_price', 'par_call_date', 'is_called', 'par_call_price', 'refund_price',
    'called_redemption_type', 'trade_date',
)
DATA_QUERY = (
    f"Select {', '.join(REF_DATA_QUERY_COLUMNS)} "
    "from `eng-reactor-287421.primary_views.trade_history_with_reference_data_no_neg_yields` "
    f"where trade_date >='{min_date}' and trade_date <= '{max_date}'"
)

In [15]:
# Run the query and load the full result into a pandas DataFrame.
ref_data =  bq_client.query(DATA_QUERY).result().to_dataframe()

In [16]:
# Columns needed for pricing, plus trade_date for matching each trade to its reference-data row.
REF_DATA_COLUMNS = [
    'interest_payment_frequency',
    'maturity_date',
    'dated_date',
    'first_coupon_date',
    'next_call_date',
    'refund_date',
    'settlement_date',
    'cusip',
    'coupon',
    'next_call_price',
    'par_call_date',
    'is_called',
    'par_call_price',
    'refund_price',
    'called_redemption_type',
    'trade_date',
]
ref_data = ref_data[REF_DATA_COLUMNS]

In [17]:
# Remove rows that are exact duplicates across all kept columns (the original index labels are retained).
ref_data = ref_data.drop_duplicates()
ref_data

Unnamed: 0,interest_payment_frequency,maturity_date,dated_date,first_coupon_date,next_call_date,refund_date,settlement_date,cusip,coupon,next_call_price,par_call_date,is_called,par_call_price,refund_price,called_redemption_type,trade_date
0,1.0,2026-05-01,2015-12-30,2016-05-01,2023-05-01,,2021-09-27,952718YB4,3.000000000,100.0,2023-05-01,False,100.0,,,2021-09-23
1,1.0,2026-08-01,2015-08-13,2016-02-01,2025-08-01,,2021-10-01,64966LV99,5.000000000,100.0,2025-08-01,False,100.0,,,2021-09-29
2,1.0,2030-12-01,2019-02-07,2019-06-01,2028-12-01,,2021-09-30,709224L72,5.000000000,100.0,2028-12-01,False,100.0,,,2021-09-28
3,1.0,2023-06-15,2013-08-29,2013-12-15,,,2021-10-01,646136R78,5.000000000,,,False,,,,2021-09-29
4,1.0,2031-08-01,2019-09-25,2020-02-01,2029-08-01,,2021-10-05,93974EAJ5,5.000000000,100.0,2029-08-01,False,100.0,,,2021-10-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195114,1.0,2039-05-01,2020-09-24,2021-05-01,2030-05-01,,2021-10-05,70914PT28,2.000000000,100.0,2030-05-01,False,100.0,,,2021-10-01
195120,1.0,2024-08-15,2017-02-22,2017-08-15,,,2021-09-30,663719E40,4.000000000,,,False,,,,2021-09-28
195124,1.0,2053-12-01,2021-09-30,2021-12-01,2029-12-01,,2021-10-01,915514BT9,3.000000000,100.0,2029-12-01,False,100.0,,,2021-09-27
195127,1.0,2033-12-01,2019-12-19,2020-06-01,2029-12-01,,2021-09-29,681504MQ6,2.250000000,100.0,2029-12-01,False,100.0,,,2021-09-27


### Main

In [21]:
# Column order of the comparison table.
RESULT_COLUMNS = ['cusip','ficc_price','msrb_price','calc_method','ficc_yield','msrb_yield']
# Reference-data columns handed to the pricing code.
PRICING_COLUMNS = ['interest_payment_frequency','maturity_date','dated_date','first_coupon_date','next_call_date','refund_date','settlement_date','cusip','coupon','next_call_price','par_call_date','is_called','par_call_price','refund_price','called_redemption_type']

# Collect one record per priced trade and build the frame once at the end. Appending to a
# DataFrame inside the loop is quadratic, and DataFrame.append no longer exists in pandas 2.x.
result_records = []

for index, row in tqdm(test_df.iterrows(), total=test_df.shape[0]):
    # Years from settlement to maturity (365.25-day years).
    # Keep the name `maturity`: code elsewhere in the notebook may read it as a global.
    maturity = (row.maturity_date - row.settlement_date).days/365.25
    yc = get_current_yield_curve(maturity,row.trade_datetime)
    if isinstance(yc, dict):
        # get_current_yield_curve signals failure with a status dict instead of a number.
        print("no yield curve for", row['cusip'], row.trade_datetime, yc.get('error'))
        continue
    ytw = yc + row.predicted_yield_spreads

    # Reference data for this CUSIP as of the trade date.
    matches = ref_data[(ref_data.cusip == row['cusip']) & (ref_data.trade_date == row.trade_datetime.date())]
    if matches.empty:
        # Nothing to price against; skip rather than fail on .iloc[0] inside get_cusip_price.
        continue

    # assign() returns a new frame, so the yield is not written into a slice of ref_data.
    pdf = matches[PRICING_COLUMNS].assign(**{'yield': ytw/100})
    price, calc_method = get_cusip_price(pdf)

    # NOTE(review): ficc_yield stores ytw as computed above, while pricing uses ytw/100 —
    # confirm the units line up with msrb_yield before comparing the two columns.
    result_records.append({
     "cusip": row['cusip'],
     "ficc_price":  price,
     "ficc_yield":ytw,
     "msrb_price" :row['dollar_price'],
     "msrb_yield" : row['yield'],
     "calc_method":calc_method
    })

result_df = pd.DataFrame(result_records, columns=RESULT_COLUMNS)

100%|██████████| 1/1 [00:00<00:00,  8.51it/s]


In [22]:
# Show the comparison of ficc and msrb prices/yields built by the loop above.
result_df

Unnamed: 0,cusip,ficc_price,msrb_price,calc_method,ficc_yield,msrb_yield
0,139372PZ3,100.028,100.037,refunding,163.132454,0.546


In [23]:
# Register tqdm's progress_apply on pandas objects (used by the apply call at the end of this cell).
tqdm.pandas()

def pricit(cusip): 
    '''
    Prices a CUSIP from its redis reference data, using the yield stored with that data.

    Returns the price, or the string "Error" when no reference data exists for the CUSIP
    or when anything in the pricing path raises.
    '''
    required_columns = ['interest_payment_frequency','maturity_date','dated_date','first_coupon_date','next_call_date','refund_date','settlement_date','cusip','yield','coupon','next_call_price','par_call_date','is_called','par_call_price','refund_price']
    try:
        ref = get_ref_data(cusip)
        if isinstance(ref, str):
            # get_ref_data returns a message string when redis has no entry for the CUSIP.
            return "Error"

        columns = list(required_columns)
        # compute_price reads called_redemption_type for called bonds without a refund date.
        # Pass it along when the reference data has it, so those bonds no longer fail.
        if 'called_redemption_type' in ref.columns:
            columns.append('called_redemption_type')

        # Copy so the in-place edits in transform_ref_data do not act on a slice.
        price, _ = get_cusip_price(ref[columns].copy())
        return price 
    except Exception:
        # Best-effort pricing: any failure is reported with the sentinel value.
        return "Error"
    
# Price every CUSIP in the CSV with a progress bar; rows that cannot be priced hold the string "Error".
df['ficc_price'] = df.cusip.progress_apply(pricit)

100%|██████████| 1405/1405 [00:54<00:00, 25.88it/s]
