### Comparing dollar price and yield spread model


From the results, my suggestion is to use the yield spread model for CUSIPs that were last traded before 2021.

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import time

import numpy as np
from google.cloud import bigquery
from google.cloud import storage
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns

from datetime import datetime
import matplotlib.pyplot as plt
import pickle
from lightgbm import LGBMRegressor
import lightgbm

from IPython.display import display, HTML
import os

from ficc.pricing.price import compute_price
from ficc.data.process_data import process_data
from ficc.utils.auxiliary_variables import PREDICTORS, NON_CAT_FEATURES, BINARY, CATEGORICAL_FEATURES, IDENTIFIERS, NON_CAT_FEATURES_DOLLAR_PRICE, BINARY_DOLLAR_PRICE, CATEGORICAL_FEATURES_DOLLAR_PRICE
from ficc.utils.gcp_storage_functions import upload_data, download_data
from ficc.utils.auxiliary_variables import COUPON_FREQUENCY_TYPE

Initializing pandarallel with 16.0 cores
INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

2023-09-08 21:30:41.327812: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-08 21:30:41.338862: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-08 21:30:41.339556: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [3]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="ahmad_creds.json"
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
pd.options.mode.chained_assignment = None

Creating big query and gcp storage client

In [4]:
bq_client = bigquery.Client()
storage_client = storage.Client()

In [5]:
if 'ficc_treasury_spread' not in PREDICTORS:
    PREDICTORS.append('ficc_treasury_spread')
    NON_CAT_FEATURES.append('ficc_treasury_spread')
if 'target_attention_features' not in PREDICTORS:
    PREDICTORS.append('target_attention_features')

Declaring hyper-parameters

In [6]:
BATCH_SIZE = 1000
SEQUENCE_LENGTH = 5
DOLLAR_SEQUENCE_LENGTH = 2
NUM_FEATURES = 6

Loading data from GCP bucket. This file only contain trades with negative yields.

In [7]:
%%time
import gcsfs
fs = gcsfs.GCSFileSystem(project='eng-reactor-287421')
# with fs.open('ahmad_data/test_orig_avg_life_date.pkl') as f:
#with fs.open('ahmad_data/test_pac_bonds.pkl') as f:
with fs.open('ahmad_data/test_has_sink_schedule.pkl') as f:
    data = pd.read_pickle(f)

CPU times: user 4.74 s, sys: 1.32 s, total: 6.06 s
Wall time: 9.7 s


In [8]:
len(data)

240046

In [9]:
data = data[(data.days_to_call == 0) | (data.days_to_call > np.log10(400))]
data = data[(data.days_to_refund == 0) | (data.days_to_refund > np.log10(400))]
data = data[(data.days_to_maturity == 0) | (data.days_to_maturity > np.log10(400))]
data = data[data.days_to_maturity < np.log10(30000)]

In [10]:
len(data)

212865

In [11]:
print(f'Restricting history to {SEQUENCE_LENGTH} trades')
data.trade_history = data.trade_history.apply(lambda x: x[:SEQUENCE_LENGTH])
data.trade_history_dollar_price = data.trade_history_dollar_price.apply(lambda x: x[:DOLLAR_SEQUENCE_LENGTH])
data.target_attention_features = data.target_attention_features.apply(lambda x:x[:SEQUENCE_LENGTH])

Restricting history to 5 trades


In [12]:
print(data.trade_history.iloc[0].shape)

(5, 6)


For a few CUSIPs, the last trade took place prior to October 2020. As a result, we lack features or data pertaining to these trades since our available data begins from October 2020 onwards.

In [13]:
data['last_yield_spread'].fillna(0, inplace=True)
data['last_seconds_ago'].fillna(0, inplace=True)
data['last_dollar_price'].fillna(0, inplace=True)
# data.dropna(subset=['new_ficc_ycl'], inplace=True)

In [14]:
len(data)

212865

Loading the encoders for dollar price model and yield spread model

In [15]:
encoders_yield_spread = download_data(storage_client,'ahmad_data','encoders.pkl')
# encoders_dollar_price = download_data(storage_client,'ahmad_data','encoders_dollar_price.pkl')
with open('saved_models/encoders_dollar_price_illiquid.pkl','rb') as f:
    encoders_dollar_price = pickle.load(f)

File encoders.pkl downloaded to ahmad_data.


Dollar price trade history features. This is an adaption of the code from Charles's notebook

In [16]:
data.sort_values('trade_datetime', inplace=True)

In [17]:
ttype_dict = { (0,0):'D', (0,1):'S', (1,0):'P' }

dp_variants = ["max_dp", "min_dp", "max_qty", "min_ago", "D_min_ago", "P_min_ago", "S_min_ago"]
dp_feats = ["_dp", "_ttypes", "_ago", "_qdiff"]
D_prev = dict()
P_prev = dict()
S_prev = dict()

def get_trade_history_columns_dollar_price():
    '''
    This function is used to create a list of columns
    '''
    YS_COLS = []
    for prefix in dp_variants:
        for suffix in dp_feats:
            YS_COLS.append(prefix + suffix)
    return YS_COLS

def extract_feature_from_trade_dollar_price(row, name, trade):
    dollar_price = trade[0]
    ttypes = ttype_dict[(trade[2],trade[3])] + row.trade_type
    seconds_ago = trade[4]
    quantity_diff = np.log10(1 + np.abs(10**trade[1] - 10**row.quantity))
    return [dollar_price, ttypes,  seconds_ago, quantity_diff]

def trade_history_derived_features_dollar_price(row):
    trade_history = row.trade_history_dollar_price
    trade = trade_history[0]
    
    D_min_ago_t = D_prev.get(row.cusip,trade)
    D_min_ago = 9        

    P_min_ago_t = P_prev.get(row.cusip,trade)
    P_min_ago = 9
    
    S_min_ago_t = S_prev.get(row.cusip,trade)
    S_min_ago = 9
    
    max_dp_t = trade; max_dp = trade[0]
    min_dp_t = trade; min_dp = trade[0]
    max_qty_t = trade; max_qty = trade[1]
    min_ago_t = trade; min_ago = trade[4]
    
    for trade in trade_history[0:]:
        #Checking if the first trade in the history is from the same block
        if trade[4] <= 0: 
            continue
 
        if trade[0] > max_dp: 
            max_dp_t = trade
            max_dp = trade[0]
        elif trade[0] < min_dp: 
            min_dp_t = trade; 
            min_dp = trade[0]

        if trade[1] > max_qty: 
            max_qty_t = trade 
            max_qty = trade[1]
        if trade[4] < min_ago: 
            min_ago_t = trade; 
            min_ago = trade[4]
            
        side = ttype_dict[(trade[2],trade[3])]
        if side == "D":
            if trade[4] < D_min_ago: 
                D_min_ago_t = trade
                D_min_ago = trade[4]
                D_prev[row.cusip] = trade
        elif side == "P":
            if trade[4] < P_min_ago: 
                P_min_ago_t = trade
                P_min_ago = trade[4]
                P_prev[row.cusip] = trade
        elif side == "S":
            if trade[4] < S_min_ago: 
                S_min_ago_t = trade
                S_min_ago = trade[4]
                S_prev[row.cusip] = trade
        else: 
            print("invalid side", trade)
    
    trade_history_dict = {"max_dp":max_dp_t,
                          "min_dp":min_dp_t,
                          "max_qty":max_qty_t,
                          "min_ago":min_ago_t,
                          "D_min_ago":D_min_ago_t,
                          "P_min_ago":P_min_ago_t,
                          "S_min_ago":S_min_ago_t}

    return_list = []
    for variant in dp_variants:
        feature_list = extract_feature_from_trade_dollar_price(row,variant,trade_history_dict[variant])
        return_list += feature_list
    
    return return_list

In [18]:
temp = data[['cusip','trade_history_dollar_price','quantity','trade_type']].parallel_apply(trade_history_derived_features_dollar_price, axis=1)

In [19]:
YS_COLS_DOLLAR_PRICE = get_trade_history_columns_dollar_price()

In [20]:
data[YS_COLS_DOLLAR_PRICE] = pd.DataFrame(temp.tolist(), index=data.index)

Yield spread model trade history features

In [21]:
ttype_dict = { (0,0):'D', (0,1):'S', (1,0):'P' }

ys_variants = ["max_ys", "min_ys", "max_qty", "min_ago", "D_min_ago", "P_min_ago", "S_min_ago"]
ys_feats = ["_ys", "_ttypes", "_ago", "_qdiff"]
D_prev = dict()
P_prev = dict()
S_prev = dict()

def get_trade_history_columns_yield_spread():
    '''
    This function is used to create a list of columns
    '''
    YS_COLS = []
    for prefix in ys_variants:
        for suffix in ys_feats:
            YS_COLS.append(prefix + suffix)
    return YS_COLS

def extract_feature_from_trade_yield_spread(row, name, trade):
    yield_spread = trade[0]
    ttypes = ttype_dict[(trade[3],trade[4])] + row.trade_type
    seconds_ago = trade[5]
    quantity_diff = np.log10(1 + np.abs(10**trade[2] - 10**row.quantity))
    return [yield_spread, ttypes,  seconds_ago, quantity_diff]

def trade_history_derived_features_yield_spread(row):
    trade_history = row.trade_history
    trade = trade_history[0]
    
    D_min_ago_t = D_prev.get(row.cusip,trade)
    D_min_ago = 9        

    P_min_ago_t = P_prev.get(row.cusip,trade)
    P_min_ago = 9
    
    S_min_ago_t = S_prev.get(row.cusip,trade)
    S_min_ago = 9
    
    max_ys_t = trade; max_ys = trade[0]
    min_ys_t = trade; min_ys = trade[0]
    max_qty_t = trade; max_qty = trade[2]
    min_ago_t = trade; min_ago = trade[5]
    
    for trade in trade_history[0:]:
        #Checking if the first trade in the history is from the same block
        if trade[5] == 0: 
            continue
 
        if trade[0] > max_ys: 
            max_ys_t = trade
            max_ys = trade[0]
        elif trade[0] < min_ys: 
            min_ys_t = trade; 
            min_ys = trade[0]

        if trade[2] > max_qty: 
            max_qty_t = trade 
            max_qty = trade[2]
        if trade[5] < min_ago: 
            min_ago_t = trade; 
            min_ago = trade[5]
            
        side = ttype_dict[(trade[3],trade[4])]
        if side == "D":
            if trade[5] < D_min_ago: 
                D_min_ago_t = trade; D_min_ago = trade[5]
                D_prev[row.cusip] = trade
        elif side == "P":
            if trade[5] < P_min_ago: 
                P_min_ago_t = trade; P_min_ago = trade[5]
                P_prev[row.cusip] = trade
        elif side == "S":
            if trade[5] < S_min_ago: 
                S_min_ago_t = trade; S_min_ago = trade[5]
                S_prev[row.cusip] = trade
        else: 
            print("invalid side", trade)
    
    trade_history_dict = {"max_ys":max_ys_t,
                          "min_ys":min_ys_t,
                          "max_qty":max_qty_t,
                          "min_ago":min_ago_t,
                          "D_min_ago":D_min_ago_t,
                          "P_min_ago":P_min_ago_t,
                          "S_min_ago":S_min_ago_t}

    return_list = []
    for variant in ys_variants:
        feature_list = extract_feature_from_trade_yield_spread(row,variant,trade_history_dict[variant])
        return_list += feature_list
    
    return return_list

In [22]:
temp = data[['cusip','trade_history','quantity','trade_type']].parallel_apply(trade_history_derived_features_yield_spread, axis=1)

In [23]:
YS_COLS_YIELD_SPREAD = get_trade_history_columns_yield_spread()

In [24]:
data[YS_COLS_YIELD_SPREAD] = pd.DataFrame(temp.tolist(), index=data.index)

In [25]:
for col in YS_COLS_YIELD_SPREAD:
    if 'ttypes' in col and col not in PREDICTORS:
        PREDICTORS.append(col)
        CATEGORICAL_FEATURES.append(col)
    elif col not in PREDICTORS:
        NON_CAT_FEATURES.append(col)
        PREDICTORS.append(col)

In [26]:
len(data)

212865

In [27]:
# data.dropna(subset=PREDICTORS, inplace=True)

In [28]:
data.sort_values('trade_datetime',ascending=False,inplace=True)

In [29]:
len(data)

212865

Creating inputs yield spread model

In [30]:
def create_input_yield_spread(df, encoders):
    datalist = []
    datalist.append(np.stack(df['trade_history'].to_numpy()))
    datalist.append(np.stack(df['target_attention_features'].to_numpy()))

    noncat_and_binary = []
    for f in NON_CAT_FEATURES + BINARY:
        noncat_and_binary.append(np.expand_dims(df[f].to_numpy().astype('float32'), axis=1))
    datalist.append(np.concatenate(noncat_and_binary, axis=-1))
    
    for f in CATEGORICAL_FEATURES:
        encoded = encoders[f].transform(df[f])
        datalist.append(encoded.astype('float32'))
    
    return datalist

In [31]:
x_test_yield_spread = create_input_yield_spread(data, encoders_yield_spread)

Creating inputs dollar price

In [32]:
def create_input_dollar_price(df, encoders):
    datalist = []
    datalist.append(np.stack(df['trade_history_dollar_price'].to_numpy()))
    datalist.append(np.stack(df['target_attention_features'].to_numpy()))

    noncat_and_binary = []
    for f in NON_CAT_FEATURES_DOLLAR_PRICE + BINARY_DOLLAR_PRICE:
        noncat_and_binary.append(np.expand_dims(df[f].to_numpy().astype('float32'), axis=1))
    datalist.append(np.concatenate(noncat_and_binary, axis=-1))
    
    for f in CATEGORICAL_FEATURES_DOLLAR_PRICE:
        encoded = encoders[f].transform(df[f])
        datalist.append(encoded.astype('float32'))
    
    return datalist

In [33]:
x_test_dollar_price = create_input_dollar_price(data, encoders_dollar_price)

#### Loading models

In [34]:
yield_spread_model = keras.models.load_model('saved_models/saved_model_illiquid_2023-09-01-19-59')

2023-09-08 21:31:16.566905: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-09-08 21:31:16.569431: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-08 21:31:16.570260: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-08 21:31:16.570863: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [35]:
dollar_price_model = keras.models.load_model('saved_models/saved_model_dollar_price_illiquid_2023-09-06-20-05')

In [36]:
yield_spread_predictions = yield_spread_model.predict(x_test_yield_spread, batch_size=BATCH_SIZE)

2023-09-08 21:31:39.577872: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:689] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "Tesla T4" frequency: 1590 num_cores: 40 environment { key: "architecture" value: "7.5" } environment { key: "cuda" value: "11020" } environment { key: "cudnn" value: "8100" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 4194304 shared_memory_size_per_multiprocessor: 65536 memory_size: 14488961024 bandwidth: 320064000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
2023-09-08 21:31:41.042444: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200


In [37]:
dollar_price_predictions = dollar_price_model.predict(x_test_dollar_price, batch_size=BATCH_SIZE)

2023-09-08 21:31:43.573793: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:689] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "Tesla T4" frequency: 1590 num_cores: 40 environment { key: "architecture" value: "7.5" } environment { key: "cuda" value: "11020" } environment { key: "cudnn" value: "8100" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 4194304 shared_memory_size_per_multiprocessor: 65536 memory_size: 14488961024 bandwidth: 320064000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


Converting predicted yield spreads to yield to worst

In [38]:
data['predicted_ys'] = yield_spread_predictions
data['predicted_ytw_yield_spread_model'] = (data['new_ficc_ycl'] + data['predicted_ys'])

Converting predicted dollar price to yield to worst

In [39]:
data['predicted_dollar_price'] = dollar_price_predictions

In [40]:
from ficc.pricing.yield_rate import compute_yield

In [41]:
def get_predicted_yield(row):
    try:
        ytw, calc_date = compute_yield(row, price = row.predicted_dollar_price)
    except:
        ytw, calc_date = None, None
    return ytw, calc_date

Comparing the BrentQ method and Newton's method, I found that the BrentQ method offers slightly higher accuracy in terms of the optimization results. However, it is important to note that the BrentQ method exhibits a significant drawback in terms of speed, taking considerably more time to converge compared to Newton's method. BrentQ method also fails to converge for a few trades

In [42]:
import scipy.optimize as optimize

def aux_price(trade, Y):
    myt = trade.copy()
    myt['yield'] = Y
    c = compute_price(myt,myt['yield'])
    return c[0]
    
def compute_yield_test(trade):
    ytw_func = lambda Y: aux_price(trade,Y) - trade.predicted_dollar_price
    ytw = optimize.brentq(ytw_func, -100, 100)
    return round(ytw, 4)

In [43]:
%%time
temp = data.parallel_apply(lambda x: get_predicted_yield(x), axis=1)
data[['converted_yield','dp_calc_date']] = pd.DataFrame(temp.tolist(), index=data.index)

CPU times: user 9.54 s, sys: 1.92 s, total: 11.5 s
Wall time: 1min


In [44]:
%%time
# data['converted_yield_bentq'] = data.parallel_apply(compute_yield_test, axis=1)

CPU times: user 6 µs, sys: 2 µs, total: 8 µs
Wall time: 11 µs


In [45]:
data.converted_yield *= 100
# data.converted_yield_bentq  *= 100

In [46]:
data = data[data.callable_at_cav == False]

#### Converting predicted yield to price

In [47]:
def get_trade_price(trade):
    # compute price does not need to return the calc_date, if we are using the calc_date model: 
    price, _ = compute_price(trade, trade.predicted_ytw_yield_spread_model/100)
    return price

In [48]:
data['converted_price_from_ys'] = data.parallel_apply(lambda x: get_trade_price(x), axis=1)

In [49]:
all_data = data.copy()

In [50]:
all_data['delta_price_ys'] = all_data.dollar_price - all_data.converted_price_from_ys
all_data['delta_price_dp'] = all_data.dollar_price - all_data.predicted_dollar_price
all_data['delta_yield_ys'] = all_data['yield'] - all_data.predicted_ytw_yield_spread_model
all_data['delta_yield_dp'] = all_data['yield'] - all_data.converted_yield

Upon reviewing the largest errors, it became evident that the dollar price performs quite poorly for cusips that have no prior trade history.

In [51]:
data = all_data[all_data.last_dollar_price != 0]

#### Looking at trades which have a previous trade

In [52]:
ys_ys_error = np.mean(np.abs(data['delta_yield_ys']))
dp_ys_error = np.mean(np.abs(data['delta_yield_dp']))
ys_dp_error = np.mean(np.abs(data['delta_price_ys']))
dp_dp_error = np.mean(np.abs(data['delta_price_dp']))

Dollar price error using the dollar price model

In [53]:
print(f"Dollar price MAE for dollar price model {np.round(dp_dp_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_price_dp'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_price_dp']) / np.sqrt(len(data)),3)}")

Dollar price MAE for dollar price model 0.834
MAD for dollar price model 0.419
Standard error 0.003


Dollar price error for yield spread model

In [54]:
print(f"Dollar price MAE for yield spread model {np.round(ys_dp_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_price_ys'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_price_ys']) / np.sqrt(len(data)),3)}")

Dollar price MAE for yield spread model 0.605
MAD for dollar price model 0.328
Standard error 0.002


Error in yield for dollar price model

In [55]:
print(f"Yield MAE for dollar price model {np.round(dp_ys_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_yield_dp'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_yield_dp']) / np.sqrt(len(data)),3)}")

Yield MAE for dollar price model 10.39
MAD for dollar price model 4.9
Standard error 0.039


Error in yield for yield spread model

In [56]:
print(f"Yield MAE for yield spread model {np.round(ys_ys_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_yield_ys'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_yield_ys']) / np.sqrt(len(data)),3)}")

Yield MAE for yield spread model 7.537
MAD for dollar price model 3.766
Standard error 0.038


#### Looking at CUSIPs without any previous trade

In [57]:
no_prev_trade = all_data[all_data.last_dollar_price == 0]

In [58]:
ys_ys_error = np.mean(np.abs(no_prev_trade['delta_yield_ys']))
dp_ys_error = np.mean(np.abs(no_prev_trade['delta_yield_dp']))
ys_dp_error = np.mean(np.abs(no_prev_trade['delta_price_ys']))
dp_dp_error = np.mean(np.abs(no_prev_trade['delta_price_dp']))

Measuring accuracy for dollar price model 

In [59]:
print(f"Dollar price MAE for dollar price model {np.round(dp_dp_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_price_dp'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_price_dp']) / np.sqrt(len(data)),3)}")

Dollar price MAE for dollar price model 57.536
MAD for dollar price model 0.419
Standard error 0.003


In [60]:
print(f"Yield MAE for dollar price model {np.round(dp_ys_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_yield_dp'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_yield_dp']) / np.sqrt(len(data)),3)}")

Yield MAE for dollar price model 855.914
MAD for dollar price model 4.9
Standard error 0.039


Measuring accuracy for yield spread model

In [61]:
print(f"Dollar price MAE for yield spread model {np.round(ys_dp_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_price_ys'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_price_ys']) / np.sqrt(len(data)),3)}")

Dollar price MAE for yield spread model 7.544
MAD for dollar price model 0.328
Standard error 0.002


In [62]:
print(f"Yield MAE for yield spread model {np.round(ys_ys_error,3)}")
print(f"MAD for dollar price model {np.round(np.median(np.abs(data['delta_yield_ys'])),3)}")
print(f"Standard error {np.round(np.std(data['delta_yield_ys']) / np.sqrt(len(data)),3)}")

Yield MAE for yield spread model 95.92
MAD for dollar price model 3.766
Standard error 0.038


In [63]:
all_data.cusip.unique()

array(['800851PV3', '940642GG0', '83756C4Q5', ..., '376087FS6',
       '108152HF9', '793139AS5'], dtype=object)

In [93]:
data[(np.abs(data.delta_price_ys) < np.abs(data.delta_price_dp)) & (np.abs(data.delta_price_dp) > 10) & (np.abs(data.delta_price_ys) < 1)][['cusip','rtrs_control_number',
                                                                                                                                           'dollar_price',
                                                                                                                                           'predicted_dollar_price',
                                                                                                                                           'converted_price_from_ys',
                                                                                                                                           'last_dollar_price',
                                                                                                                                           'rating','trade_date','']]

Unnamed: 0,cusip,rtrs_control_number,dollar_price,predicted_dollar_price,converted_price_from_ys,last_dollar_price,rating,trade_date
209937,198446AJ5,2023083104875700,86.852,97.96389,86.339,100.5,MR,2023-08-31
207202,543370D98,2023083101269400,74.618,90.440437,75.273,81.941,MR,2023-08-31
207201,543370D98,2023083101269600,74.868,90.472374,75.416,81.941,MR,2023-08-31
167078,133448EK2,2023082411456800,81.385,91.611534,82.065,90.62,A,2023-08-24
166756,88901MAT3,2023082411054300,85.446,96.375732,85.574,90.748,BBB,2023-08-24
156579,91754TMJ9,2023082312623700,82.585,92.948029,82.938,90.971,AA,2023-08-23
139869,959215FX7,2023082205722300,80.018,94.677719,80.895,87.311,A,2023-08-22
124968,79730WBW9,2023081704583600,86.218,96.43158,86.035,96.125,AA,2023-08-17
68333,658909VX1,2023080905101300,89.35,99.630676,89.755,94.326,MR,2023-08-09
61933,13032USK5,2023080809576000,101.0,90.582237,101.048,108.01,MR,2023-08-08


In [109]:
data[data.rtrs_control_number == 2023080809576000][['predicted_ys','new_ficc_ycl','last_ficc_ycl','last_yield']]

Unnamed: 0,predicted_ys,new_ficc_ycl,last_ficc_ycl,last_yield
61933,162.123871,314.23535,269.007275,356.1


In [108]:
data[data.rtrs_control_number == 2023080809576000][['trade_history','new_ficc_ycl']].iloc[0][0]

array([[ 87.09272501,  76.1       ,   4.17609119,   0.        ,
          1.        ,   7.58086887],
       [123.79272501, 112.8       ,   4.17609119,   0.        ,
          0.        ,   7.58086944],
       [ 92.09272501,  81.1       ,   4.47712135,   0.        ,
          1.        ,   7.58087034],
       [128.79272501, 117.8       ,   4.47712135,   0.        ,
          0.        ,   7.58087081],
       [ 97.09272501,  86.1       ,   4.69896984,   0.        ,
          1.        ,   7.58087637]])

In [83]:
# temp_data = data[data.rtrs_control_number == 2023083104875700][:]
temp_data = data[(np.abs(data.delta_price_ys) < np.abs(data.delta_price_dp)) & (np.abs(data.delta_price_dp) > 10) & (np.abs(data.delta_price_ys) < 1)]

In [90]:
temp_data[NON_CAT_FEATURES_DOLLAR_PRICE[10:15]]

Unnamed: 0,orig_principal_amount,max_amount_outstanding,accrued_days,days_in_interest_payment,A/E
209937,5.380213,5.380213,3877,180.0,0.366667
207202,6.466868,6.466868,1200,180.0,0.194444
207201,6.466868,6.466868,1200,180.0,0.194444
167078,6.066326,6.066326,487,180.0,0.827778
166756,6.908485,6.908485,1697,180.0,0.488889
156579,6.49485,6.49485,2887,180.0,0.733333
139869,6.263636,6.263636,1355,180.0,0.638889
124968,7.417056,7.417056,2352,180.0,0.961111
68333,6.432167,6.432167,1486,180.0,0.227778
61933,7.052502,7.052502,1782,180.0,0.9
