## Yield Spread model

This notebooks measures the accuracy on the cusips for clark capital

In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import time

import numpy as np
from google.cloud import bigquery
from google.cloud import storage
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns

from tensorflow.keras.layers import Embedding
from tensorflow.keras import activations
from tensorflow.keras import backend as K
from tensorflow.keras import initializers
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from sklearn import preprocessing
from datetime import datetime
import matplotlib.pyplot as plt
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from lightgbm import LGBMRegressor
import lightgbm

from IPython.display import display, HTML
import os


from ficc.data.process_data import process_data
from ficc.utils.auxiliary_variables import PREDICTORS, NON_CAT_FEATURES, BINARY, CATEGORICAL_FEATURES, IDENTIFIERS, PURPOSE_CLASS_DICT
from ficc.utils.gcp_storage_functions import upload_data, download_data
from ficc.utils.auxiliary_variables import RELATED_TRADE_BINARY_FEATURES, RELATED_TRADE_NON_CAT_FEATURES, RELATED_TRADE_CATEGORICAL_FEATURES

Initializing pandarallel with 16.0 cores
INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

2023-11-08 22:59:06.586599: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-08 22:59:06.598161: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-11-08 22:59:06.598989: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


Setting the environment variables

In [3]:
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="../ahmad_creds.json"
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
pd.options.mode.chained_assignment = None

Initializing BigQuery client and GCP storage client

In [4]:
bq_client = bigquery.Client()
storage_client = storage.Client()

Declaring hyper-parameters

In [5]:
TRAIN_TEST_SPLIT = 0.85
LEARNING_RATE = 0.0001
BATCH_SIZE = 1000
NUM_EPOCHS = 100

DROPOUT = 0.10
SEQUENCE_LENGTH = 5
NUM_FEATURES = 6

Checking if the treasury spreads and target attention features are present in PREDICTORS 

In [6]:
if 'ficc_treasury_spread' not in PREDICTORS:
    PREDICTORS.append('ficc_treasury_spread')
    NON_CAT_FEATURES.append('ficc_treasury_spread')
if 'target_attention_features' not in PREDICTORS:
    PREDICTORS.append('target_attention_features')

#### Data Preparation
We grab the data from a GCP bucket. The data is prepared using the ficc python package. More insight on how the data is prepared can be found [here](https://github.com/Ficc-ai/ficc/blob/ahmad_ml/ml_models/sequence_predictors/data_prep/data_preparation.ipynb)

In [7]:
%%time
import gcsfs
fs = gcsfs.GCSFileSystem(project='eng-reactor-287421')
with fs.open('ahmad_data/processed_data_574193KN7_2020.pkl') as f:
    data = pd.read_pickle(f)

CPU times: user 107 ms, sys: 5.88 ms, total: 113 ms
Wall time: 320 ms


In [8]:
data.trade_history

0    [[-27.12332287833499, 226.2, 4.176091194152832...
Name: trade_history, dtype: object

#### Date range for data

In [9]:
data.trade_date.max()

Timestamp('2020-03-20 00:00:00')

In [10]:
data.trade_date.min()

Timestamp('2020-03-20 00:00:00')

In [11]:
print(f'Restricting history to {SEQUENCE_LENGTH} trades')
data.trade_history = data.trade_history.apply(lambda x: x[:SEQUENCE_LENGTH])
data.target_attention_features = data.target_attention_features.apply(lambda x:x[:SEQUENCE_LENGTH])

Restricting history to 5 trades


In [12]:
data.trade_history.iloc[0].shape

(5, 6)

In [13]:
data.target_attention_features.iloc[0].shape

(1, 3)

In [14]:
data.sort_values('trade_datetime', inplace=True)

We don't give a predictions if yield is greater than 10

In [15]:
data = data[data['yield'] < 1000]

In [16]:
len(data)

1

### Creating features from trade history

This implementation is an adaption of Charles's implementation 

In [17]:
ttype_dict = { (0,0):'D', (0,1):'S', (1,0):'P' }

ys_variants = ["max_ys", "min_ys", "max_qty", "min_ago", "D_min_ago", "P_min_ago", "S_min_ago"]
ys_feats = ["_ys", "_ttypes", "_ago", "_qdiff"]
D_prev = dict()
P_prev = dict()
S_prev = dict()

def get_trade_history_columns():
    '''
    This function is used to create a list of columns
    '''
    YS_COLS = []
    for prefix in ys_variants:
        for suffix in ys_feats:
            YS_COLS.append(prefix + suffix)
    return YS_COLS

def extract_feature_from_trade(row, name, trade):
    yield_spread = trade[0]
    ttypes = ttype_dict[(trade[3],trade[4])] + row.trade_type
    seconds_ago = trade[5]
    quantity_diff = np.log10(1 + np.abs(10**trade[2] - 10**row.quantity))
    return [yield_spread, ttypes,  seconds_ago, quantity_diff]

def trade_history_derived_features(row):
    trade_history = row.trade_history
    trade = trade_history[0]
    
    D_min_ago_t = D_prev.get(row.cusip,trade)
    D_min_ago = 9        

    P_min_ago_t = P_prev.get(row.cusip,trade)
    P_min_ago = 9
    
    S_min_ago_t = S_prev.get(row.cusip,trade)
    S_min_ago = 9
    
    max_ys_t = trade; max_ys = trade[0]
    min_ys_t = trade; min_ys = trade[0]
    max_qty_t = trade; max_qty = trade[2]
    min_ago_t = trade; min_ago = trade[5]
    
    for trade in trade_history[0:]:
        #Checking if the first trade in the history is from the same block
        if trade[5] == 0: 
            continue
 
        if trade[0] > max_ys: 
            max_ys_t = trade
            max_ys = trade[0]
        elif trade[0] < min_ys: 
            min_ys_t = trade; 
            min_ys = trade[0]

        if trade[2] > max_qty: 
            max_qty_t = trade 
            max_qty = trade[2]
        if trade[5] < min_ago: 
            min_ago_t = trade; 
            min_ago = trade[5]
            
        side = ttype_dict[(trade[3],trade[4])]
        if side == "D":
            if trade[5] < D_min_ago: 
                D_min_ago_t = trade; D_min_ago = trade[5]
                D_prev[row.cusip] = trade
        elif side == "P":
            if trade[5] < P_min_ago: 
                P_min_ago_t = trade; P_min_ago = trade[5]
                P_prev[row.cusip] = trade
        elif side == "S":
            if trade[5] < S_min_ago: 
                S_min_ago_t = trade; S_min_ago = trade[5]
                S_prev[row.cusip] = trade
        else: 
            print("invalid side", trade)
    
    trade_history_dict = {"max_ys":max_ys_t,
                          "min_ys":min_ys_t,
                          "max_qty":max_qty_t,
                          "min_ago":min_ago_t,
                          "D_min_ago":D_min_ago_t,
                          "P_min_ago":P_min_ago_t,
                          "S_min_ago":S_min_ago_t}

    return_list = []
    for variant in ys_variants:
        feature_list = extract_feature_from_trade(row,variant,trade_history_dict[variant])
        return_list += feature_list
    
    return return_list

In [18]:
%%time
YS_COLS = get_trade_history_columns()
temp = data[['cusip','trade_history','quantity','trade_type']].parallel_apply(trade_history_derived_features, axis=1)
data[YS_COLS] = pd.DataFrame(temp.tolist(), index=data.index)

CPU times: user 23.2 ms, sys: 32.9 ms, total: 56 ms
Wall time: 73.2 ms


Adding trade history features to PREDICTORS list

In [19]:
for col in YS_COLS:
    if 'ttypes' in col and col not in PREDICTORS:
        PREDICTORS.append(col)
        CATEGORICAL_FEATURES.append(col)
    elif col not in PREDICTORS:
        NON_CAT_FEATURES.append(col)
        PREDICTORS.append(col)

This feature is used to check if there are any NaN values in the trade history. **It is not used to train the model**. 

In [20]:
%%time
print(len(data))
data['trade_history_sum'] = data.trade_history.parallel_apply(lambda x: np.sum(x))
data = data.dropna(subset=['trade_history_sum'])
print(len(data))

1
1
CPU times: user 12.3 ms, sys: 27 ms, total: 39.2 ms
Wall time: 59 ms


For the purpose of plotting, not used in training

In [21]:
data.purpose_sub_class.fillna(0, inplace=True)

Creating new ys label

In [22]:
data['new_ys'] = data['yield'] - data['new_ficc_ycl']
# data['diff_ys'] = data['new_ys'] - data['last_yield_spread']
# data['new_ys'] = data['yield'] - data['new_real_time_ficc_ycl']

Adding additional features proposed by Charles

In [23]:
data.last_trade_date = pd.to_datetime(data.last_trade_date)

In [24]:
data['last_duration'] = (data.last_calc_date - data.last_trade_date).dt.days

In [25]:
def duration(coupon, ytw, years, dollar_price, peryear=2):
    ytw = ytw.clip(0.001,np.inf)
    c = (coupon/100) / peryear
    y = (ytw/10000) / peryear
    n = years * peryear
    m = peryear
    macaulay_duration = ((1+y) / (m*y)) - ( (1 + y + n*(c-y)) / ((m*c* ((1+y)**n - 1)) + m*y))
    modified_duration = macaulay_duration / (1 + y)
    dv01 = modified_duration * dollar_price / 10000
    return dv01

def add_additional_feature(data):
    data['diff_ficc_ycl'] = data.new_ficc_ycl - data.last_ficc_ycl
    data['diff_ficc_treasury_spread'] = data.last_ficc_ycl - (data.treasury_rate * 100)
    data['dv01'] = duration(data.coupon, data.last_yield, data.last_duration, data.last_dollar_price)
    data['approx_dpd'] =  data.dv01 * data.diff_ficc_ycl
    data['overage'] =  (data.last_dollar_price + data.approx_dpd - data.next_call_price)
    #data['de_minimis_gap'] = data.last_dollar_price - data.de_minimis_threshold
    return data

# data = add_additional_feature(data)
# additional_features = ['diff_ficc_ycl','diff_ficc_treasury_spread','dv01','approx_dpd','overage']#,'de_minimis_gap']
# for i in additional_features:
#     if i not in NON_CAT_FEATURES:
#         NON_CAT_FEATURES.append(i)
#         PREDICTORS.append(i)

Selecting a subset of features for training. PREDICTORS are the features that we are going to use to train the model. More information about the feature set can be found [here](https://github.com/Ficc-ai/ficc_python/blob/d455bd30eca18f26a2535523530facad516dd90f/ficc/utils/auxiliary_variables.py#L120). We also select a set of additonal features, which are not used in training. These features are used to uderstand the results from the model.

In [26]:
auxiliary_features = ['dollar_price',
                     'calc_date', 
                     'trade_date',
                     'trade_datetime', 
                     'purpose_sub_class', 
                     'called_redemption_type', 
                     'calc_day_cat',
                     'yield',
                     'ficc_ycl',
                     'new_ys',
                     'trade_history_sum',
                     'new_ficc_ycl',
                     'days_to_refund',
                     'last_dollar_price',
                     'last_rtrs_control_number',
                     'is_called',
                     'federal_tax_status']

In [27]:
processed_data = data[:]

Checking for missing data and NaN values

In [28]:
print(len(processed_data))
processed_data.issue_amount = processed_data.issue_amount.replace([np.inf, -np.inf], np.nan)
processed_data.dropna(inplace=True, subset=PREDICTORS)
print(len(processed_data))

1
1


In [29]:
processed_data.sort_values('trade_datetime',ascending=False,inplace=True)

#### Loading encoders

In [30]:
!ls encoders*

encoders.pkl  encoders_test.pkl


In [31]:
with open('encoders_test.pkl','rb') as f:
    encoders_test = pickle.load(f)

In [32]:
encoders_test['max_ys_ttypes'].classes_

array(['DD', 'DP', 'DS', 'PD', 'PP', 'PS', 'SD', 'SP', 'SS'], dtype=object)

In [33]:
encoders_test['max_ys_ttypes'].transform(['DD', 'DP', 'DS', 'PD', 'PP', 'PS', 'SD', 'SP', 'SS'])

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [34]:
with open('/home/shayaan/ficc_python/encoders.pkl','rb') as f:
    encoders = pickle.load(f)

In [35]:
encoders['max_ys_ttypes'].classes_

array(['DD', 'DP', 'DS', 'PD', 'PP', 'PS', 'SD', 'SP', 'SS'], dtype=object)

In [36]:
encoders['max_ys_ttypes'].transform(['DD', 'DP', 'DS', 'PD', 'PP', 'PS', 'SD', 'SP', 'SS'])

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

#### Measuring accuracy

In [37]:
len(processed_data)

1

In [38]:
# processed_data = processed_data[(processed_data.days_to_call == 0) | (processed_data.days_to_call > np.log10(400))]
# processed_data = processed_data[(processed_data.days_to_refund == 0) | (processed_data.days_to_refund > np.log10(400))]
# processed_data = processed_data[(processed_data.days_to_maturity == 0) | (processed_data.days_to_maturity > np.log10(400))]
# processed_data = processed_data[processed_data.days_to_maturity < np.log10(30000)]

In [39]:
len(processed_data)

1

##### Converting data into format suitable for the model

In [40]:
def create_input(df):
    global encoders
    datalist = []
    datalist.append(np.stack(df['trade_history'].to_numpy()))
    datalist.append(np.stack(df['target_attention_features'].to_numpy()))

    noncat_and_binary = []
    for f in NON_CAT_FEATURES + BINARY:
        noncat_and_binary.append(np.expand_dims(df[f].to_numpy().astype('float32'), axis=1))
    datalist.append(np.concatenate(noncat_and_binary, axis=-1))
    
    for f in CATEGORICAL_FEATURES:
        encoded = encoders[f].transform(df[f])
        datalist.append(encoded.astype('float32'))
    
    return datalist

In [41]:
%%time
x_test = create_input(processed_data)
y_test = processed_data.new_ys
#y_train = train_dataframe.diff_ys

CPU times: user 4.87 ms, sys: 546 Âµs, total: 5.42 ms
Wall time: 4.52 ms


In [43]:
x_test.shape

AttributeError: 'list' object has no attribute 'shape'

### Load model and measure accuracy

In [67]:
yield_spread_model = keras.models.load_model('model-11-08')

In [68]:
yield_spread_model.inputs

[<KerasTensor: shape=(None, 5, 6) dtype=float32 (created by layer 'trade_history_input')>,
 <KerasTensor: shape=(None, 1, 3) dtype=float32 (created by layer 'target_attention_input')>,
 <KerasTensor: shape=(None, 48) dtype=float32 (created by layer 'NON_CAT_AND_BINARY_FEATURES')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'rating')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'incorporated_state_code')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'trade_type')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'purpose_class')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'max_ys_ttypes')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'min_ys_ttypes')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'max_qty_ttypes')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'min_ago_ttypes')>,
 <KerasTensor: shape=(None, 1) dtype=float32 (created by lay

In [69]:
temp_input = [[[[0.7264585614102543, -31.0, 6.698969841003418, 0.0, 0.0, 4.426657628095805], [0.7264585614102543, -31.0, 6.698969841003418, 0.0, 1.0, 4.426657628095805], [10.755671322886258, -21.0, 5.0, 0.0, 1.0, 4.468952557265534], [4.029871021978522, -34.0, 6.698969841003418, 0.0, 0.0, 5.035429738184549], [4.029871021978522, -34.0, 6.698969841003418, 1.0, 0.0, 5.035429738184549]], [[0.7264585614102543, -31.0, 6.698969841003418, 0.0, 0.0, 4.426657628095805], [0.7264585614102543, -31.0, 6.698969841003418, 0.0, 1.0, 4.426657628095805], [10.755671322886258, -21.0, 5.0, 0.0, 1.0, 4.468952557265534], [4.029871021978522, -34.0, 6.698969841003418, 0.0, 0.0, 5.035429738184549], [4.029871021978522, -34.0, 6.698969841003418, 1.0, 0.0, 5.035429738184549]]], [[[5.698969841003418, 0.0, 1.0]], [[5.698969841003418, 1.0, 0.0]]], [[5.698969841003418, 3.8943714538562375, 3.4058583993176366, 4.0, 8.929418563842773, 26708.0, 0.7264585614102543, 5.0, 3.4058583993176366, 8.382107137620704, 111.09, 8.382107137620704, 8.382107137620704, 1253.0, 180.0, 0.06666666666666667, -33.48258594541255, 10.755671322886258, 4.468952557265534, 5.602060872897559, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 4.029871021978522, 5.035429738184549, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [5.698969841003418, 3.8943714538562375, 3.4058583993176366, 4.0, 8.929418563842773, 26708.0, 0.7264585614102543, 5.0, 3.4058583993176366, 8.382107137620704, 111.09, 8.382107137620704, 8.382107137620704, 1253.0, 180.0, 0.06666666666666667, -33.48258594541255, 10.755671322886258, 4.468952557265534, 5.602060872897559, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 4.029871021978522, 5.035429738184549, 6.6532124469526535, 0.7264585614102543, 4.426657628095805, 6.6532124469526535, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]], [6.0, 6.0], [37.0, 37.0], [2.0, 1.0], [46.0, 46.0], [8.0, 7.0], [2.0, 1.0], [2.0, 1.0], [2.0, 1.0], [2.0, 1.0], [5.0, 4.0], [8.0, 7.0]]

In [70]:
for i in range(len(temp_input)):
    temp_input[i] = np.array(temp_input[i])

In [71]:
temp_input

[array([[[  0.72645856, -31.        ,   6.69896984,   0.        ,
            0.        ,   4.42665763],
         [  0.72645856, -31.        ,   6.69896984,   0.        ,
            1.        ,   4.42665763],
         [ 10.75567132, -21.        ,   5.        ,   0.        ,
            1.        ,   4.46895256],
         [  4.02987102, -34.        ,   6.69896984,   0.        ,
            0.        ,   5.03542974],
         [  4.02987102, -34.        ,   6.69896984,   1.        ,
            0.        ,   5.03542974]],
 
        [[  0.72645856, -31.        ,   6.69896984,   0.        ,
            0.        ,   4.42665763],
         [  0.72645856, -31.        ,   6.69896984,   0.        ,
            1.        ,   4.42665763],
         [ 10.75567132, -21.        ,   5.        ,   0.        ,
            1.        ,   4.46895256],
         [  4.02987102, -34.        ,   6.69896984,   0.        ,
            0.        ,   5.03542974],
         [  4.02987102, -34.        ,   6.69896984, 

In [72]:
yield_spread_model.predict(temp_input)

2023-11-08 23:10:14.079025: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:689] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "Tesla T4" frequency: 1590 num_cores: 40 environment { key: "architecture" value: "7.5" } environment { key: "cuda" value: "11020" } environment { key: "cudnn" value: "8100" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 4194304 shared_memory_size_per_multiprocessor: 65536 memory_size: 14488961024 bandwidth: 320064000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


array([[-2.7875898],
       [ 2.8818445]], dtype=float32)

In [62]:
yield_spread_model_old = keras.models.load_model('/home/shayaan/ficc_python/model-11-02')

In [64]:
yield_spread_model_old.predict(temp_input)

2023-11-08 23:04:08.883841: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:689] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "Tesla T4" frequency: 1590 num_cores: 40 environment { key: "architecture" value: "7.5" } environment { key: "cuda" value: "11020" } environment { key: "cudnn" value: "8100" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 4194304 shared_memory_size_per_multiprocessor: 65536 memory_size: 14488961024 bandwidth: 320064000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


array([[-1.7058502],
       [ 3.5071597]], dtype=float32)

In [42]:
processed_data['predicted_ys'] = yield_spread_model.predict(x_test, batch_size=BATCH_SIZE)

2023-10-24 20:16:51.449404: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:689] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" vendor: "NVIDIA" model: "Tesla T4" frequency: 1590 num_cores: 40 environment { key: "architecture" value: "7.5" } environment { key: "cuda" value: "11020" } environment { key: "cudnn" value: "8100" } num_registers: 65536 l1_cache_size: 24576 l2_cache_size: 4194304 shared_memory_size_per_multiprocessor: 65536 memory_size: 14488961024 bandwidth: 320064000 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
2023-10-24 20:16:55.545527: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8200


In [43]:
processed_data['predicted_ytw'] = processed_data['new_ficc_ycl'] + processed_data['predicted_ys']

In [44]:
print(f"MAE yield spread model: {round(np.mean(np.abs(processed_data.predicted_ytw - processed_data['yield'])), 3)}")

MAE yield spread model: 12.422


In [45]:
print(f"MAD yield spread model: {round(np.median(np.abs(processed_data.predicted_ytw - processed_data['yield'])), 3)}")

MAD yield spread model: 12.422


In [46]:
from ficc.pricing.price import compute_price
def get_trade_price(trade):
    # compute price does not need to return the calc_date, if we are using the calc_date model: 
    price, _ = compute_price(trade, trade.predicted_ytw/100)
    return price

In [47]:
processed_data['predicted_price'] = processed_data.apply(lambda x: get_trade_price(x), axis=1)

In [45]:
processed_data[['cusip','last_trade_date','predicted_price','predicted_ytw']]

Unnamed: 0,cusip,last_trade_date,predicted_price,predicted_ytw
0,574193KN7,2022-11-07,103.663,276.264069


In [50]:
processed_data[['cusip','last_trade_date','predicted_price','predicted_ytw']]

Unnamed: 0,cusip,last_trade_date,predicted_price,predicted_ytw
0,574193KN7,2020-03-20,109.179,275.277859


In [81]:
from google.cloud import aiplatform

project_id = "964018767272"
location = "us-east4"  # Change to your model's location
model_id = "7659154018622504960"  # Change to your model's ID

# Initialize the Vertex AI client
client = aiplatform.gapic.ModelServiceClient(client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"})

# Get the model's metadata
model_name = f"projects/{project_id}/locations/{location}/models/{model_id}"
response = client.get_model(name=model_name)

# Print the model metadata
print(response)

name: "projects/964018767272/locations/us-east4/models/7659154018622504960"
display_name: "model-11-08-ys"
predict_schemata {
}
metadata {
}
container_spec {
  image_uri: "us-docker.pkg.dev/vertex-ai/prediction/tf2-gpu.2-7:latest"
}
supported_deployment_resources_types: DEDICATED_RESOURCES
supported_deployment_resources_types: 3
supported_input_storage_formats: "jsonl"
supported_input_storage_formats: "bigquery"
supported_input_storage_formats: "csv"
supported_input_storage_formats: "tf-record"
supported_input_storage_formats: "tf-record-gzip"
supported_input_storage_formats: "file-list"
supported_output_storage_formats: "jsonl"
supported_output_storage_formats: "bigquery"
create_time {
  seconds: 1699486243
  nanos: 108601000
}
update_time {
  seconds: 1699486248
  nanos: 807307000
}
deployed_models {
  endpoint: "projects/964018767272/locations/us-east4/endpoints/4283882019768762368"
  deployed_model_id: "7922772926498078720"
}
etag: "AMEw9yOv-5MqPa_eaO_zLAk8lzgz9Dl6mb94hqp2w6jmHZsEJ