**MacKay Shields wants model predictions for a list of CUSIPs that traded between 09-26 and 10-02. Models archived in the cloud storage `automated_training` bucket [here](https://console.cloud.google.com/storage/browser/automated_training;tab=objects?forceOnBucketsSortingFiltering=true&authuser=1&project=eng-reactor-287421&prefix=&forceOnObjectsSortingFiltering=false) are dated as of the day of deployment, meaning they are tested on the prior day.**

Namely, we need models:
1. 09-26 (Tue)
2. 09-27 (Wed)
3. 09-28 (Thu)
4. 09-29 (Fri)
5. 09-30 (used for Mon 10-02, but trained on Sat)

In [3]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import time
import gc
import json
import pytz

import numpy as np
from google.cloud import bigquery
from google.cloud import storage
import gcsfs

import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
from pandas.tseries.offsets import BDay

from tensorflow.keras.layers import Embedding
from tensorflow.keras import activations
from tensorflow.keras import backend as K
from tensorflow.keras import initializers
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
from datetime import datetime
import matplotlib.pyplot as plt
import pickle5 as pickle


from ficc.utils.nelson_siegel_model import *
from ficc.utils.diff_in_days import *
from ficc.utils.auxiliary_functions import sqltodf
import os


from ficc.data.process_data import process_data
from ficc.utils.auxiliary_variables import PREDICTORS, NON_CAT_FEATURES, BINARY, CATEGORICAL_FEATURES, IDENTIFIERS, PURPOSE_CLASS_DICT, NUM_OF_DAYS_IN_YEAR
from ficc.utils.gcp_storage_functions import upload_data, download_data
from ficc.utils.auxiliary_variables import RELATED_TRADE_BINARY_FEATURES, RELATED_TRADE_NON_CAT_FEATURES, RELATED_TRADE_CATEGORICAL_FEATURES

import sys
sys.path.append('../../../')
from ficc_keras_utils import *
import ficc_keras_utils
from lgbm_tools import *
from ficc_debiasing import *

pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(f'TF Version: {tf.__version__}')

2023-10-04 16:22:59.086459: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-04 16:22:59.264471: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-04 16:22:59.266175: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


Initializing pandarallel with 16.0 cores
INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
TF Version: 2.7.0


In [4]:
# Point the Google client libraries at a service-account key file.
# NOTE(review): absolute, user-specific path; this cell only works on a machine that has this file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="/home/jupyter/ficc/isaac_creds.json"
# NOTE(review): TensorFlow is imported (and its GPUs listed) in the imports cell before these
# TF_* variables are set, so they may be applied too late to affect this kernel - confirm.
os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None
    
# Clients pick up the credentials configured above.
bq_client = bigquery.Client()
storage_client = storage.Client()

# Date windows and training hyper-parameters.
# NOTE(review): none of the names below are read again by the cells shown in this notebook,
# which only loads already-trained models; they look like leftovers from a training notebook.
train_start = '2023-02-01'
train_end = '2023-07-01'
test_start = '2023-07-01'
test_end = '2023-08-01'

VALIDATION_SPLIT = 0.1
LEARNING_RATE = 0.0001
BATCH_SIZE = 1000 
NUM_EPOCHS = 75 
DROPOUT = 0.1 
TRADE_SEQUENCE_LENGTH = 5
NUM_FEATURES = 6
target_variable = 'new_ys' 

In [5]:
def load_data_from_pickle(path, bucket = 'isaac_data'):
    '''
    Load a pickled object from a local cache file, downloading it from Google
    Cloud Storage on a cache miss.

    Parameters
    ----------
    path : str
        Local path of the cache file. The same string is used as the object
        name inside `bucket` when the file has to be downloaded.
    bucket : str
        Name of the GCS bucket to download from when `path` does not exist.

    Returns
    -------
    The unpickled object. On a cache miss it is also written to `path`.

    Notes
    -----
    Unpickling can execute arbitrary code; only use this with files and
    buckets you trust.
    '''
    if os.path.isfile(path):
        print('File available, loading pickle')
        with open(path, 'rb') as f:
            return pickle.load(f)

    print(f'File not available, downloading from cloud storage and saving to {path}')
    fs = gcsfs.GCSFileSystem(project='eng-reactor-287421')
    # GCS object names always use '/', independent of the local OS, and an
    # absolute local path must not replace the bucket (as os.path.join would do).
    gc_path = f"{bucket.rstrip('/')}/{path.lstrip('/')}"
    print(gc_path)
    with fs.open(gc_path) as gf:
        data = pd.read_pickle(gf)

    # Make sure the cache location exists so a long download is not lost to a
    # FileNotFoundError at the very end.
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Write to a temporary file and rename it into place: an interrupted dump
    # then never leaves a truncated cache file that the next run would load.
    tmp_path = f'{path}.tmp'
    with open(tmp_path, 'wb') as f:
        pickle.dump(data, f)
    os.replace(tmp_path, path)
    return data

In [77]:
# Feature lists used by create_input to lay out the model inputs.
# NOTE: BINARY, CATEGORICAL_FEATURES and NON_CAT_FEATURES are also imported from
# ficc.utils.auxiliary_variables in the imports cell; the definitions below replace those
# imported values for the rest of the notebook.

# Binary flags; create_input casts them to float64 and appends them after NON_CAT_FEATURES.
BINARY = ['callable',
          'sinking',
          'zerocoupon',
          'is_non_transaction_based_compensation',
          'is_general_obligation',
          'callable_at_cav',
          'extraordinary_make_whole_call',
          'make_whole_call',
          'has_unexpired_lines_of_credit',
          'escrow_exists']

# Categorical features; create_input encodes each one with encoders[feature] and emits one
# input array per feature, in this order. The '*_ttypes' names are the trade-history summary
# columns produced from ys_variants below.
CATEGORICAL_FEATURES = ['rating',
                        'incorporated_state_code',
                        'trade_type',
                        'purpose_class',
                        'max_ys_ttypes',
                        'min_ys_ttypes',
                        'max_qty_ttypes',
                        'min_ago_ttypes',
                        'D_min_ago_ttypes',
                        'P_min_ago_ttypes',
                        'S_min_ago_ttypes']

# Numeric features; create_input concatenates them (followed by BINARY) into a single
# float64 matrix, so the order here is the column order of that matrix.
NON_CAT_FEATURES = ['quantity',
                     'days_to_maturity',
                     'days_to_call',
                     'coupon',
                     'issue_amount',
                     'last_seconds_ago',
                     'last_yield_spread',
                     'days_to_settle',
                     'days_to_par',
                     'maturity_amount',
                     'issue_price',
                     'orig_principal_amount',
                     'max_amount_outstanding',
                     'accrued_days',
                     'days_in_interest_payment',
                     'A/E',
                     'ficc_treasury_spread',
                     'max_ys_ys',
                     'max_ys_ago',
                     'max_ys_qdiff',
                     'min_ys_ys',
                     'min_ys_ago',
                     'min_ys_qdiff',
                     'max_qty_ys',
                     'max_qty_ago',
                     'max_qty_qdiff',
                     'min_ago_ys',
                     'min_ago_ago',
                     'min_ago_qdiff',
                     'D_min_ago_ys',
                     'D_min_ago_ago',
                     'D_min_ago_qdiff',
                     'P_min_ago_ys',
                     'P_min_ago_ago',
                     'P_min_ago_qdiff',
                     'S_min_ago_ys',
                     'S_min_ago_ago',
                     'S_min_ago_qdiff']

# Maps the pair (trade[3], trade[4]) of a trade-history entry to a one-letter side code.
# Presumably D = inter-dealer, S = sale, P = purchase - TODO confirm.
ttype_dict = { (0,0):'D', (0,1):'S', (1,0):'P' }

# Prefixes (which past trade is summarised) and suffixes (which feature of it) of the
# trade-history summary columns; get_trade_history_columns combines them.
ys_variants = ["max_ys", "min_ys", "max_qty", "min_ago", "D_min_ago", "P_min_ago", "S_min_ago"]
ys_feats = ["_ys", "_ttypes", "_ago", "_qdiff"]
# Per-CUSIP caches of the last D / P / S trade picked by trade_history_derived_features.
# They are module-level state: that function reads them as fallbacks and updates them.
D_prev = dict()
P_prev = dict()
S_prev = dict()

def get_trade_history_columns():
    '''
    Build the names of the trade-history summary columns.

    Every prefix in `ys_variants` is combined with every suffix in `ys_feats`.
    The result is grouped by prefix, with suffixes in `ys_feats` order.
    '''
    return [variant + feat for variant in ys_variants for feat in ys_feats]

def extract_feature_from_trade(row, name, trade):
    '''
    Turn one past trade into the four features stored per summary variant.

    Parameters
    ----------
    row : object with `.trade_type` and `.quantity`
        The current trade.
    name : str
        Name of the summary variant. Not used by the computation.
    trade : indexable
        A trade-history entry; positions 0, 2, 3, 4 and 5 are read.

    Returns
    -------
    list
        [yield spread of the past trade (trade[0]),
         side code of the past trade followed by the current trade type,
         seconds-ago value of the past trade (trade[5]),
         log10(1 + |10**trade[2] - 10**row.quantity|)].
        The 10** suggests both quantities are stored as log10 - TODO confirm.
    '''
    past_side = ttype_dict[(trade[3],trade[4])]
    size_gap = np.abs(10**trade[2] - 10**row.quantity)
    return [trade[0], past_side + row.trade_type, trade[5], np.log10(1 + size_gap)]

def trade_history_derived_features(row):
    '''
    Summarise a trade's history as 7 representative past trades x 4 features.

    For each variant in `ys_variants` one entry of `row.trade_history` is
    selected (highest yield spread, lowest yield spread, largest quantity,
    most recent, and most recent per side D / P / S) and converted with
    `extract_feature_from_trade`.

    Parameters
    ----------
    row : object with `.trade_history`, `.cusip`, `.trade_type`, `.quantity`
        `trade_history` must be non-empty (its first entry is read
        unconditionally). Entries are indexed as: [0] yield spread,
        [2] quantity, [3] and [4] side flags (keys of `ttype_dict`),
        [5] seconds-ago value.

    Returns
    -------
    list
        Flat list of 28 values: for every variant in `ys_variants` order, the
        4 values returned by `extract_feature_from_trade`. This is the same
        order as the names from `get_trade_history_columns`.

    Side effects
    ------------
    Reads and updates the module-level caches `D_prev`, `P_prev`, `S_prev`
    (keyed by cusip), so the result for a row can depend on which rows were
    processed before it.
    '''
    trade_history = row.trade_history
    trade = trade_history[0]
    
    # Per-side picks default to the trade cached for this cusip by an earlier call, or to the
    # first trade of the history if nothing is cached. They are only replaced by a trade whose
    # seconds-ago value is below 9.
    # NOTE(review): 9 is presumably an upper bound in the (log-scaled?) units of trade[5] - confirm.
    D_min_ago_t = D_prev.get(row.cusip,trade)
    D_min_ago = 9        

    P_min_ago_t = P_prev.get(row.cusip,trade)
    P_min_ago = 9
    
    S_min_ago_t = S_prev.get(row.cusip,trade)
    S_min_ago = 9
    
    # The running extremes are seeded from the first trade of the history, even if that trade
    # is later skipped by the `trade[5] == 0` check inside the loop.
    max_ys_t = trade; max_ys = trade[0]
    min_ys_t = trade; min_ys = trade[0]
    max_qty_t = trade; max_qty = trade[2]
    min_ago_t = trade; min_ago = trade[5]
    
    for trade in trade_history[0:]:
        # Skip entries whose seconds-ago value is exactly 0 (per the original note: a trade
        # from the same block as the current one).
        if trade[5] == 0: 
            continue
 
        # Strict comparisons: on ties the earliest entry in the history is kept.
        if trade[0] > max_ys: 
            max_ys_t = trade
            max_ys = trade[0]
        elif trade[0] < min_ys: 
            min_ys_t = trade; 
            min_ys = trade[0]

        if trade[2] > max_qty: 
            max_qty_t = trade 
            max_qty = trade[2]
        if trade[5] < min_ago: 
            min_ago_t = trade; 
            min_ago = trade[5]
            
        # A flag pair that is not a key of ttype_dict raises KeyError here, so `side` is always
        # one of 'D', 'S', 'P' and the final `else` branch below is never reached.
        side = ttype_dict[(trade[3],trade[4])]
        if side == "D":
            if trade[5] < D_min_ago: 
                D_min_ago_t = trade; D_min_ago = trade[5]
                # Remember this pick for later rows of the same cusip.
                D_prev[row.cusip] = trade
        elif side == "P":
            if trade[5] < P_min_ago: 
                P_min_ago_t = trade; P_min_ago = trade[5]
                P_prev[row.cusip] = trade
        elif side == "S":
            if trade[5] < S_min_ago: 
                S_min_ago_t = trade; S_min_ago = trade[5]
                S_prev[row.cusip] = trade
        else: 
            print("invalid side", trade)
    
    # Keys must match the entries of ys_variants, which drives the output order below.
    trade_history_dict = {"max_ys":max_ys_t,
                          "min_ys":min_ys_t,
                          "max_qty":max_qty_t,
                          "min_ago":min_ago_t,
                          "D_min_ago":D_min_ago_t,
                          "P_min_ago":P_min_ago_t,
                          "S_min_ago":S_min_ago_t}

    return_list = []
    for variant in ys_variants:
        feature_list = extract_feature_from_trade(row,variant,trade_history_dict[variant])
        return_list += feature_list
    
    return return_list


def create_input(df):
    '''
    Assemble the list of arrays passed to the Keras model's `predict`.

    The returned list contains, in order:
      1. the stacked `trade_history` column,
      2. the stacked `target_attention_features` column,
      3. one float64 matrix with a column per feature in
         NON_CAT_FEATURES + BINARY,
      4. one float32 array per feature in CATEGORICAL_FEATURES, produced by
         `encoders[feature].transform`.

    Relies on the notebook-level `encoders` dict being loaded before the call.
    '''
    model_inputs = [
        np.stack(df[sequence_col].to_numpy())
        for sequence_col in ('trade_history', 'target_attention_features')
    ]

    numeric_columns = [
        np.expand_dims(df[feature].to_numpy().astype('float64'), axis=1)
        for feature in NON_CAT_FEATURES + BINARY
    ]
    model_inputs.append(np.concatenate(numeric_columns, axis=-1))

    model_inputs.extend(
        encoders[feature].transform(df[feature]).astype('float32')
        for feature in CATEGORICAL_FEATURES
    )
    return model_inputs

def addflag(flag, condition, name):
    '''
    Tag the rows of `flag` selected by `condition` with `name`, in place.

    Rows still holding the placeholder "none" are set to `name`; rows that
    already carry a tag get " & " + `name` appended. Returns None.
    '''
    untagged = flag == "none"
    first_tag = condition & untagged
    extra_tag = condition & ~untagged
    flag[first_tag] = name
    flag[extra_tag] = flag[extra_tag] + " & " + name
    
def addcol(data, newname, newvals, warn=False):
    '''
    Return `data` with a column `newname` holding `newvals`.

    If the column already exists it is overwritten in place (optionally with
    a printed warning) and the same frame is returned. Otherwise a new frame
    is returned with the column appended, and `data` itself is left unchanged.
    '''
    if newname not in data.columns:
        appended = pd.Series(newvals, index = data.index, name=newname)
        return pd.concat([data,appended],axis=1)

    if warn:
        print( f"Warning: replacing duplicate column {newname}" )
    data[newname] = newvals
    return data

def mkcases(df):
    '''
    Label every row of `df` with its "case".

    Rows are tagged by last yield (missing, < 150, 150..700 inclusive, > 700)
    and additionally with "when issued" where `df.when_issued` is true. Tags
    are combined with " & " by `addflag`. The label counts are printed and the
    labels are returned as a categorical Series indexed like `df`.
    '''
    flag = pd.Series("none", index=df.index)

    labelled_conditions = [
        (df.last_yield.isna(), "no last yld"),
        (df.last_yield < 150, "last yld < 1.5%"),
        (df.last_yield.between(150,700), "1.5% <= last yld <= 7%"),
        (df.last_yield > 700, "last yld > 7%"),
        (df.when_issued, "when issued"),
    ]
    for condition, label in labelled_conditions:
        addflag(flag, condition, label)

    print( flag.value_counts(dropna=False) )
    return flag.astype('category')

def mean_absolute_deviation(pred, truth):
    '''
    Median of the absolute differences between `pred` and `truth`.

    Note that, despite the function name, this is a median and not a mean.
    Both inputs are flattened to column vectors before subtracting.
    '''
    pred_col = np.array(pred).reshape(-1,1)
    truth_col = np.array(truth).reshape(-1,1)
    return np.median(np.abs(pred_col - truth_col))

def compare_mae(df, prediction_cols, groupby_cols, target_variable):
    '''
    Tabulate mean and median absolute error of one or more prediction columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `target_variable`, a 'cases' column, every column in
        `prediction_cols` and, if given, every column in `groupby_cols`.
    prediction_cols : list of str
        Columns holding predictions to compare against `target_variable`.
    groupby_cols : list of str or None
        Optional extra grouping columns (one or more).
    target_variable : str
        Column holding the true values.

    Returns
    -------
    pandas.DataFrame
        Columns are a MultiIndex: ('MAE', col) and ('MAD', col) for every
        prediction column, plus ('', 'N') with the row count. MAD is whatever
        `mean_absolute_deviation` returns.
        Without `groupby_cols` the rows are 'Overall' followed by one row per
        case. With `groupby_cols` the rows are a MultiIndex
        ['cases'] + groupby_cols, where the 'Overall' block (one row per group)
        comes first, followed by one row per (case, group).

    Raises
    ------
    TypeError
        If `prediction_cols` is not a list, or `groupby_cols` is neither
        falsy nor a list.

    Rows with a missing value in any prediction column are dropped before the
    errors are computed; the number of missing values per column is printed.
    '''
    if not isinstance(prediction_cols, list):
        raise TypeError(f'prediction_cols must be a list, got {type(prediction_cols)} instead')

    if groupby_cols and not isinstance(groupby_cols, list):
        raise TypeError(f'groupby_cols must be a list or None, got {type(groupby_cols)} instead')

    print(f'{f" Analysis for target: {target_variable} ":=^75}')

    # Series.items() works on every pandas version; .iteritems() was removed in pandas 2.0.
    for col, n_missing in df[prediction_cols].isna().sum().items():
        print(f'Prediction col {col} has {n_missing} nan values')

    df = df.dropna(subset=prediction_cols)

    def _error_row(frame):
        # One summary row: [MAE per prediction col..., MAD per prediction col..., row count]
        return [mean_absolute_error(frame[target_variable], frame[col]) for col in prediction_cols] + \
               [mean_absolute_deviation(frame[target_variable], frame[col]) for col in prediction_cols] + \
               [len(frame)]

    if groupby_cols:
        needed = df[[target_variable, 'cases'] + prediction_cols + groupby_cols]

        overall = needed.groupby(groupby_cols, observed=True).apply(_error_row)
        # Group keys are scalars for a single grouping column and tuples for several;
        # flatten both into ('Overall', key...) so the index has the same depth as the
        # per-case block below.
        overall_index = [('Overall',) + (key if isinstance(key, tuple) else (key,))
                         for key in overall.index]
        temp = pd.DataFrame(overall.to_list(), index=pd.MultiIndex.from_tuples(overall_index))

        by_case = needed.groupby(['cases'] + groupby_cols, observed=True).apply(_error_row)
        temp2 = pd.DataFrame(by_case.to_list(), index=by_case.index)
    else:
        by_case = df[[target_variable, 'cases'] + prediction_cols]\
                .groupby('cases', observed=True)\
                .apply(_error_row)
        temp2 = pd.DataFrame(by_case.to_list(), index=by_case.index)

        temp = pd.DataFrame(_error_row(df), columns=['Overall']).T

    summary = pd.concat([temp, temp2], axis=0)
    columns = [('MAE', col) for col in prediction_cols] + \
              [('MAD', col) for col in prediction_cols] + \
              [('', 'N')]
    summary.columns = pd.MultiIndex.from_tuples(columns)

    if groupby_cols:
        summary.index = pd.MultiIndex.from_tuples(summary.index, names=['cases'] + groupby_cols)

    summary[('', 'N')] = summary[('', 'N')].astype(int)
    return summary

In [7]:
# Load the fitted encoders for the categorical features from the automated_training bucket.
# create_input looks them up by feature name and calls .transform on them.
fs = gcsfs.GCSFileSystem(project='eng-reactor-287421')
with fs.open('automated_training/encoders.pkl') as gf:
    # NOTE(review): unpickling executes arbitrary code; acceptable only because the bucket is trusted.
    encoders = pickle.load(gf)
    
# Number of classes known to each encoder, keyed by feature name.
# NOTE(review): not read by any later cell shown in this notebook.
fmax = {key: len(value.classes_) for key, value in encoders.items()}



In [134]:
# Trade dates (MM-DD, year 2023) to score. model_dict maps each trade date to the model
# used for it.
model_dates = ['09-26', '09-27', '09-28', '09-29', '10-02', '10-03']
target_dates = ['2023-'+ d for d in model_dates]
model_dict = {}
for d in model_dates:

    # Trades on Monday 10-02 are scored with the model archived under 09-30;
    # every other date uses the model archived under its own date.
    if d == '10-02':
        model_date = '09-30'
    else:
        model_date = d

    try:
        model_path = os.path.join('gs://automated_training', 'yield_spread_model', 'model-'+model_date)
        model_dict[d] = keras.models.load_model(model_path)
    except Exception as err:
        # Fall back to the bucket root, where some models are archived. Catch Exception
        # rather than using a bare except so Ctrl-C / kernel interrupts still stop the cell,
        # and report why the first location failed instead of hiding it.
        print(f'Could not load {model_path} ({type(err).__name__}), trying bucket root')
        model_path = os.path.join('gs://automated_training', 'model-'+model_date)
        model_dict[d] = keras.models.load_model(model_path)

    print(f'{model_path} loaded')

gs://automated_training/yield_spread_model/model-09-26 loaded
gs://automated_training/yield_spread_model/model-09-27 loaded
gs://automated_training/yield_spread_model/model-09-28 loaded
gs://automated_training/yield_spread_model/model-09-29 loaded
gs://automated_training/yield_spread_model/model-09-30 loaded
gs://automated_training/model-10-03 loaded


In [128]:
# Load the processed trade data. The first run downloads processed_data.pkl from the
# automated_training bucket (several minutes) and caches it in the working directory.
%time processed_data = load_data_from_pickle('processed_data.pkl', bucket = 'automated_training')

File not available, downloading from cloud storage and saving to processed_data.pkl
automated_training/processed_data.pkl
CPU times: user 4min 22s, sys: 33.2 s, total: 4min 55s
Wall time: 5min 54s


In [135]:
# Requested CUSIPs: first column of a header-less CSV in the working directory.
# NOTE(review): the outputs further down show entries such as '1.67E+44' in this list, i.e.
# some CUSIPs are already stored in scientific notation in the CSV and cannot match any bond.
cusip_list = pd.read_csv('cusips.csv', header=None).iloc[:,0].to_list()
# The list is expected to contain no duplicates.
assert len(cusip_list) == len(set(cusip_list))

In [136]:
# Trades of the requested CUSIPs on the target dates, ordered by execution time.
# Built as one chain of non-mutating calls: the in-place sort/reset on a filtered slice is
# what triggers pandas' SettingWithCopyWarning (silenced globally in the config cell).
in_scope = processed_data.cusip.isin(cusip_list) & processed_data['trade_date'].isin(target_dates)
test_data = (
    processed_data[in_scope]
    .sort_values(by='trade_datetime', ascending=True)
    .reset_index(drop=True)
)

In [137]:
# Sanity check: first and last trade date present in the selection.
test_data.trade_date.min(), test_data.trade_date.max()

(Timestamp('2023-09-26 00:00:00'), Timestamp('2023-10-03 00:00:00'))

In [138]:
# Replace missing "last trade" features with 0; a later cell checks that no feature
# column of test_data contains NaN.
for last_trade_col in ('last_seconds_ago', 'last_yield_spread'):
    test_data[last_trade_col] = test_data[last_trade_col].fillna(0)

In [158]:
# Number of missing values per model feature; only features with at least one NaN are shown.
nas = test_data[CATEGORICAL_FEATURES+BINARY+NON_CAT_FEATURES].isna().sum()
nas.loc[nas.gt(0)].sort_values()

Series([], dtype: int64)

In [139]:
# Requested CUSIPs that have no row at all in processed_data on or after 2023-09-26.
# NOTE(review): the name suggests these are bonds that are only quoted in dollar price and are
# therefore absent from the yield-spread data - an assumption, not checked here.
dollar_price_cusips = set(cusip_list) - set(processed_data[processed_data.trade_date>='2023-09-26'].cusip)
# pd.Series(list(dollar_price_cusips)).to_csv('missingcusips.csv', index=False)

In [140]:
# How many requested CUSIPs have no processed trade on or after 2023-09-26.
len(dollar_price_cusips)

196

In [184]:
# Requested CUSIPs that do have processed trades. Computed from the set itself instead of a
# number copied from a previous output, so it stays correct when the inputs change.
len(cusip_list) - len(dollar_price_cusips)

1217

In [141]:
# Look up the CUSIPs without processed trades in the raw trade-history view.
# cusip_str is a double-quoted, comma-separated list for the SQL IN (...) clause.
# NOTE(review): the SQL is assembled by string concatenation from the contents of the CSV;
# a value containing a double quote would break (or alter) the query. Query parameters would
# be safer if sqltodf can pass them through - confirm against that helper.
cusip_str = '"'+'","'.join(dollar_price_cusips)+'"'

query = f'''
SELECT * FROM `eng-reactor-287421.auxiliary_views.materialized_trade_history` 
WHERE trade_date>="2023-09-26"
AND cusip in ({cusip_str})
'''

# sqltodf is a project helper; the result is used as a DataFrame by the cells below.
bqdf = sqltodf(query, bq_client)

In [193]:
# Missing values per model feature, restricted to the features that exist as columns in the
# BigQuery result; only features with at least one NaN are shown.
bqdfcols = [col for col in CATEGORICAL_FEATURES+NON_CAT_FEATURES+BINARY if col in bqdf.columns]
temp = bqdf[bqdfcols].isna().sum()
temp.loc[temp.gt(0)]

coupon                           463
maturity_amount                   41
issue_price                      149
orig_principal_amount             41
max_amount_outstanding            40
extraordinary_make_whole_call    251
make_whole_call                  118
dtype: int64

In [143]:
# CUSIPs found neither in processed_data nor in the BigQuery trade history.
# The lookup set is built once instead of once per candidate CUSIP.
bq_cusips = set(bqdf.cusip)
missing = [cusip for cusip in dollar_price_cusips if cusip not in bq_cusips]
len(missing)

9

In [177]:
# CUSIPs with no trade in either source; several are not valid CUSIPs (scientific notation).
missing

['64966FP81',
 '1.67E+44',
 '6.46E+06',
 '2.51E+11',
 '7.35E+25',
 '19042FAB2',
 '16772PDS2',
 '14043FAC0',
 '67756CFY9']

In [170]:
# Entries of the request list that are numbers in scientific notation (e.g. '1.67E+44')
# rather than CUSIPs. They are selected by pattern instead of by hard-coded row positions, so
# the cell keeps working if the CSV changes. A real CUSIP never contains '+', so requiring
# 'E+' cannot flag a valid one.
requested = pd.Series(cusip_list)
requested[requested.astype(str).str.match(r'^\d(\.\d+)?E\+\d+$')]

1212    1.67E+44
809     7.35E+25
166     6.46E+06
1177    2.51E+11
dtype: object

# Evaluating performance

In [111]:
def filter_df(df, min_days=400, max_days=30000):
    '''
    Drop trades that are close to call, refund or maturity, or very far from maturity.

    A row is kept when all of the following hold:
      * days_to_call, days_to_refund and days_to_maturity are each either
        exactly 0 or greater than log10(min_days);
      * days_to_maturity is less than log10(max_days).
    The comparison against log10 thresholds implies the columns hold
    log10(days); 0 presumably encodes "not applicable" - TODO confirm.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain days_to_call, days_to_refund and days_to_maturity.
    min_days : float, default 400
        Lower threshold in days, applied to all three columns.
    max_days : float, default 30000
        Upper threshold in days, applied to days_to_maturity only.

    Returns
    -------
    pandas.DataFrame
        The surviving rows of `df`; `df` itself is not modified. The number of
        dropped rows is printed.
    '''
    N = len(df)
    lower, upper = np.log10(min_days), np.log10(max_days)
    keep = (
        ((df.days_to_call == 0) | (df.days_to_call > lower))
        & ((df.days_to_refund == 0) | (df.days_to_refund > lower))
        & ((df.days_to_maturity == 0) | (df.days_to_maturity > lower))
        & (df.days_to_maturity < upper)
    )
    df = df[keep]
    print(f'===== Short maturity filter droppping {N-len(df)} trades =====')
    return df

MAE on all trades in all cusips in target dates

In [150]:
# Score every trade in processed_data on each target date with the model assigned to that
# date and print the per-day MAE against new_ys.
# NOTE(review): this loop is repeated almost verbatim in the next three cells (with and
# without filter_df, on processed_data and on test_data); a shared helper would avoid drift.
for date in model_dates:
    rows = processed_data.trade_date == '2023-'+date
    x = processed_data[rows]
    
    if not len(x):
        print(f'No trades found for {date}, passing')
        continue
    
    pred = model_dict[date].predict(create_input(x), batch_size=10000)
    # Side effect: stores the predictions in a 'prediction' column of processed_data.
    processed_data.loc[rows, 'prediction'] = pred
    
    temp = processed_data.loc[rows]
    
    # NOTE(review): ':2f' means minimum width 2 with the default 6 decimals; '.2f' may have
    # been intended.
    print(f'{date}, N={len(temp)}: {mean_absolute_error(temp.new_ys, temp.prediction):2f}')    

09-26, N=54380: 15.475519
09-27, N=60235: 16.539041
09-28, N=58931: 15.337590
09-29, N=50247: 14.145222
10-02, N=52973: 11.706711
10-03, N=59091: 12.027202


MAE on all trades in all cusips in target dates after short maturity filter 

In [148]:
# Same evaluation as the unfiltered per-day loop over processed_data, but the MAE is computed
# only on the trades that survive filter_df. Predictions are recomputed for every date and
# written to processed_data['prediction'] again.
for date in model_dates:
    rows = processed_data.trade_date == '2023-'+date
    x = processed_data[rows]
    
    if not len(x):
        print(f'No trades found for {date}, passing')
        continue
    
    pred = model_dict[date].predict(create_input(x), batch_size=10000)
    processed_data.loc[rows, 'prediction'] = pred
    
    # filter_df prints how many trades it drops for this date.
    temp = filter_df(processed_data.loc[rows])
    
    
    # print(f'{date}, N={len(x)}: {mean_absolute_error(pred, processed_data[rows].new_ys):2f}')
    # NOTE(review): ':2f' means minimum width 2 with the default 6 decimals; '.2f' may have
    # been intended.
    print(f'{date}, N={len(temp)}: {mean_absolute_error(temp.new_ys, temp.prediction):2f}')    

===== Short maturity filter droppping 9128 trades =====
09-26, N=45252: 10.551275
===== Short maturity filter droppping 9900 trades =====
09-27, N=50335: 10.651320
===== Short maturity filter droppping 9568 trades =====
09-28, N=49363: 12.626026
===== Short maturity filter droppping 7978 trades =====
09-29, N=42269: 11.087543
===== Short maturity filter droppping 5152 trades =====
10-02, N=47821: 10.525455
===== Short maturity filter droppping 5868 trades =====
10-03, N=53223: 10.375335


MAE on all trades in the MacKay Shields CUSIPs on the target dates

In [200]:
# Score the trades of the requested CUSIPs (test_data) on each target date with the model
# assigned to that date and print the per-day MAE against new_ys.
for date in model_dates:
    rows = test_data.trade_date == '2023-'+date
    x = test_data[rows]

    if not len(x):
        print(f'No trades found for {date}, passing')
        continue

    pred = model_dict[date].predict(create_input(x), batch_size=10000)
    # Side effect: stores the predictions in a 'prediction' column of test_data.
    test_data.loc[rows, 'prediction'] = pred
    # Arguments in (truth, prediction) order, matching the other evaluation cells of this
    # notebook; MAE is symmetric, so the reported numbers are unchanged.
    print(f'{date}, N={len(x)}: {mean_absolute_error(x.new_ys, pred):2f}')

09-26, N=2759: 11.158258
09-27, N=3876: 8.371860
09-28, N=4096: 10.470748
09-29, N=2305: 14.136755
10-02, N=3282: 8.751101
10-03, N=4474: 6.779404


MAE on all trades in the MacKay Shields CUSIPs on the target dates, after the short maturity filter

In [145]:
# Per-day MAE for the requested CUSIPs (test_data), computed only on the trades that survive
# filter_df. Predictions are recomputed for every date and written to
# test_data['prediction']; the exported CSV and the summary tables below read that column.
for date in model_dates:
    rows = test_data.trade_date == '2023-'+date
    x = test_data[rows]
    
    if not len(x):
        print(f'No trades found for {date}, passing')
        continue
    
    pred = model_dict[date].predict(create_input(x), batch_size=10000)
    test_data.loc[rows, 'prediction'] = pred
    
    # filter_df prints how many trades it drops for this date.
    temp = filter_df(test_data.loc[rows])
    
    
    # print(f'{date}, N={len(x)}: {mean_absolute_error(pred, processed_data[rows].new_ys):2f}')
    # NOTE(review): ':2f' means minimum width 2 with the default 6 decimals; '.2f' may have
    # been intended.
    print(f'{date}, N={len(temp)}: {mean_absolute_error(temp.new_ys, temp.prediction):2f}')    

===== Short maturity filter droppping 933 trades =====
09-26, N=1826: 9.819137
===== Short maturity filter droppping 925 trades =====
09-27, N=2951: 7.968249
===== Short maturity filter droppping 960 trades =====
09-28, N=3136: 10.241110
===== Short maturity filter droppping 557 trades =====
09-29, N=1748: 13.242328
===== Short maturity filter droppping 659 trades =====
10-02, N=2623: 7.602001
===== Short maturity filter droppping 1057 trades =====
10-03, N=3417: 6.086834


In [152]:
# Label every trade with its case (last-yield bucket, plus a "when issued" tag) and store the
# labels in a 'cases' column, which compare_mae groups by. mkcases also prints the label counts.
test_data = addcol(test_data, 'cases', mkcases(test_data))

1.5% <= last yld <= 7%                  18193
1.5% <= last yld <= 7% & when issued     2082
no last yld & when issued                 380
no last yld                                73
last yld > 7%                              55
last yld < 1.5%                             9
dtype: int64


In [183]:
# Export the per-trade predictions for the client. Only trades present in processed_data on
# the target dates are included; requested CUSIPs without such trades do not appear in the file.
test_data[['rtrs_control_number','cusip','trade_datetime','yield','new_ys','prediction']].to_csv('mackayshields_predictions.csv', index=False)

In [159]:
# Error summary per trade date (overall and per case) for all trades of the requested CUSIPs.
summary = compare_mae(df=test_data, 
                          prediction_cols = ['prediction'], 
                          groupby_cols = ['trade_date'],
                      target_variable='new_ys'
                         )
display(summary)

# Same summary restricted to the trades that survive filter_df.
summary = compare_mae(df=filter_df(test_data), 
                          prediction_cols = ['prediction'],   
                          groupby_cols = ['trade_date'],
                      target_variable='new_ys'
                         )
display(summary)

Prediction col prediction has 0 nan values


Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,MAD,Unnamed: 4_level_0
Unnamed: 0_level_1,Unnamed: 1_level_1,prediction,prediction,N
cases,trade_date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,2023-09-26,11.158,3.884,2759
Overall,2023-09-27,8.372,3.202,3876
Overall,2023-09-28,10.471,5.77,4096
Overall,2023-09-29,14.137,6.328,2305
Overall,2023-10-02,8.751,5.964,3282
Overall,2023-10-03,6.779,3.579,4474
1.5% <= last yld <= 7%,2023-09-26,9.47,3.809,2485
1.5% <= last yld <= 7%,2023-09-27,7.199,3.096,3192
1.5% <= last yld <= 7%,2023-09-28,9.125,5.812,3416
1.5% <= last yld <= 7%,2023-09-29,11.154,6.202,2051


===== Short maturity filter droppping 5091 trades =====
Prediction col prediction has 0 nan values


Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,MAD,Unnamed: 4_level_0
Unnamed: 0_level_1,Unnamed: 1_level_1,prediction,prediction,N
cases,trade_date,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Overall,2023-09-26,9.819,3.542,1826
Overall,2023-09-27,7.968,3.15,2951
Overall,2023-09-28,10.241,5.654,3136
Overall,2023-09-29,13.242,6.234,1748
Overall,2023-10-02,7.602,5.988,2623
Overall,2023-10-03,6.087,3.508,3417
1.5% <= last yld <= 7%,2023-09-26,7.881,3.467,1569
1.5% <= last yld <= 7%,2023-09-27,6.547,2.937,2277
1.5% <= last yld <= 7%,2023-09-28,8.551,5.689,2463
1.5% <= last yld <= 7%,2023-09-29,9.329,6.068,1506
