# Load Packages, Variables

In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import time
import gc
import json
import pytz

import numpy as np
from google.cloud import bigquery
from google.cloud import storage
import gcsfs

import tensorflow as tf
# Enable memory growth on every GPU TensorFlow can see. This runs right after the
# TensorFlow import, before any model or tensor is created in this notebook.
# (Per the TensorFlow docs, memory growth makes the runtime extend its GPU
# allocation as needed instead of claiming the memory up front.)
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)


from tensorflow import keras
from tensorflow.keras import layers
import seaborn as sns
from pandas.tseries.offsets import BDay

from tensorflow.keras.layers import Embedding
from tensorflow.keras import activations
from tensorflow.keras import backend as K
from tensorflow.keras import initializers
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from sklearn import preprocessing
from sklearn.metrics import mean_absolute_error
from datetime import datetime
import matplotlib.pyplot as plt
import pickle5 as pickle


from ficc.utils.nelson_siegel_model import *
from ficc.utils.diff_in_days import *
from ficc.utils.auxiliary_functions import sqltodf
import os


from ficc.data.process_data import process_data
from ficc.utils.auxiliary_variables import PREDICTORS, NON_CAT_FEATURES, BINARY, CATEGORICAL_FEATURES, IDENTIFIERS, PURPOSE_CLASS_DICT, NUM_OF_DAYS_IN_YEAR
from ficc.utils.gcp_storage_functions import upload_data, download_data
from ficc.utils.auxiliary_variables import RELATED_TRADE_BINARY_FEATURES, RELATED_TRADE_NON_CAT_FEATURES, RELATED_TRADE_CATEGORICAL_FEATURES

# Add the parent directory to the module search path.
# NOTE(review): the local helper imports below are commented out, so nothing in the
# visible cells appears to depend on this path entry - confirm before removing it.
import sys
sys.path.append('../')
# from ficc_keras_utils import *
# import ficc_keras_utils
# from lgbm_tools import *
# from ficc_debiasing import *

# Render every float in pandas output with exactly three decimal places.
pd.set_option('display.float_format', '{:.3f}'.format)
# Record the TensorFlow version in the cell output so the run environment is visible.
print(f'TF Version: {tf.__version__}')

2023-10-13 19:38:28.124407: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-13 19:38:28.139976: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-10-13 19:38:28.142641: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


Initializing pandarallel with 16.0 cores
INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
TF Version: 2.8.4


In [3]:
# BigQuery client used for the query below. No project or credentials are passed
# explicitly, so it presumably relies on the environment's defaults - verify when
# running outside the original machine.
bq_client = bigquery.Client()

In [4]:
# Fetch every column of the historical-predictions test table, ordered by trade_date.
# NOTE(review): SELECT * pulls the whole table (the recorded preview's row index runs
# to 6,095,768), while the visible cells only use rtrs_control_number, trade_date,
# new_ys and new_ys_prediction - consider listing the needed columns explicitly.
QUERY = '''
SELECT * 
FROM `eng-reactor-287421.historical_predictions_test.historical_predictions_test` 
ORDER BY trade_date
'''

# sqltodf is a project helper (ficc.utils.auxiliary_functions); presumably it runs the
# query with the given client and returns a pandas DataFrame - TODO confirm.
df = sqltodf(QUERY, bq_client)

In [5]:
# Display the unfiltered query result (pandas truncates the view to the first and last rows).
df

Unnamed: 0,rtrs_control_number,cusip,trade_date,dollar_price,yield,new_ficc_ycl,new_ys,new_ys_prediction,prediction_datetime
0,2023030103281000,6966674Y7,2023-03-01,106.511,288.000,305.229,-17.229,-12.323,2023-10-10 07:05:00
1,2023030100221100,196711SC0,2023-03-01,114.683,288.000,306.453,-18.453,-18.450,2023-10-10 07:05:00
2,2023030100419900,677522NS9,2023-03-01,101.814,320.000,335.276,-15.276,-19.680,2023-10-10 07:05:00
3,2023030111281800,80585LBY3,2023-03-01,99.132,352.000,305.819,46.181,71.255,2023-10-10 07:05:00
4,2023030102491100,254845MD9,2023-03-01,103.215,368.000,314.216,53.784,67.951,2023-10-10 07:05:00
...,...,...,...,...,...,...,...,...,...
6095765,2023092911772300,890091WR8,2023-09-29,95.674,410.000,379.297,30.703,9.598,2023-10-09 08:45:00
6095766,2023092912942300,586815ND0,2023-09-29,106.807,410.000,396.091,13.909,16.108,2023-10-09 08:45:00
6095767,2023092902209900,048501HP8,2023-09-29,102.307,410.000,378.446,31.554,21.066,2023-10-09 08:45:00
6095768,2023092905600900,754405HG6,2023-09-29,99.128,410.000,406.378,3.622,7.340,2023-10-09 08:45:00


In [6]:
# Load the processed trade data from the GCS bucket; it is used below to filter df
# by rtrs_control_number.
# NOTE(review): unpickling can execute arbitrary code embedded in the file, so this
# is only safe while the bucket is trusted and write-restricted.
additional_trades = pd.read_pickle('gs://ficc_training_data_latest/processed_data_historic_yc.pkl')

In [7]:
# Keep only the predictions whose rtrs_control_number also appears in additional_trades.
df = df.loc[df['rtrs_control_number'].isin(additional_trades['rtrs_control_number'])]

In [8]:
# Display the predictions that remain after restricting to trades present in additional_trades.
df

Unnamed: 0,rtrs_control_number,cusip,trade_date,dollar_price,yield,new_ficc_ycl,new_ys,new_ys_prediction,prediction_datetime
84,2023030108516500,797848GT9,2023-03-01,101.900,360.500,309.816,50.684,9.036,2023-10-10 07:05:00
355,2023030110241600,033630CM4,2023-03-01,99.807,359.100,355.777,3.323,140.807,2023-10-10 07:05:00
431,2023030101590200,85732M5B9,2023-03-01,110.650,328.300,305.281,23.019,14.436,2023-10-10 07:05:00
551,2023030107710100,17163WCR5,2023-03-01,101.500,375.000,335.163,39.837,22.223,2023-10-10 07:05:00
735,2023030104719500,014698AA2,2023-03-01,105.359,332.400,380.082,-47.682,-17.413,2023-10-10 07:05:00
...,...,...,...,...,...,...,...,...,...
6093695,2023092907639700,797300YU8,2023-09-29,100.219,216.400,378.853,-162.453,112.603,2023-10-09 08:45:00
6093755,2023092908626400,226512BF7,2023-09-29,93.274,494.800,381.261,113.539,120.838,2023-10-09 08:45:00
6093967,2023092900739900,853207FE3,2023-09-29,61.813,558.400,464.578,93.822,82.651,2023-10-09 08:45:00
6094866,2023092912102000,825227LA7,2023-09-29,87.790,503.000,378.093,124.907,132.957,2023-10-09 08:45:00


In [9]:
# Spot-check one trade (the first row of the filtered df) in additional_trades.
# In the recorded run dollar_price and yield agree with df, but new_ys / new_ficc_ycl
# (53.168 / 307.332) differ from df's values for the same trade (50.684 / 309.816).
additional_trades.loc[
    additional_trades['rtrs_control_number'] == 2023030108516500,
    ['dollar_price', 'yield', 'new_ys', 'new_ficc_ycl'],
]

Unnamed: 0,dollar_price,yield,new_ys,new_ficc_ycl
30301,101.9,360.5,53.168,307.332


In [22]:
# Mean absolute error between new_ys and new_ys_prediction, one value per trade_date.
#
# A single groupby pass replaces re-filtering the whole frame once per date.
# sort=False keeps the dates in order of first appearance, which is the order the
# previous loop over df.trade_date.unique() produced.
#
# There is deliberately no try/except: the old handler re-raised immediately (its
# print was unreachable), so errors from mean_absolute_error propagate exactly as before.
# NOTE: groupby skips rows whose trade_date is missing; the old loop would have
# raised on the empty slice such a key selects.
mae_by_date = {
    trade_date: mean_absolute_error(day_trades.new_ys, day_trades.new_ys_prediction)
    for trade_date, day_trades in df.groupby('trade_date', sort=False)
}

# One row per trade date with a single 'MAE' column. Naming the column here (rather
# than assigning .columns afterwards) also yields a valid empty frame when df is empty.
accuracy = pd.DataFrame.from_dict(mae_by_date, orient='index', columns=['MAE'])

In [24]:
# Average of the per-date MAE values. accuracy holds one row per trade date, so each
# date counts equally here regardless of how many trades it contains; this is not the
# same as the MAE computed over all trades pooled together.
accuracy.mean()

MAE   31.100
dtype: float64