Last updated by Developer on 2025-02-13.

This notebook inspects the data in the redis instances used in the product (reference data, trade history, and similar trade history). An example situation where this notebook is useful is when looking for a CUSIP whose trade history contains negative yields.

In [None]:
import redis
import pickle
from itertools import zip_longest
from datetime import datetime

import pandas as pd

from pympler import asizeof    # used to get the size of python objects

In [None]:
import os
import sys


# NOTE: `os.path.abspath` resolves the notebook name against the current working directory, so the kernel must be started from the directory that contains this notebook for `server_dir` to point at the right place
__file__ = os.path.abspath('inspect_redis_data.ipynb')    # in a Jupyter Notebook, the `__file__` variable is not automatically defined because notebooks do not run as standard Python scripts
server_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'app_engine', 'demo', 'server'))    # get the directory containing the 'app_engine/demo/server' package
sys.path.append(server_dir)    # add the directory to sys.path so that the `modules` imports below can be resolved


from modules.ficc.utils.trade_dict_to_list import TRADE_FEATURE_TO_INDEX
from modules.ficc.utils.auxiliary_variables import NUM_OF_DAYS_IN_YEAR
from modules.ficc.utils.diff_in_days import diff_in_days_two_dates


# NOTE(review): `get_creds()` is called before the remaining `modules` imports; presumably those imports need the credentials to already be in place, so confirm before reordering this cell
from modules.get_creds import get_creds
get_creds()


from modules.auxiliary_variables import SEQUENCE_LENGTH, REFERENCE_DATA_FEATURES
from modules.batch_pricing import get_predictions_from_batch_pricing
from modules.similar_trade_history import similar_group_to_similar_key

In [None]:
def _read_endpoint_client(host):
    '''Create a redis client for `host` on port 6379, database 0.'''
    return redis.Redis(host=host, port=6379, db=0)


# every host below is a read endpoint: the use case is read-only, which allows for lower latency and prevents accidentally corrupting the redis by attempting to write to it
reference_data_redis_client = _read_endpoint_client('10.108.4.36')
trade_history_redis_client = _read_endpoint_client('10.75.46.229')
similar_trade_history_redis_client = _read_endpoint_client('10.117.191.181')

See trade history for an arbitrary CUSIP.

In [None]:
# fetch and unpickle everything stored in the trade history redis for a single CUSIP
trade_history_cusip = '64971XQM3'
trade_history_data = pickle.loads(trade_history_redis_client.get(trade_history_cusip))    # `get` returns `None` for an unknown key, in which case `pickle.loads` raises a `TypeError`
print(f'Trade history for CUSIP {trade_history_cusip}:')    # the header previously described reference data features, which is not what is printed here
print(trade_history_data)

See similar trade history for an arbitrary CUSIP.

In [None]:
# adapted from `ficc/app_engine/demo/server/modules/data_preparation_for_pricing.py::get_data_from_redis(...)::get_data_for_single_cusip(...)::get_similar_trade_history_data(...)`; the grouping logic is unchanged, only the redis lookup and the error reporting differ
def get_similar_trade_history_data(reference_data):
    '''Get similar trade history data from the similar trade history redis.

    `reference_data` is a single reference data snapshot from which `issue_key`, `maturity_date` and `coupon` are read by key. 
    These three fields are turned into a similar trade group, the group is converted to a redis key with `similar_group_to_similar_key`, 
    and the unpickled value stored under that key is returned. An empty list is returned when the key is not in the redis or when 
    anything goes wrong while building the key or fetching the data (the reason is printed rather than raised).'''
    def describe_field(field_name):
        '''Return `reference_data[field_name]` for the diagnostic message, or a placeholder if it cannot be read.'''
        try:
            return reference_data[field_name]
        except Exception:
            return '<unavailable>'

    try:
        current_date = datetime.now().date()

        issue_key = reference_data['issue_key']
        maturity_date = reference_data['maturity_date']
        years_to_maturity_date_by_5 = (diff_in_days_two_dates(maturity_date, current_date) // NUM_OF_DAYS_IN_YEAR) // 5    # bucket the remaining years to maturity into 5-year bins
        coupon_by_1 = -1 if reference_data['coupon'] == 0 else reference_data['coupon'] // 1    # zero coupon gets its own bucket (-1); otherwise bucket by whole coupon points
        similar_trade_history_group = (int(issue_key), int(years_to_maturity_date_by_5), int(coupon_by_1))
        similar_trade_history_group = similar_group_to_similar_key(similar_trade_history_group)
        print('similar_trade_history_group:', similar_trade_history_group)
        pickled_similar_trade_history = similar_trade_history_redis_client.get(similar_trade_history_group)    # `get` returns `None` for a missing key, so a separate `exists` call would only add a round trip and a window in which the key can disappear
        similar_trade_history_data = pickle.loads(pickled_similar_trade_history) if pickled_similar_trade_history is not None else []
    except Exception as e:
        print(f'Unable to create a similar trade group due to {type(e)}: {e}')
        # the failure may have been caused by one of these fields being missing, so read them defensively; indexing `reference_data` directly here would raise from inside the handler
        print(f'issue_key: {describe_field("issue_key")}\tmaturity_date: {describe_field("maturity_date")}\tcoupon: {describe_field("coupon")}')
        similar_trade_history_data = []
    return similar_trade_history_data

In [None]:
# fetch every reference data snapshot stored for the CUSIP, keep the most recent one, and look up the similar trade history for it
pickled_reference_snapshots = reference_data_redis_client.get('601670MU3')
latest_reference_snapshot = pickle.loads(pickled_reference_snapshots)[0]    # index 0 indicates the most recent snapshot of the reference data
similar_trade_history_data = get_similar_trade_history_data(latest_reference_snapshot)

In [None]:
# bare expression as the last line of the cell so that the notebook displays the similar trade history fetched in the previous cell
similar_trade_history_data

See reference features for an arbitrary CUSIP.

In [None]:
# load everything stored for this CUSIP in the reference data redis; the cells below index into the result to pick individual snapshots
pickled_reference_data = reference_data_redis_client.get('65165PAB4')
reference_data = pickle.loads(pickled_reference_data)

In [None]:
# pick the newest snapshot and end the cell with the bare name so that the notebook displays it
reference_data_most_recent = reference_data[0]    # index 0 indicates the most recent snapshot of the reference data
reference_data_most_recent

In [None]:
# pick the snapshot right after the most recent one (raises `IndexError` if only one snapshot is stored) and display it
reference_data_second_most_recent = reference_data[1]
reference_data_second_most_recent

Compare the three most recent snapshots of the reference data.

In [None]:
# one row per snapshot, ordered from most recent (row 0) to third most recent (row 2), restricted to the columns in `REFERENCE_DATA_FEATURES`
reference_data_third_most_recent = reference_data[2]
three_latest_snapshots = [
    reference_data_most_recent,
    reference_data_second_most_recent,
    reference_data_third_most_recent,
]
reference_data_df = pd.DataFrame(three_latest_snapshots, columns=REFERENCE_DATA_FEATURES)
snapshot_table = reference_data_df.to_markdown()
print(snapshot_table)

Find differences between the two most recent snapshots of reference data.

In [None]:
# Compare the two most recent snapshots: row 0 (most recent) against row 1 (second most recent)
row0, row1 = reference_data_df.loc[0], reference_data_df.loc[1]
difference = (row0.notna() & row1.notna() & (row0 != row1)) | (row0.isna() ^ row1.isna())    # only true differences are marked while ignoring cases where both are NaN

# Show only columns that differ; each label matches the row of `reference_data_df` the values come from (the labels were previously swapped)
diff_columns = reference_data_df.columns[difference]
diff_values = pd.DataFrame({'Column': diff_columns, 
                            'Row 0': row0[diff_columns], 
                            'Row 1': row1[diff_columns]})

print(diff_values.to_markdown())

Find a CUSIP that trades frequently where all of the last few trades have negative yield. This CUSIP will be used for automated testing to make sure that we refuse to price these CUSIPs with an appropriate error message.

In [None]:
# taken directly from `finance.py`
def get_trade_history_yields(trade_history):
    '''Return the yield of each of the first `SEQUENCE_LENGTH` entries of `trade_history`. An empty trade history gives an empty list.'''
    if not len(trade_history):
        return []
    considered_trades = trade_history[:SEQUENCE_LENGTH]    # the slice keeps the leading entries; presumably the history is stored most recent first so these are the latest trades (TODO confirm)
    yield_position = TRADE_FEATURE_TO_INDEX['yield']    # each trade is indexed by position, and this is the position holding the yield
    yields = []
    for trade in considered_trades:
        yields.append(trade[yield_position])
    return yields

In [None]:
# stop at the first CUSIP that has at least `SEQUENCE_LENGTH` trades and whose considered yields are all present and negative
for cusip in trade_history_redis_client.scan_iter():    # scan_iter() is superior to keys() for large numbers of keys because it gives you an iterator you can use rather than trying to load all the keys into memory; https://stackoverflow.com/questions/22255589/get-all-keys-in-redis-database-with-python
    pickled_trade_history = trade_history_redis_client.get(cusip)    # one round trip per key; `get` returns `None` if the key disappeared after being scanned, so no separate `exists` call is needed
    if pickled_trade_history is None: continue
    trade_history_data = pickle.loads(pickled_trade_history)
    if len(trade_history_data) < SEQUENCE_LENGTH: continue    # want trade history to be at least of length `SEQUENCE_LENGTH` to show that it trades frequently
    trade_history_yields = get_trade_history_yields(trade_history_data)
    if all(trade_history_yield is not None and trade_history_yield < 0 for trade_history_yield in trade_history_yields):    # generator instead of a list so that `all` can stop at the first non-negative yield
        print(cusip)
        print(trade_history_data)
        break

Find CUSIPs where the number of days to the calc date is less than 60. Some of these CUSIPs will be used for automated testing to make sure that we refuse to price these CUSIPs with an appropriate error message. Scan the redis in batches to speed up processing.

In [None]:
def batcher(iterable, num_items):
    '''Yield successive batches (tuples) of at most `num_items` items from `iterable`.

    Every batch holds exactly `num_items` items except possibly the last one, which holds whatever remains; no padding 
    value is added, so batches contain only items that came from `iterable` (including any genuine `None` items). Adapted from 
    https://stackoverflow.com/questions/22255589/get-all-keys-in-redis-database-with-python'''
    padding = object()    # unique sentinel so that padding can be told apart from real items, even if those are `None`
    args = [iter(iterable)] * num_items    # the same iterator repeated `num_items` times, so each tuple produced below consumes `num_items` consecutive items
    for padded_batch in zip_longest(*args, fillvalue=padding):
        yield tuple(item for item in padded_batch if item is not padding)    # strip the padding that `zip_longest` adds to a short final batch

In [None]:
# preview only the first batch of keys in the reference data redis
# NOTE: `batch` stays bound after the `break`; the following cell relies on it as its input
for batch in batcher(reference_data_redis_client.scan_iter(), 2000):    # 2000 is chosen because this is the value of `LARGE_BATCH_SIZE` in `get_predictions_from_batch_pricing(...)` and we do not want to make API calls here to keep things simple and self-contained
    print(batch)
    break

In [None]:
# TODO: perform `get_predictions_from_batch_pricing(...)` on each batch of CUSIPs, not just the first one, and perform post-processing on the output to identify cases where the calc date is less than 60 days away
cusips_to_price = [cusip for cusip in batch if cusip is not None]    # a batch shorter than the batch size may be padded with `None`, which is not a CUSIP and must not be sent for pricing
# NOTE(review): the redis clients are created without `decode_responses`, so the scanned keys are presumably `bytes`; confirm that batch pricing accepts them or decode first
get_predictions_from_batch_pricing((cusips_to_price, None, None), 500, 'S', 'eng@ficc.ai', False, access_token=None, logging=False)