This notebook manually writes trades into the trade history redis when trades are missing from it. It has been needed because of transient issues with the `get_msrb_trade_messages` cloud function and the `fast_trade_history_redis_update` cloud function. See https://www.notion.so/Research-on-making-sure-we-are-getting-all-the-data-c5147fbaf0494d0abf051b557357ba1b for more details. Those particular transient issues have since been handled (by updating `latest_sequence_number` at the end of `fast_trade_history_redis_update` instead of at the beginning), but this notebook may still be valuable in any situation that requires manually updating the trade history redis.

In [None]:
import os

# Point the Google client libraries at the local service-account key, but only when that key file is actually present
# on this machine. This removes the need to comment the line out when the notebook is not run locally: if the file is
# absent, the environment is left untouched and whatever credentials are already configured are used instead.
# NOTE(review): the path is machine-specific; consider moving it to a configuration value.
LOCAL_CREDENTIALS_PATH = '/home/user/ficc/mitas_creds.json'
if os.path.isfile(LOCAL_CREDENTIALS_PATH):
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = LOCAL_CREDENTIALS_PATH

import decimal
import pandas as pd

from google.cloud import bigquery

from main import get_frequency, \
                 add_restrictions_on_interest_payment_frequency, \
                 typecast_yield, \
                 add_calc_date, \
                 concatenate_date_and_time_objects_into_datetime_object, \
                 typecast_for_bigquery, \
                 update_trade_history_redis, \
                 upload_trade_history_to_trade_history_bigquery

In [None]:
# BigQuery client created once for the whole notebook; `sqltodf(...)` uses it to run every query
BQ_CLIENT = bigquery.Client()

In [None]:
def sqltodf(sql_query, limit=''):
    """Run `sql_query` with the notebook's BigQuery client and return the result as a dataframe.

    Args:
        sql_query: text of the query to execute.
        limit: optional row count. When it is anything other than the empty string, ` ORDER BY RAND() LIMIT {limit}`
            is appended to the query so that a random sample of that many rows is returned. The clause is appended
            verbatim, so `sql_query` must not already end with a clause that conflicts with it.

    Returns:
        The dataframe produced by calling `.to_dataframe()` on the query result.
    """
    sampling_clause = '' if limit == '' else f' ORDER BY RAND() LIMIT {limit}'
    query_job = BQ_CLIENT.query(sql_query + sampling_clause)
    return query_job.result().to_dataframe()

In [None]:
# Cache registry: each query string maps to the pickle file that stores its result, so that after running the query
# (which takes about 5 minutes) it does not need to be re-run if there are downstream errors. A query must match its
# key character-for-character to be found here.
query_to_pickle_filename = {
    'select * from `auxiliary_views.trades_with_ref_data_pd` where rtrs_control_number = 2024010300517200':
        '923004TL9.pkl',
    'SELECT b.*EXCEPT (material_event_history,default_event_history) FROM `eng-reactor-287421.jesse_tests.missing_trades` a LEFT JOIN `auxiliary_views.trades_with_ref_data_pd` b ON a.rtrs_control_number = b.rtrs_control_number':
        'missing_trades.pkl',
}

In [None]:
# Load the trades to process: reuse the pickled result of a previous run when one exists, otherwise run the query
# and (if a pickle filename is registered for this exact query string) cache the result for later re-runs
query = 'SELECT b.*EXCEPT (material_event_history,default_event_history) FROM `eng-reactor-287421.jesse_tests.missing_trades` a LEFT JOIN `auxiliary_views.trades_with_ref_data_pd` b ON a.rtrs_control_number = b.rtrs_control_number'
pickle_filename = query_to_pickle_filename.get(query)    # `None` when no pickle file is registered for this query
has_cached_result = pickle_filename is not None and os.path.isfile(pickle_filename)

if not has_cached_result:
    all_data = sqltodf(query)
    all_data['series_id'] = all_data['issue_key']    # `series_id` is a copy of `issue_key`; it is added before pickling so the cached file contains it too
    if pickle_filename is not None:
        all_data.to_pickle(pickle_filename)
else:
    all_data = pd.read_pickle(pickle_filename)

In [None]:
# Display the loaded trades so they can be inspected before the processing cells below modify `all_data`
all_data

In [None]:
# Convert every column that holds `decimal.Decimal` values to a numeric dtype. The column type is detected from the
# first NON-NULL value instead of the first row: inspecting only the first row raises an `IndexError` when the
# dataframe is empty and silently skips a decimal column whenever its first row happens to be null (possible here
# because the query is a LEFT JOIN, so unmatched trades produce rows of nulls).
for column in all_data.columns:
    non_null_values = all_data[column].dropna()
    if non_null_values.empty: continue    # empty dataframe or all-null column: nothing to inspect or convert
    if isinstance(non_null_values.iloc[0], decimal.Decimal):
        print(f'{column} is currently type decimal.Decimal, so converting it to float')
        all_data[column] = pd.to_numeric(all_data[column])

In [None]:
# `concatenate_date_and_time_objects_into_datetime_object(...)` expects these columns to be strings instead of
# datetime.time objects, so cast them to string type before that function is called
time_columns = ('publish_time', 'time_of_trade')
for time_column in time_columns:
    all_data[time_column] = all_data[time_column].astype(str)

In [None]:
# Apply the interest payment frequency restrictions first; the remaining steps only run if any trades survive them
all_data_after_restrictions = add_restrictions_on_interest_payment_frequency(all_data)

if len(all_data_after_restrictions) > 0:
    all_data = all_data_after_restrictions
    all_data['interest_payment_frequency'] = all_data['interest_payment_frequency'].apply(get_frequency)

    # each of these helpers takes the dataframe and returns the dataframe to use for the next step; order matters
    for transform in (typecast_yield, add_calc_date, concatenate_date_and_time_objects_into_datetime_object):
        all_data = transform(all_data)

    # sometimes the numerical data that is supposed to be an integer comes in as a float causing an error when
    # attempting to upload to BigQuery; use `Int64` instead of `int` to allow conversion when there are `None` values:
    # https://stackoverflow.com/questions/26614465/python-pandas-apply-function-if-a-column-value-is-not-null
    all_data = typecast_for_bigquery(all_data, {'issue_key': 'Int64'})

    # write the trades to the trade history redis, then upload what that call returns to BigQuery
    cusip_trade_history_pairs = update_trade_history_redis(all_data)
    upload_trade_history_to_trade_history_bigquery(cusip_trade_history_pairs)
else:
    # do not perform any processing if there are no trades after applying restrictions; show the input for debugging
    print(f'No trades left after applying restrictions. Before restrictions:')
    print(all_data.to_markdown())

In [None]:
# Remove the pickle file after completing the processing so that a later run queries fresh data instead of reusing it
cached_file_exists = pickle_filename is not None and os.path.isfile(pickle_filename)
if cached_file_exists:
    os.remove(pickle_filename)