In [1]:
import csv

import redis
import pickle
import pandas as pd
import multiprocess as mp    # using `multiprocess` instead of `multiprocessing` because function to be called in `map` is in the same file as the function which is calling it: https://stackoverflow.com/questions/41385708/multiprocessing-example-giving-attributeerro

In [2]:
# Copied verbatim from `cloud_functions/fast_trade_history_redis_update/main.py`.
# Maps each per-trade feature name to a type-name string. The key order matters:
# the keys are used, in this order, as the column names of each trade history DataFrame.
FEATURES_FOR_EACH_TRADE_IN_HISTORY = {
    # identifiers and timestamps
    'msrb_valid_from_date': 'DATETIME',
    'msrb_valid_to_date': 'DATETIME',
    'rtrs_control_number': 'INTEGER',
    'trade_datetime': 'DATETIME',
    'publish_datetime': 'DATETIME',
    # pricing and size
    'yield': 'FLOAT',
    'dollar_price': 'FLOAT',
    'par_traded': 'NUMERIC',
    # trade descriptors and flags
    'trade_type': 'STRING',
    'is_non_transaction_based_compensation': 'BOOLEAN',
    'is_lop_or_takedown': 'BOOLEAN',
    'brokers_broker': 'STRING',
    'is_alternative_trading_system': 'BOOLEAN',
    'is_weighted_average_price': 'BOOLEAN',
    # dates
    'settlement_date': 'DATE',
    'calc_date': 'DATE',
    'calc_day_cat': 'INTEGER',
    'maturity_date': 'DATE',
    'next_call_date': 'DATE',
    'par_call_date': 'DATE',
    'refund_date': 'DATE',
    # bookkeeping
    'transaction_type': 'STRING',
    'sequence_number': 'INTEGER',
}

In [3]:
# Client for the Redis database whose keys are scanned below and whose values are
# unpickled into per-key trade histories.
# NOTE(review): the host is a hard-coded address; consider reading it from configuration.
trade_history_redis_client = redis.Redis(
    host='10.75.46.228',
    port=6379,
    db=0,
)

In [4]:
def has_negative_yields(trade_history_df):
    '''Return whether any trade with a non-missing yield has a yield below zero.

    Missing yields are dropped before the comparison, so they never count as negative.
    `trade_history_df` must have a `yield` column.
    '''
    observed_yields = trade_history_df['yield'].dropna()    # ignore trades with missing yields
    return observed_yields.lt(0).any()


def has_missing_yields(trade_history_df):
    '''Return whether at least one trade in `trade_history_df` has a missing (NA) `yield`.'''
    yield_is_missing = trade_history_df['yield'].isna()
    return yield_is_missing.any()

Without multiprocessing.

In [5]:
# Sequentially scan every key (CUSIP) in Redis and record which trade histories contain
# negative yields and which contain missing yields. A CUSIP can appear in both lists.
negative_yields_in_trade_history = []
missing_yields_in_trade_history = []
trade_history_columns = list(FEATURES_FOR_EACH_TRADE_IN_HISTORY.keys())    # loop-invariant, so build it once
cusips_already_checked = set()    # Redis SCAN may return the same key more than once; without this a CUSIP could be recorded twice
for cusip in trade_history_redis_client.scan_iter():    # scan_iter() is superior to keys() for large numbers of keys because it gives you an iterator you can use rather than trying to load all the keys into memory; https://stackoverflow.com/questions/22255589/get-all-keys-in-redis-database-with-python
    if cusip in cusips_already_checked:
        continue
    cusips_already_checked.add(cusip)

    pickled_trade_history = trade_history_redis_client.get(cusip)
    if pickled_trade_history is None:    # key disappeared between the scan and the get; there is nothing to check
        continue

    # NOTE(review): pickle.loads can execute arbitrary code; this assumes the Redis instance only holds trusted data
    trade_history_df = pd.DataFrame(pickle.loads(pickled_trade_history), columns=trade_history_columns)
    cusip_as_string = cusip.decode('utf-8')    # decode with 'utf-8' is necessary since the keys are byte-strings; https://stackoverflow.com/questions/606191/convert-bytes-to-a-string-in-python-3
    if has_negative_yields(trade_history_df):    # may have missing yields as well
        negative_yields_in_trade_history.append(cusip_as_string)
    if has_missing_yields(trade_history_df):
        missing_yields_in_trade_history.append(cusip_as_string)

With multiprocessing.

In [None]:
def has_negative_yields_or_missing_yields(cusip):
    '''Check the trade history stored in Redis under `cusip` for negative and missing yields.

    `cusip` is a Redis key as a byte-string. Returns a 3-tuple
    `(cusip_as_string, has_negative_yield, has_missing_yield)`, where the first item is
    `cusip` decoded with 'utf-8'. If no value is stored under `cusip`, both flags are False.
    '''
    cusip_as_string = cusip.decode('utf-8')    # decode with 'utf-8' is necessary since the keys are byte-strings; https://stackoverflow.com/questions/606191/convert-bytes-to-a-string-in-python-3
    pickled_trade_history = trade_history_redis_client.get(cusip)
    if pickled_trade_history is None:    # key disappeared after it was scanned; no trades means nothing to flag
        return cusip_as_string, False, False

    # Build the DataFrame exactly as the sequential version does: the yield helpers index
    # the `yield` column by name, so the unpickled object must be given the feature column names first.
    # NOTE(review): pickle.loads can execute arbitrary code; this assumes the Redis instance only holds trusted data
    trade_history_df = pd.DataFrame(pickle.loads(pickled_trade_history), columns=list(FEATURES_FOR_EACH_TRADE_IN_HISTORY.keys()))
    return cusip_as_string, has_negative_yields(trade_history_df), has_missing_yields(trade_history_df)

In [None]:
# Redis SCAN may return the same key more than once; dict.fromkeys() drops repeats while
# keeping first-seen order, so each CUSIP is checked (and can be recorded) only once.
cusips_to_check = list(dict.fromkeys(trade_history_redis_client.scan_iter()))

# Check every CUSIP in parallel; each result is (cusip_as_string, has_negative_yield, has_missing_yield).
with mp.Pool() as pool_object:
    cusips_with_negative_yields_or_missing_yields = pool_object.map(has_negative_yields_or_missing_yields, cusips_to_check)

negative_yields_in_trade_history, missing_yields_in_trade_history = [], []
# The unpacked flag names are deliberately distinct from the module-level functions
# `has_negative_yields` / `has_missing_yields`: testing the function object instead of the
# flag is always truthy and would put every CUSIP into the list.
for cusip, negative_yield_found, missing_yield_found in cusips_with_negative_yields_or_missing_yields:
    if negative_yield_found:
        negative_yields_in_trade_history.append(cusip)
    if missing_yield_found:
        missing_yields_in_trade_history.append(cusip)

Write results to CSV.

In [6]:
def write_cusip_list_to_csv(cusip_list, csv_file_name):
    '''Write the CUSIPs in `cusip_list` to the csv at `csv_file_name`, one CUSIP per row.

    The file is opened in write mode, so any existing content is replaced.
    Returns `csv_file_name`.
    '''
    with open(csv_file_name, 'w', newline='') as output_file:
        # each CUSIP becomes its own single-column row
        csv.writer(output_file).writerows([cusip] for cusip in cusip_list)
    return csv_file_name

In [7]:
# Export each list of flagged CUSIPs to its own csv file (one CUSIP per row).
# Each call returns the file name it wrote to; the last call is the final expression
# of the cell, so its return value is what the notebook displays as the cell output.
write_cusip_list_to_csv(negative_yields_in_trade_history, 'cusips_with_negative_yields_in_trade_history.csv')
write_cusip_list_to_csv(missing_yields_in_trade_history, 'cusips_with_missing_yields_in_trade_history.csv')

'cusips_with_missing_yields_in_trade_history.csv'