<left>FINM 33150 - Quantitative Trading Strategies</left>
<left>Winter 2023</left>
<br>
<h1><center> Homework 4: Accumulation Opportunity </center></h1>
<center>Due - 23:00 [CST] February 2nd, 2023</center>
<br>
<h3>Ki Hyun</h3>
<h3>Student ID: 12125881</h3>

<h5> Imports </h5>

In [1]:
%matplotlib inline

In [14]:
import os
import datetime
import pickle
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sns
from enum import Enum

<h5> Constants </h5>

In [3]:
# constants needed for grabbing and cleaning data
# The raw-data directory is machine specific, so it may be overridden with the
# CRYPTO_DATA_HOME environment variable; the original location stays the default.
data_home_dir = os.environ.get(
    "CRYPTO_DATA_HOME",
    "C:\\Users\\kwhyu\\OneDrive - The University of Chicago\\2023-1 Winter\\FINM "
    "33150\\FINM-33150-W23\\Data\\Crypto\\",
)
pairs = ['BTC-USD', 'ETH-USD', 'ETH-BTC']
years = ['2021', '2022', '2023']
# constants needed for trading simulation
reaction_time = int(0.1 * (10 ** 9))  # 0.1 seconds expressed in nanoseconds
currency_crypto_cost = 50 * (10 ** (-4))  # 50 basis points; used for every pair except ETH-BTC
crypto_crypto_cost = 10 * (10 ** (-4))  # 10 basis points; used for the ETH-BTC pair

<h5> Helper Functions </h5>

In [4]:
class Order(Enum):
    """Side of a trade.

    The member values double as positional indices into the containers used in
    this notebook: ``read_data`` returns ``(book, buy, pair, year, sell)`` and
    ``data_factory`` stores ``[book, buy, sell]``, so ``Order.Buy.value`` (1)
    selects the buy trades and ``Order.Sell.value`` (-1) selects the sell
    trades, which are always the last element.
    """
    Sell = -1
    Buy = 1

In [5]:
def read_data(pair, year, data_dir=None):
    """Load the order-book and trades files for one pair and year.

    The files are expected to be named ``book_narrow_{pair}_{year}.delim.gz``
    and ``trades_narrow_{pair}_{year}.delim.gz`` and are read with
    ``pd.read_table``.

    Parameters
    ----------
    pair : str
        Pair name used in the file name, e.g. ``'BTC-USD'``.
    year : str
        Year used in the file name, e.g. ``'2021'``.
    data_dir : str, optional
        Directory holding the files. Defaults to the module-level
        ``data_home_dir``.

    Returns
    -------
    tuple
        ``(book, buy, pair, year, sell)``. ``book`` is ``None`` when the book
        file could not be read; ``buy`` and ``sell`` are both ``None`` when the
        trades file could not be read. Otherwise ``buy`` holds the rows with
        ``Side > 0`` and ``sell`` the rows with ``Side < 0``, indexed by
        ``('Time', 'Nano second')`` and sorted by that index.

    Notes
    -----
    ``Time`` is built with ``datetime.datetime.fromtimestamp`` and is therefore
    expressed in the local time zone of the machine running the notebook.
    """
    if data_dir is None:
        data_dir = data_home_dir
    # building the file paths for the pair and year
    book_path = os.path.join(data_dir, f'book_narrow_{pair}_{year}.delim.gz')
    trades_path = os.path.join(data_dir, f'trades_narrow_{pair}_{year}.delim.gz')

    # retrieving book data:
    print(">>> Retrieving Book data for ", pair, " for the year ", year, "...")
    try:
        book = pd.read_table(book_path)
        print(">>> Success!")
    except Exception as exc:  # best effort, but never swallow KeyboardInterrupt/SystemExit
        print(">>> Book data for ", pair, " for the year ", year, " does not exist")
        if not isinstance(exc, FileNotFoundError):
            # the file is there but unreadable: surface the real cause
            print(">>> Reason:", repr(exc))
        book = None

    # retrieving trades data:
    print(">>> Retrieving Trades data for ", pair, " for the year ", year, "...")
    try:
        trades = pd.read_table(trades_path)
        print(">>> Success!")
    except Exception as exc:
        print(">>> Trades data for ", pair, " for the year ", year, " does not exist")
        if not isinstance(exc, FileNotFoundError):
            print(">>> Reason:", repr(exc))
        return book, None, pair, year, None

    # indexing trades data by Time and Nano second
    # Integer floor division keeps the split exact; a float division of a
    # ~1.6e18 nanosecond timestamp can round up into the next second.
    whole_seconds = trades['timestamp_utc_nanoseconds'] // 10**9
    trades['Time'] = whole_seconds.map(lambda s: datetime.datetime.fromtimestamp(int(s)))
    trades['Nano second'] = trades['timestamp_utc_nanoseconds'] % 10**9
    trades = (trades.set_index(['Time', 'Nano second'])
              .sort_index()
              .drop(columns=['received_utc_nanoseconds']))

    # sorting into buy and sell data
    sell = trades[trades['Side'] < 0]
    buy = trades[trades['Side'] > 0]

    return book, buy, pair, year, sell

In [6]:
def clean_trading_data(raw_df, side, reaction_time):
    """Prepare one side of the trades for the participation simulation.

    Parameters
    ----------
    raw_df : tuple
        Output of ``read_data``: ``(book, buy, pair, year, sell)``.
    side : Order
        Which trades to clean; ``side.value`` is used as the index into
        ``raw_df`` (1 -> buy, -1 -> sell).
    reaction_time : int
        Minimum time, in nanoseconds, that must elapse between two trades
        flagged as ``qualify``.

    Returns
    -------
    pandas.DataFrame
        A copy of the selected trades without the ``timestamp_utc_nanoseconds``
        and ``Side`` columns, plus:

        * ``hold_time_nanoseconds`` - time until the next trade in the frame;
          rows whose hold time is not positive (including the last row) are
          dropped.
        * ``qualify`` - ``True`` on the row where the hold time accumulated
          since the previous qualifying row first reaches ``reaction_time``.
    """
    df = raw_df[side.value].copy()
    pair = raw_df[2]
    year = raw_df[3]
    detail = side.name  # "Buy" or "Sell"
    print(">>> Cleaning", pair, "for the year", year, detail, "Trades data for VWAP Algorithm based on reaction time",
          reaction_time/(10**9), " seconds...")

    # Differences are taken on the integer array so nanosecond precision is
    # kept; the last row is paired with itself and gets a hold time of 0,
    # which the filter below removes.
    timestamps = df['timestamp_utc_nanoseconds'].to_numpy()
    df['hold_time_nanoseconds'] = np.diff(timestamps, append=timestamps[-1:])
    df = df.drop(columns=['timestamp_utc_nanoseconds', 'Side'])
    df = df[df['hold_time_nanoseconds'] > 0].copy()

    # Walk the hold times positionally: this is independent of index labels
    # (which may repeat) and avoids one label lookup per row.
    qualify = []
    elapsed = 0
    for hold in df['hold_time_nanoseconds'].tolist():
        elapsed += hold
        if elapsed >= reaction_time:
            qualify.append(True)
            elapsed = 0
        else:
            qualify.append(False)
    df['qualify'] = qualify
    print(">>> Success!")
    return df

In [17]:
def data_factory(pairs, years, avoid_file_load = False, cache_path = r'./data/full_data.pkl'):
    """Return the cleaned data for every (pair, year), using a pickle cache.

    Parameters
    ----------
    pairs, years : iterable of str
        Every combination is loaded with ``read_data`` and cleaned with
        ``clean_trading_data``.
    avoid_file_load : bool, optional
        When ``True`` the cache file is neither read nor written.
    cache_path : str, optional
        Location of the pickle cache.

    Returns
    -------
    dict
        ``{(pair, year): [book, buy, sell]}``; ``buy`` and ``sell`` are
        ``None`` when the trades file could not be read. When a readable cache
        exists its content is returned as is and ``pairs``/``years`` are
        ignored.
    """
    if not avoid_file_load:
        try:
            with open(cache_path, 'rb') as f:
                # NOTE: unpickling can execute arbitrary code; only point
                # cache_path at files written by this notebook.
                ret_dict = pickle.load(f)
            print(">>> Found data...")
            return ret_dict
        except Exception as exc:
            # A missing or unreadable cache is not fatal: rebuild from raw files.
            print(">>> No data found")
            if not isinstance(exc, FileNotFoundError):
                print(">>> Reason:", repr(exc))
            print(">>> Proceeding to compute the data...")

    ret_dict = {}
    for pair in pairs:
        for year in years:
            raw_df = read_data(pair, year)
            if raw_df[1] is None:
                # no trades for this pair/year
                buy = None
                sell = None
            else:
                buy = clean_trading_data(raw_df, Order.Buy, reaction_time)
                sell = clean_trading_data(raw_df, Order.Sell, reaction_time)

            ret_dict[(pair, year)] = [raw_df[0], buy, sell]

    if not avoid_file_load:
        cache_dir = os.path.dirname(cache_path)
        if cache_dir:
            # the cache directory may not exist on a fresh checkout
            os.makedirs(cache_dir, exist_ok=True)
        with open(cache_path, 'wb') as f:
            pickle.dump(ret_dict, f)

    return ret_dict

In [8]:
def VWAP_participation(df_cleaned, Q, p, start_time,
                       side, transaction_cost):
    """Simulate accumulating a quantity by participating in observed trades.

    The cleaned trades from ``start_time`` onwards are cut into consecutive
    windows, each ending on a row flagged ``qualify``. In every window one
    trade is taken: the highest-priced one when buying, the lowest-priced one
    when selling (ties resolve to the earliest trade). The fill is ``p`` times
    that trade's size, capped so that the running total never exceeds ``Q``.

    Parameters
    ----------
    df_cleaned : pandas.DataFrame
        Output of ``clean_trading_data``, indexed by ``(Time, Nano second)``,
        with ``PriceMillionths``, ``SizeBillionths`` and ``qualify`` columns.
    Q : number
        Target quantity, in the same units as ``SizeBillionths``. May be
        infinite to take every window.
    p : float
        Participation rate applied to each selected trade's size.
    start_time : label
        First ``Time`` value to consider (``df_cleaned.loc[start_time:]``).
    side : Order
        ``side.value > 0`` (``Order.Buy``) buys, otherwise sells.
    transaction_cost : float
        Proportional cost applied to the notional of every fill.

    Returns
    -------
    pandas.DataFrame
        One row per fill, indexed by ``('datetime', 'Nano seconds')``, with
        ``PriceMillionths``, ``SizeBillionths``, ``NotionalMillionths``,
        ``TradingCostsMillionths`` and the running ``VWAPMillionths``.
    """
    df = df_cleaned.loc[start_time:]

    # Positive side values mean buying, matching the sign convention of Order.
    pick_extreme = np.argmax if side.value > 0 else np.argmin

    prices = df['PriceMillionths'].to_numpy()
    sizes = df['SizeBillionths'].to_numpy()
    # Positions (not labels) of the rows that close a window; positional
    # windows stay correct even when index labels repeat.
    window_ends = np.flatnonzero(df['qualify'].to_numpy(dtype=bool))

    ret_df = {'datetime': [],
              'Nano seconds': [],
              'PriceMillionths': [],
              'SizeBillionths': []}

    accumulated = 0
    window_start = 0
    for window_end in window_ends:
        if not accumulated < Q:
            break
        # argmax/argmin return the first extreme row inside the window
        pos = window_start + int(pick_extreme(prices[window_start:window_end + 1]))

        fill = sizes[pos] * p
        remaining = Q - accumulated
        if fill >= remaining:
            # final fill: take only what is still missing so the total is exactly Q
            fill = remaining
            accumulated = Q
        else:
            accumulated += fill

        stamp = df.index[pos]
        ret_df['datetime'].append(stamp[0])
        ret_df['Nano seconds'].append(stamp[1])
        ret_df['PriceMillionths'].append(prices[pos])
        ret_df['SizeBillionths'].append(fill)

        window_start = window_end + 1

    ret_df = pd.DataFrame(ret_df).set_index(['datetime', 'Nano seconds'])
    ret_df['NotionalMillionths'] = ret_df['PriceMillionths'] * ret_df['SizeBillionths'] / (10**9)
    ret_df['TradingCostsMillionths'] = ret_df['NotionalMillionths'] * transaction_cost
    # running volume-weighted average price, truncated to an integer
    ret_df['VWAPMillionths'] = (ret_df['NotionalMillionths'].cumsum() / ret_df['SizeBillionths'].cumsum() * (10**9))\
        .apply(int)

    return ret_df

In [None]:
def total_participation_opportunities(full_dict, p):
    """Write every participation opportunity of every data set to CSV.

    For each ``(pair, year)`` in ``full_dict`` and for both sides,
    ``VWAP_participation`` is run from the first available timestamp with an
    unbounded target quantity, and the result is saved to
    ``./data/{pair}_{year}_total_{buy|sell}_opportunities.csv``.

    Parameters
    ----------
    full_dict : dict
        Output of ``data_factory``: ``{(pair, year): [book, buy, sell]}``.
    p : float
        Participation rate forwarded to ``VWAP_participation``.

    Returns
    -------
    None
        Sides whose trades are missing (``None``) or empty are skipped.
    """
    for (pair, year), datasets in full_dict.items():
        if pair == 'ETH-BTC':
            transaction_cost = crypto_crypto_cost
        else:
            transaction_cost = currency_crypto_cost

        for side, label in ((Order.Buy, 'buy'), (Order.Sell, 'sell')):
            trades = datasets[side.value]
            if trades is None or len(trades) == 0:
                # data_factory stores None when the trades file was missing
                print(">>> Skipping", pair, year, label, "opportunities: no trades data")
                continue
            out_path = r'./data/' + pair + '_' + year + '_total_' + label + '_opportunities.csv'
            # np.inf (np.infty was removed in NumPy 2.0) means "never stop early"
            VWAP_participation(trades, np.inf, p, trades.index[0][0], side, transaction_cost).to_csv(out_path)

<h2> 2. Data </h2>

<h3> 2-1. Raw Data </h3>

<h3> 2-2. Data Processing </h3>

In [18]:
# Load the cleaned data for every (pair, year): read from the pickle cache when
# it can be loaded, otherwise built from the raw files.
data_dict = data_factory(pairs, years)

>>> Found data...


In [19]:
# Each entry is [book, buy, sell]; Order.Buy.value (1) picks the cleaned buy trades.
BTC_USD_2021_Buy = data_dict[('BTC-USD', '2021')][Order.Buy.value]

In [23]:
# Example run: buy from the first available timestamp with target Q = 1,000,000,000
# (compared against participated SizeBillionths), participation rate 5%, and the
# currency-crypto transaction cost.
VWAP_participation(BTC_USD_2021_Buy, 1000000000, 0.05, BTC_USD_2021_Buy.index[0][0], Order.Buy, currency_crypto_cost)

Unnamed: 0_level_0,Unnamed: 1_level_0,PriceMillionths,SizeBillionths,NotionalMillionths,TradingCostsMillionths,VWAPMillionths
datetime,Nano seconds,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-10 16:28:53,470961000,59122080000,189904.5,1.122755e+07,5.613775e+04,59122080000
2021-04-10 16:28:53,596148000,59119960000,126228.5,7.462624e+06,3.731312e+04,59121233506
2021-04-10 16:28:54,64537000,59119960000,27776.5,1.642146e+06,8.210728e+03,59121130649
2021-04-10 16:28:54,835128000,59123810000,984083.5,5.818277e+07,2.909138e+05,59123116130
2021-04-10 16:28:55,30223000,59123810000,11798.0,6.975427e+05,3.487714e+03,59123122240
...,...,...,...,...,...,...
2021-04-10 16:44:06,398627000,59120480000,8427.5,4.982378e+05,2.491189e+03,59118379271
2021-04-10 16:44:07,19364000,59120480000,23597.0,1.395066e+06,6.975330e+03,59118379321
2021-04-10 16:44:07,361807000,59120480000,174320.0,1.030588e+07,5.152941e+04,59118379688
2021-04-10 16:44:09,243497000,59120480000,21101.0,1.247501e+06,6.237506e+03,59118379733


<h3> 2-3. Data Description </h3>

<h2> 3. Analysis </h2>