In [1]:
import sys
sys.path.append('..')

from data_parsers import databento_file_parser
from executor import SimpleExecutor1
from itertools import product

import pandas as pd
import math
import numpy as np

from tqdm import tqdm
import math
import numpy
import matplotlib.pyplot as plt
import random
import numpy as np

# Raise pandas' display limits so wide/long result frames are not truncated
# as aggressively when rendered in the notebook.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 80

In [2]:
# Inventory-skewed quoting (named after Ho & Stoll)

def quote_ho_and_stoll(mid_price, q, a = 0.01, b=0.01, s=0.06):
    '''
    Return bid and ask quotes around the mid price, skewed against inventory.

    A fixed spread `s` is centred on `mid_price`, then both quotes are shifted
    by half of an inventory-dependent skew

        d = -sign(q) * (a * |q| + b * q**2) / 100

    so a long position (q > 0) moves both quotes down and a short position
    (q < 0) moves both quotes up. With q == 0 the quotes are symmetric.

    Input:
    - mid_price = current mid price (scalar or array-like)
    - q = current signed position (scalar or array-like)
    - a = 0.01, coefficient of the linear inventory term
    - b = 0.01, coefficient of the quadratic inventory term
    - s = 0.06, quoted bid-ask spread (was a hard-coded constant)

    Output:
    - (rb, ra) = bid quote and ask quote, each rounded to 2 decimals
    '''
    abs_q = np.abs(q)

    # -np.sign(q) equals the scalar rule "-1 if q > 0 else +1" for every
    # q != 0; at q == 0 the skew is zero under either rule. Unlike the
    # ternary, it also works element-wise on array inventories.
    d = -np.sign(q) * (a * abs_q + b * abs_q ** 2) / 100

    # np.round (rather than the .round method) also accepts plain Python floats.
    ra = np.round(mid_price + s / 2 + d / 2, 2)
    rb = np.round(mid_price - s / 2 + d / 2, 2)

    return rb, ra

# Sanity check: with a short position (q = -200) the skew is positive, so both
# quotes shift upward relative to the symmetric quotes around the mid of 10.
quote_ho_and_stoll(10, -200, a = 0.01, b=0.0001)

(10.0, 10.06)

In [3]:
class BOT1(SimpleExecutor1):
    """Executor whose quotes come from `quote_ho_and_stoll`."""

    def calculate_bid_ask_price(self, bid_orderbook, ask_orderbook, inventory):
        """
        Compute the bid and ask prices from the orderbook and the inventory.

        The mid price is the average of element [0][0] of each book side
        (presumably the best ask and best bid price; confirm against
        SimpleExecutor1). The quotes are then produced by
        `quote_ho_and_stoll` with a = 0.01 and b = 0.0001.

        Returns the pair (bid, ask).
        """
        top_ask = ask_orderbook[0][0]
        top_bid = bid_orderbook[0][0]
        mid_price = (top_ask + top_bid) / 2

        # NOTE: mid_price / bid / ask must keep these names, because the
        # f-string below uses the `{name=}` form and prints them verbatim.
        bid, ask = quote_ho_and_stoll(mid_price, inventory, a=0.01, b=0.0001)

        if self.is_verbose_cnt():
            print(f"{mid_price=} {bid=} {ask=} {self.sigma_1min=:.4f} {self.time_frac_elapsed=:.4f}")

        return bid, ask

# Tag embedded in the filenames of the CSV outputs written by later cells.
model_name = 'ho_and_stoll'

In [8]:
# Wider universe and train/test split, kept for reference.
# NOTE(review): 'APPL' below is presumably a typo for 'AAPL'; confirm against
# the data directory names before re-enabling.
# TICKERS = ['AMZN', 'MSFT', 'APPL']
# TRAIN_DATES = ['20230515', '20230516']
# TEST_DATES = ['20230517', '20230518', '20230519']

# Active configuration for this run.
TICKERS = ['MSFT']
TEST_DATES = [
    '20230515',
    '20230516',
    '20230517',
    '20230518',
    '20230519',
]
LATENCY = [0, 5, 10]

# Every (ticker, date) combination, ticker-major, in the order listed above.
TEST_SAMPLE = [(ticker, date) for ticker in TICKERS for date in TEST_DATES]
TEST_SAMPLE

[('MSFT', '20230515'),
 ('MSFT', '20230516'),
 ('MSFT', '20230517'),
 ('MSFT', '20230518'),
 ('MSFT', '20230519')]

In [9]:
# Run one independent simulation per (latency, ticker, date, 30-minute window)
# and store each run's `output_data` under the key (ticker, latency, start_ts).
#
# NOTE(review): the same data file is parsed once per latency value. Hoisting
# the parse would change either the insertion order of `output` or peak memory,
# so it is left as is here.
output = dict()
for latency, (ticker, date) in product(LATENCY, TEST_SAMPLE):
    data_file_path = f'../data/databento/{ticker}/xnas-itch-{date}.mbp-10.dbn.zst'
    data_df = databento_file_parser(data_file_path)

    print(f"\n*** Starting for {ticker=}, {date=}, {latency=} ***")
    print(len(data_df))

    # A fresh bot per window, so no state carries over between windows.
    for start_ts, group_df in data_df.resample('30min'):
        print(start_ts, len(group_df))
        bot1 = BOT1(ticker=ticker, latency=latency)
        bot1.run_sim(group_df)
        output[(ticker, latency, start_ts)] = bot1.output_data


*** Starting for ticker='MSFT', date='20230515', latency=0 ***
1248264
2023-05-15 14:00:00 148372
mid_price=308.81 bid=308.78 ask=308.84 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-22809.869999999984 position=74 pnl=-18.749999999985157 trading_pnl=45.25500000000239 trading_volume=2132 avg_size=4.294849542963435 avg_size_square=39.332015539940784
2023-05-15 14:30:00 143007
mid_price=307.665 bid=307.64 ask=307.7 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-21943.010000000002 position=71 pnl=29.719999999996798 trading_pnl=50.57500000000472 trading_volume=2095 avg_size=5.147802555819608 avg_size_square=70.53491763626867
2023-05-15 15:00:00 110186
mid_price=309.185 bid=309.16 ask=309.21 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-921.5199999999841 position=3 pnl=25.770000000015997 trading_pnl=34.38500000000731 trading_volume=1861 avg_size=7.172096426324777 avg_size_square=123.19704584896125
2023-05-15 15:30:00 96071
mid_price=309.57 bid=309.54 as

In [10]:
# One row per simulation run: the (ticker, latency, start_ts) key becomes the
# first three columns, and the per-run 'trades' / 'quotes' entries are dropped
# from this summary table.
key_columns = ['ticker', 'latency', 'start_ts']
out_df = (
    pd.DataFrame(output)
    .T
    .reset_index()
    .drop(columns=['trades', 'quotes'])
)
# reset_index() puts the three key levels first; rename them by position.
out_df.columns = key_columns + out_df.columns[len(key_columns):].tolist()

# utility = 100 * (net_pnl + 2 * trading_pnl - 0.01 * avg_size_square) / trading_volume
utility_numerator = (
    out_df['net_pnl']
    + 2 * out_df['trading_pnl']
    - 0.01 * out_df['avg_size_square']
)
out_df['utility'] = 100 * utility_numerator / out_df['trading_volume']

# Mean and standard deviation of every metric per (ticker, latency).
per_ticker_latency = out_df.groupby(['ticker', 'latency'])
print(per_ticker_latency.mean())
print(per_ticker_latency.std())

out_df.to_csv(f'backtest_output_{model_name}.csv')
out_df

                                    start_ts eod_position     eod_cash  \
ticker latency                                                           
MSFT   0       2023-05-17 16:44:59.999999744     1.466667  -435.491833   
       5       2023-05-17 16:44:59.999999744   -11.916667  3725.929333   
       10      2023-05-17 16:44:59.999999744    -5.816667  1809.320333   

                  net_pnl trading_pnl  avg_size avg_size_square  \
ticker latency                                                    
MSFT   0          18.9575   53.414917  7.020246      115.892719   
       5           22.409   50.054417  5.857464       83.948683   
       10       21.560667    50.49275  5.951549       87.641085   

               trading_volume volatility net_return   utility  
ticker latency                                                 
MSFT   0               2187.5   0.045957     -0.105  5.493972  
       5          2748.016667   0.045957     -0.105  4.406604  
       10         2831.216667   0.045

Unnamed: 0,ticker,latency,start_ts,eod_position,eod_cash,net_pnl,trading_pnl,avg_size,avg_size_square,trading_volume,volatility,net_return,utility
0,MSFT,0,2023-05-15 14:00:00,74,-22809.87,-18.75,45.255,4.29485,39.332016,2132,0.064561,1.155,3.347405
1,MSFT,0,2023-05-15 14:30:00,71,-21943.01,29.72,50.575,5.147803,70.534918,2095,0.065185,-1.565,6.21311
2,MSFT,0,2023-05-15 15:00:00,3,-921.52,25.77,34.385,7.172096,123.197046,1861,0.05035,-0.445,5.013865
3,MSFT,0,2023-05-15 15:30:00,37,-11423.13,49.69,31.365,6.06178,70.393226,1207,0.045363,-0.19,9.255681
4,MSFT,0,2023-05-15 16:00:00,6,-1815.97,52.33,38.38,5.783605,82.255848,1358,0.03428,0.705,9.44532
...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,MSFT,10,2023-05-19 17:30:00,21,-6653.81,30.02,26.84,7.416965,110.383076,1549,0.031336,0.59,5.332225
176,MSFT,10,2023-05-19 18:00:00,11,-3487.87,31.64,36.82,5.307929,67.942903,2173,0.040733,-0.52,4.813648
177,MSFT,10,2023-05-19 18:30:00,15,-4740.83,52.89,39.49,6.18815,86.810744,2297,0.034842,-0.015,5.703173
178,MSFT,10,2023-05-19 19:00:00,53,-16860.84,14.84,31.68,7.755813,120.824781,1903,0.032988,-0.075,4.045809


In [11]:
# Collect the per-run 'trades' and 'quotes' frames, tag each with the key of
# the run it came from, and write one combined CSV per kind.
collected = {'trades': [], 'quotes': []}

for (ticker, latency, start_ts), run_output in output.items():
    for section in ('trades', 'quotes'):
        frame = run_output[section]
        # The tag columns are added in place, i.e. on the frames held in `output`.
        frame['ticker'] = ticker
        frame['latency'] = latency
        frame['start_ts'] = start_ts
        collected[section].append(frame)

trades_df = pd.concat(collected['trades'])
quotes_df = pd.concat(collected['quotes'])

trades_df.to_csv(f'backtest_output_{model_name}_trades.csv')
quotes_df.to_csv(f'backtest_output_{model_name}_quotes.csv')