In [1]:
import sys
sys.path.append('..')

from data_parsers import databento_file_parser
from executor import SimpleExecutor1
from itertools import product

import pandas as pd
import math
import numpy as np

from tqdm import tqdm
import math
import numpy
import matplotlib.pyplot as plt
import random
import numpy as np

# Raise pandas' display limits so the result tables shown below are truncated
# only beyond 100 rows / 80 columns.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 80)

In [2]:
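# Ho & Stoll-style inventory-skewed quoting

'''
This algo gives bid and ask quotes from an inventory-skewed rule in the style of the Ho and Stoll model.
(This header previously described the Sasha Stoikov and Marco Avellaneda model and its inputs
sigma = market volatility, qmax = maximum position, gamma = 0.1 risk aversion parameter and
k = 1.5 market model parameter; the function below takes none of those.)

Input:
- mid_price = current mid price
- q = current position (signed inventory)
- s = 0.06, quoted spread (ask minus bid, before rounding)
- a = 0.01, coefficient of the linear inventory term of the skew
- b = 0.01, coefficient of the quadratic inventory term of the skew
'''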

def quote_ho_and_stoll(mid_price, q, s = 0.06, a = 0.01, b=0.01):
    """
    Return a (bid, ask) quote pair centred on `mid_price` and skewed by inventory.

    Both quotes are shifted by half of
        d = sign * (a * |q| + b * |q|**2) / 100
    where sign is -1 for a long position (q > 0) and +1 otherwise, so a long
    position moves both quotes down and a short position moves them up.

    Parameters:
    - mid_price: current mid price
    - q: current signed position
    - s: distance between ask and bid before rounding
    - a: coefficient of the linear |q| term of the skew
    - b: coefficient of the quadratic |q| term of the skew

    Returns:
    - (bid, ask), each rounded to 2 decimals
    """
    inventory_size = np.abs(q)
    direction = -1 if q > 0 else +1

    skew = direction * (a * inventory_size + b * inventory_size ** 2) / 100
    half_spread = s / 2
    half_skew = skew / 2

    # `.round` is the NumPy scalar method; the values are NumPy scalars because
    # `np.abs` returns one.
    ask_quote = (mid_price + half_spread + half_skew).round(2)
    bid_quote = (mid_price - half_spread + half_skew).round(2)

    return bid_quote, ask_quote

# Sanity check: with a short position (q = -200) both quotes are shifted above
# the unskewed 99.98 / 100.02 pair around the 100 mid price.
quote_ho_and_stoll(100, -200, s = 0.04, a = 0.01, b=0.0001)

(100.01, 100.05)

In [3]:
# Per-ticker arguments forwarded to `quote_ho_and_stoll`:
#   s = quoted spread, a / b = linear / quadratic inventory-skew coefficients.
params = {
    'MSFT': dict(s=0.06, a=0.01, b=0.0001),
    'AMZN': dict(s=0.05, a=0.01, b=0.0001),
    'NVDA': dict(s=0.05, a=0.01, b=0.0001),
}

class BOT1(SimpleExecutor1):
    """Executor whose quotes come from `quote_ho_and_stoll` with the per-ticker `params`."""

    def calculate_bid_ask_price(self, bid_orderbook, ask_orderbook, inventory):
        """
        Compute the bid and ask prices from the orderbook and the current inventory.

        The mid price is the average of `ask_orderbook[0][0]` and
        `bid_orderbook[0][0]` (presumably the best ask / best bid prices --
        confirm against SimpleExecutor1). The quotes are then produced by
        `quote_ho_and_stoll` using the `s`, `a`, `b` values registered in
        `params` for `self.ticker`.

        Returns the (bid, ask) pair.
        """
        top_ask = ask_orderbook[0][0]
        top_bid = bid_orderbook[0][0]
        mid_price = (top_ask + top_bid)/2

        ticker_params = params[self.ticker]
        bid, ask = quote_ho_and_stoll(
            mid_price,
            inventory,
            s = ticker_params['s'],
            a = ticker_params['a'],
            b = ticker_params['b'],
        )

        # Names inside the f-string are part of the printed text (`=` specifier).
        if self.is_verbose_cnt():
            print(f"{mid_price=} {bid=} {ask=} {self.sigma_1min=:.4f} {self.time_frac_elapsed=:.4f}")

        return bid, ask

# Tag embedded in the names of the CSV files written by the cells below
# (backtest_output_{model_name}*.csv).
model_name = 'ho_and_stoll'

In [4]:
# TRAIN_DATES = ['20230515', '20230516']
# TEST_DATES = ['20230517', '20230518', '20230519']

# Experiment grid: every ticker is backtested on every date, at every latency.
TICKERS = [
    'AMZN',
    'MSFT',
    'NVDA',
]
TEST_DATES = [
    '20230515',
    '20230516',
    '20230517',
    '20230518',
    '20230519',
]
# TEST_DATES = ['20230515']

# Passed straight to BOT1(latency=...); units are defined by SimpleExecutor1 -- TODO confirm.
LATENCY = [0, 5, 10]

# All (ticker, date) pairs, ticker-major (same order as itertools.product).
TEST_SAMPLE = [(ticker, date) for ticker in TICKERS for date in TEST_DATES]
TEST_SAMPLE

[('AMZN', '20230515'),
 ('AMZN', '20230516'),
 ('AMZN', '20230517'),
 ('AMZN', '20230518'),
 ('AMZN', '20230519'),
 ('MSFT', '20230515'),
 ('MSFT', '20230516'),
 ('MSFT', '20230517'),
 ('MSFT', '20230518'),
 ('MSFT', '20230519'),
 ('NVDA', '20230515'),
 ('NVDA', '20230516'),
 ('NVDA', '20230517'),
 ('NVDA', '20230518'),
 ('NVDA', '20230519')]

In [5]:
# Run one simulation per (latency, ticker, date, 30-minute window) and collect
# each run's `output_data` under the key (ticker, latency, window start).
#
# NOTE(review): the data file depends only on (ticker, date) but is parsed again
# for every latency value. Hoisting the parse would change the order in which
# results are produced, so it is left as is here.
output = {}
for latency, (ticker, date) in product(LATENCY, TEST_SAMPLE):
    data_file_path = f'../data/databento/{ticker}/xnas-itch-{date}.mbp-10.dbn.zst'
    data_df = databento_file_parser(data_file_path)
    print(f"\n*** Starting for {ticker=}, {date=}, {latency=} ***")
    print(len(data_df))

    # Each 30-minute window is simulated with a freshly constructed bot.
    for start_ts, group_df in data_df.resample('30min'):
        print(start_ts, len(group_df))
        bot1 = BOT1(ticker=ticker, latency=latency)
        bot1.run_sim(group_df)
        run_key = (ticker, latency, start_ts)
        output[run_key] = bot1.output_data


*** Starting for ticker='AMZN', date='20230515', latency=0 ***
1602775
2023-05-15 14:00:00 208799
mid_price=110.035 bid=110.01 ask=110.06 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-7421.679999999997 position=67 pnl=4.330000000003707 trading_pnl=109.39000000000554 trading_volume=6941 avg_size=14.076532841566122 avg_size_square=420.2973752151014
2023-05-15 14:30:00 155829
mid_price=109.805 bid=109.78 ask=109.83 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-4083.5099999999848 position=37 pnl=36.78000000001519 trading_pnl=91.31499999999758 trading_volume=5473 avg_size=14.631633818185893 avg_size_square=376.4718044273715
2023-05-15 15:00:00 128587
mid_price=109.875 bid=109.85 ask=109.9 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=3302.0200000000086 position=-30 pnl=24.400000000008895 trading_pnl=56.56000000000842 trading_volume=3768 avg_size=18.105425732179164 avg_size_square=579.3816901516034
2023-05-15 15:30:00 142390
mid_price=110.525 bid=110.5 

In [6]:
# One row per run: the (ticker, latency, start_ts) key becomes three leading
# columns; the per-run 'trades' and 'quotes' entries are dropped from this
# summary table (they are exported separately).
out_df = pd.DataFrame(output).T.reset_index().drop(columns=['trades', 'quotes'])

key_columns = ['ticker', 'latency', 'start_ts']
out_df.columns = key_columns + list(out_df.columns)[len(key_columns):]

# Utility score per run, scaled by 100 and normalised by traded volume.
pnl_score = out_df['net_pnl'] + 2 * out_df['trading_pnl'] - 0.01 * out_df['avg_size_square']
out_df['utility'] = 100 * pnl_score / out_df['trading_volume']

# Mean and standard deviation of every metric per (ticker, latency).
by_ticker_latency = out_df.groupby(['ticker', 'latency'])
print(by_ticker_latency.mean())
print(by_ticker_latency.std())

out_df.to_csv(f'backtest_output_{model_name}.csv')
out_df

                                    start_ts eod_position     eod_cash  \
ticker latency                                                           
AMZN   0       2023-05-17 16:44:59.999999744    -8.783333  1016.000833   
       5       2023-05-17 16:44:59.999999744    -5.316667   602.133667   
       10      2023-05-17 16:44:59.999999744    -1.783333   185.961667   
MSFT   0       2023-05-17 16:44:59.999999744     1.466667  -435.491833   
       5       2023-05-17 16:44:59.999999744   -11.916667  3725.929333   
       10      2023-05-17 16:44:59.999999744    -5.816667  1809.320333   
NVDA   0       2023-05-17 16:44:59.999999744     3.966667   -1308.2335   
       5       2023-05-17 16:44:59.999999744     1.416667  -796.069833   
       10      2023-05-17 16:44:59.999999744     0.516667  -435.133333   

                  net_pnl trading_pnl   avg_size avg_size_square  \
ticker latency                                                     
AMZN   0        27.143667   74.991417  14.116874 

Unnamed: 0,ticker,latency,start_ts,eod_position,eod_cash,net_pnl,trading_pnl,avg_size,avg_size_square,trading_volume,volatility,net_return,utility
0,AMZN,0,2023-05-15 14:00:00,67,-7421.68,4.33,109.39,14.076533,420.297375,6941,0.042363,0.295,3.153825
1,AMZN,0,2023-05-15 14:30:00,37,-4083.51,36.78,91.315,14.631634,376.471804,5473,0.033384,-0.07,3.940166
2,AMZN,0,2023-05-15 15:00:00,-30,3302.02,24.4,56.56,18.105426,579.38169,3768,0.024774,-0.67,3.495918
3,AMZN,0,2023-05-15 15:30:00,-54,5951.01,18.6,54.66,14.672268,381.564298,3324,0.024531,0.06,3.733585
4,AMZN,0,2023-05-15 16:00:00,71,-7836.0,-3.315,36.105,10.791926,344.319009,2091,0.019413,0.49,3.130168
...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,NVDA,10,2023-05-19 17:30:00,118,-36849.9,17.22,70.91,3.082649,31.62264,2398,0.064872,0.92,6.619006
536,NVDA,10,2023-05-19 18:00:00,36,-11331.34,-54.38,109.595,4.330805,63.230445,3650,0.065834,0.0,4.498019
537,NVDA,10,2023-05-19 18:30:00,-54,16923.08,51.12,79.44,3.803711,74.347084,2950,0.061751,-0.76,7.093442
538,NVDA,10,2023-05-19 19:00:00,-34,10601.09,6.28,90.125,2.726019,21.048044,3580,0.053526,0.35,5.204456


In [7]:
def _tag_with_run_key(frame, key):
    """Return a copy of `frame` with the run key added as ticker/latency/start_ts columns.

    `key` is the (ticker, latency, start_ts) tuple used as the key in `output`.
    `DataFrame.assign` returns a new frame, so the frames stored in `output`
    are not modified (re-running earlier cells sees them unchanged).
    """
    ticker, latency, start_ts = key
    return frame.assign(ticker=ticker, latency=latency, start_ts=start_ts)


# Stack the per-run trades and quotes into one frame each, labelled with the
# run they came from.
trades_df = pd.concat([_tag_with_run_key(run['trades'], key) for key, run in output.items()])
quotes_df = pd.concat([_tag_with_run_key(run['quotes'], key) for key, run in output.items()])

trades_df.to_csv(f'backtest_output_{model_name}_trades.csv')
quotes_df.to_csv(f'backtest_output_{model_name}_quotes.csv')