In [1]:
import sys
sys.path.append('..')

from data_parsers import databento_file_parser
from executor import SimpleExecutor1

import pandas as pd
import math
import numpy as np

from tqdm import tqdm
import math
import numpy
import matplotlib.pyplot as plt
import random
import numpy as np

# Widen pandas' display limits so the result tables below are shown in full.
pd.set_option(
    'display.max_rows', 100,
    'display.max_columns', 80,
)

In [2]:
# Ho & Stoll style inventory-skewed quoting

''' 
This algo gives bid and ask quotes using a Ho and Stoll style inventory skew.
(This header previously credited the Avellaneda & Stoikov model; that model's
sigma, qmax, gamma and k parameters are not inputs of the function below.)

Input:
- mid_price = current mid price
- q = current position (signed inventory)
- a = 0.01, coefficient of the linear inventory term
- b = 0.01, coefficient of the quadratic inventory term
'''

def quote_ho_and_stoll(mid_price, q, a = 0.01, b=0.01, s=0.06):
    """
    Return inventory-skewed ``(bid, ask)`` quotes around ``mid_price``.

    Both quotes sit ``s / 2`` away from the mid price and are then shifted
    together by half of an inventory adjustment ``d``:

        d = -sign(q) * (a * |q| + b * |q|**2) / 100

    A long position (``q > 0``) moves both quotes down and a short position
    moves them up; a flat position leaves them centred on the mid price.

    Parameters:
    - mid_price = current mid price (scalar or array)
    - q = current signed position (scalar or array broadcastable with mid_price)
    - a = coefficient of the linear inventory term
    - b = coefficient of the quadratic inventory term
    - s = quoted bid/ask spread in price units (previously hard-coded to 0.06)

    Returns:
    - (bid, ask), each rounded to 2 decimals
    """
    abs_q = np.abs(q)

    # np.sign (rather than a Python `if`) keeps the function usable on arrays;
    # it gives 0 for a flat position, where the adjustment is zero anyway.
    skew_direction = -np.sign(q)

    # NOTE(review): the division by 100 is kept from the original; presumably a
    # unit conversion (e.g. cents to price units) - confirm.
    d = skew_direction * (a * abs_q + b * abs_q ** 2) / 100

    # np.round accepts plain Python floats as well as NumPy scalars/arrays.
    ra = np.round(mid_price + s / 2 + d / 2, 2)
    rb = np.round(mid_price - s / 2 + d / 2, 2)

    return rb, ra

quote_ho_and_stoll(10, -200, a = 0.01, b=0.0001)

(10.0, 10.06)

In [3]:
class BOT1(SimpleExecutor1):
    """Executor whose quotes come from ``quote_ho_and_stoll`` around the book's mid price."""

    def calculate_bid_ask_price(self, bid_orderbook, ask_orderbook, inventory):
        """
        Return the ``(bid, ask)`` prices to quote for the current book and inventory.

        The mid price is the average of the first value of the first entry on
        each side of the book (presumably the best bid/ask price - confirm
        against the executor's orderbook layout). The quotes are then skewed
        by ``inventory`` with fixed coefficients a=0.01 and b=0.0001.
        """
        top_ask = ask_orderbook[0][0]
        top_bid = bid_orderbook[0][0]
        mid_price = (top_ask + top_bid)/2

        bid, ask = quote_ho_and_stoll(mid_price, inventory, a = 0.01, b=0.0001)

        # Nothing to log on this call: hand the quotes straight back.
        if not self.is_verbose_cnt():
            return bid, ask

        print(f"{mid_price=} {bid=} {ask=} {self.sigma_1min=:.4f} {self.time_frac_elapsed=:.4f}")
        return bid, ask


In [4]:
from itertools import product

# Back-test universe: one ticker over five consecutive dates.
# An earlier, currently disabled configuration used
#   TICKERS = ['AMZN', 'MSFT', 'APPL']  (sic - possibly meant 'AAPL')
#   TRAIN_DATES = ['20230515', '20230516']
#   TEST_DATES = ['20230517', '20230518', '20230519']
# with TRAIN_SAMPLE built from TICKERS x TRAIN_DATES.
TICKERS = ['MSFT']
TEST_DATES = [
    '20230515',
    '20230516',
    '20230517',
    '20230518',
    '20230519',
]

# Every (ticker, date) combination that the simulation loop will run.
TEST_SAMPLE = [*product(TICKERS, TEST_DATES)]
TEST_SAMPLE

[('MSFT', '20230515'),
 ('MSFT', '20230516'),
 ('MSFT', '20230517'),
 ('MSFT', '20230518'),
 ('MSFT', '20230519')]

In [5]:
# Run one simulation per (ticker, date, 30-minute window) and collect each
# run's `output_data` in `output`, keyed by (ticker, window start timestamp).
output = {}
for ticker, date in TEST_SAMPLE:
    data_file_path = f'../data/databento/{ticker}/xnas-itch-{date}.mbp-10.dbn.zst'
    data_df = databento_file_parser(data_file_path)
    # Note: this banner is printed after the file has already been parsed.
    print(f"*** Starting for {ticker=} and {date=} ***")
    print(len(data_df))
    # Iterating a resampler yields (bin start, rows in that bin) pairs;
    # assumes data_df is indexed by timestamp - TODO confirm in the parser.
    for start_ts, group_df in data_df.resample('30min'):
        print(start_ts, len(group_df))
        # A new bot is created for every window (presumably so that no
        # position/state carries over between windows - confirm in SimpleExecutor1).
        bot1 = BOT1(ticker=ticker, print_freq=150_000)
        bot1.run_sim(group_df)
        output[(ticker, start_ts)] = bot1.output_data
        # print(output[(ticker, start_ts)])

*** Starting for ticker='MSFT' and date='20230515' ***
1248264
2023-05-15 14:00:00 148372
mid_price=308.81 bid=308.78 ask=308.84 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-7104.419999999984 position=23 pnl=-5.5699999999842476 trading_pnl=45.645000000002824 trading_volume=2175 avg_size=5.18944351622846 avg_size_square=58.4937718706461
2023-05-15 14:30:00 143007
mid_price=307.665 bid=307.64 ask=307.7 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-22878.200000000004 position=74 pnl=22.999999999995342 trading_pnl=52.45000000000721 trading_volume=2188 avg_size=6.8825384509142316 avg_size_square=138.39785890346457
2023-05-15 15:00:00 110186
mid_price=309.185 bid=309.16 ask=309.21 self.sigma_1min=nan self.time_frac_elapsed=0.0000
capital=-1221.1999999999825 position=4 pnl=36.880000000017446 trading_pnl=36.83000000000578 trading_volume=1956 avg_size=7.402431479188921 avg_size_square=173.25254392232355
2023-05-15 15:30:00 96071
mid_price=309.57 bid=309.54 ask=309.6 s

In [12]:
model_name = 'ho_and_stoll'

# One row per (ticker, window start). The per-run 'trades' and 'quotes' entries
# are dropped from the summary here and exported separately further down.
out_df = pd.DataFrame(output).T.drop(columns=['trades', 'quotes'])

# Per-window score: net PnL plus twice the trading PnL, minus a penalty on the
# average squared position size, normalised by traded volume and scaled by 100.
out_df['utility'] = 100 * (out_df['net_pnl'] + 2 * out_df['trading_pnl'] - 0.01 * out_df['avg_size_square']) / out_df['trading_volume']

# Mean and standard deviation of every metric across all windows.
print(out_df.mean())
print(out_df.std())

out_df.to_csv(f'backtest_output_{model_name}.csv')

# Concatenate the per-window trade and quote logs and export them as well.
trades_df = pd.concat([o['trades'] for o in output.values()])
trades_df.to_csv(f'backtest_output_{model_name}_trades.csv')

quotes_df = pd.concat([o['quotes'] for o in output.values()])
quotes_df.to_csv(f'backtest_output_{model_name}_quotes.csv')

# Only the last expression of a cell is displayed, so the summary table must
# come last (it was previously in the middle of the cell and never shown).
out_df

eod_position              4.45
eod_cash          -1389.260333
net_pnl                17.1125
trading_pnl           56.76625
avg_size              7.107574
avg_size_square     121.646773
trading_volume     2265.883333
volatility            0.045957
net_return              -0.105
utility               5.469845
dtype: object
eod_position          35.280198
eod_cash           11033.035288
net_pnl                28.97548
trading_pnl           42.013307
avg_size               1.522605
avg_size_square        54.78434
trading_volume      1308.447289
volatility             0.012127
net_return             0.564748
utility                1.712752
dtype: object
