In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

(parent_folder_path, current_dir) = os.path.split(os.path.abspath(''))
sys.path.append(parent_folder_path)
sys.path.append(os.path.join(parent_folder_path, 'simulator'))
sys.path.append(os.path.join(parent_folder_path, 'equities/data_processing'))

os.environ["XLA_PYTHON_CLIENT_ALLOCATOR"] = "platform"

import numpy as np
import pandas as pd
import random
from typing import List, Tuple
from copy import deepcopy
from tqdm import tqdm
from glob import glob
from decimal import Decimal
from contextlib import nullcontext
import torch

from simulator.core import Message
from simulator.markets.order_book import OrderBook
from simulator.markets.orders import LimitOrder, Side, MarketOrder
from equities.data_processing import itch_preproc
from equities.data_processing import itch_encoding
from equities.model import GPTConfig, GPT
from equities.data_processing import itch_encoding

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# INIT PARAMS
# -----------------------------------------------------------------------------
init_from = 'resume' # either 'resume' (from an out_dir) or a gpt2 variant (e.g. 'gpt2-xl')
out_dir = parent_folder_path + '/out' # ignored if init_from is not 'resume'
# dataset = '12302019.NASDAQ_ITCH50_AAPL_message_proc.npy' # dataset to use for initial prompt
# start = "\n" # or "<|endoftext|>" or etc. Can also specify a file, use as: "FILE:prompt.txt"
num_context_msgs = 400 # number of messages from dataset to use as context
# num_samples = 10 # number of samples to draw
num_samples = 1 # number of samples to draw (think of like monte carlo paths)
# max_new_tokens = 500 # number of tokens generated in each sample
max_new_tokens = 1 # number of tokens generated in each sample (think of like time steps)
temperature = 0.8 # 1.0 = no change, < 1.0 = less random, > 1.0 = more random, in predictions
top_k = 200 # retain only the top_k most likely tokens, clamp others to have 0 probability
seed = 42
device = 'cuda' # examples: 'cpu', 'cuda', 'cuda:0', 'cuda:1', etc.
dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
compile = False # use PyTorch 2.0 to compile the model to be faster
# exec(open('equities/configurator.py').read()) # overrides from command line or config file
# -----------------------------------------------------------------------------

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn
device_type = 'cuda' if 'cuda' in device else 'cpu' # for later use in torch.autocast
ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
ctx = nullcontext() if device_type == 'cpu' else torch.amp.autocast(device_type=device_type, dtype=ptdtype)


In [4]:
TIME = 0

class FakeExchangeAgent:
    def __init__(self):
        self.messages = []
        self.current_time = TIME
        self.mkt_open = TIME
        self.book_logging = None
        self.stream_history = 10

    def reset(self):
        self.messages = []

    def send_message(self, recipient_id: int, message: Message, _: int = 0):
        self.messages.append((recipient_id, message))

    def logEvent(self, *args, **kwargs):
        pass

In [5]:
# define load paths
raw_itch_load_path = parent_folder_path + '/dataset/raw/ITCH/'
processed_dataset = '03272019.NASDAQ_ITCH50_AAPL_message_proc.npy'
proc_data_dir = os.path.join('dataset/proc/ITCH/full_view/', processed_dataset)
proc_data_dir = parent_folder_path + '/' + proc_data_dir
symbols_load_path = parent_folder_path + '/dataset/symbols/'
symbols_file = sorted(glob(symbols_load_path + '*sp500*.txt'))[0]

# locate raw ITCH data
itch_message_files = sorted(glob(raw_itch_load_path + '*message*.csv'))
itch_book_files = sorted(glob(raw_itch_load_path + '*book*.csv'))
print('found', len(itch_message_files), 'ITCH message files')
print('found', len(itch_book_files), 'ITCH book files')

# create reverse ticker symbol mapping (key is index, value is ticker)
tickers = {}
with open(symbols_file) as f:
    idx = 0
    for line in f:
        idx += 1
        # tickers[line.strip()] = idx
        tickers[idx] = line.strip()

# load raw ITCH data (book)
symbols = []
for m_f, b_f in tqdm(zip(itch_message_files, itch_book_files)):
    if '03272019' not in m_f:
        continue
    print(m_f)
    # print(b_f)
    
    first_message = (itch_preproc.load_message_df(m_f)).iloc[0]
    print(first_message)

    # itch_book = pd.read_csv(
    #     b_f,
    #     # index_col=False,
    #     # header=None
    # )
    # assert len(itch_messages) == len(itch_book)

    # # remove disallowed order types
    # allowed_events=['A','E','C','D','R']
    # itch_messages = itch_messages.loc[itch_messages.type.isin(allowed_events)]
    # # make sure book is same length as messages
    # itch_book = itch_book.loc[itch_messages.index]

    # print("ITCH messages shape:", itch_messages.shape)
    # print("ITCH book shape:", itch_book.shape)

    # # remove time field from ITCH book data
    # itch_book = itch_book.drop(columns=['time'])

    # symbol to store in list and use to create OB objects in loop later
    symbol = m_f.rsplit('/', maxsplit=1)[-1][:-12].rsplit('_', maxsplit=1)[-1]
    print("Adding symbol:", symbol)
    symbols.append(symbol)

# load processed ITCH data (messages)
# proc_messages = np.array(np.load(proc_data_dir, mmap_mode='r')[0:num_context_msgs])
proc_messages = np.array(np.load(proc_data_dir, mmap_mode='r')[0:12349])
# assert len(itch_book) == len(proc_messages) + 1 # off by 1 bc of first message
print("proc_messages.shape:", proc_messages.shape)
print("proc_messages:", proc_messages)
print([ "ticker", "order_id",
        "event_type", "direction", "price_abs", "price", "fill_size", "remain_size",
        "delta_t_s", "delta_t_ns", "time_s", "time_ns", "old_id",
        "price_ref", "fill_size_ref", "time_s_ref", "time_ns_ref", "old_price_abs"])

found 8 ITCH message files
found 8 ITCH book files


0it [00:00, ?it/s]

/home/aaron/Documents/Github/MarketSimT/dataset/raw/ITCH/03272019.NASDAQ_ITCH50_AAPL_message.csv


8it [00:00, 13.98it/s]

time        14400006432545
type                     A
id                   13301
side                     1
size                  18.0
price               207.85
cancSize               NaN
execSize               NaN
oldId                  NaN
oldSize                NaN
oldPrice               NaN
mpid                   NaN
Name: 0, dtype: object
Adding symbol: AAPL
proc_messages.shape: (12349, 18)
proc_messages: [[      40    15969        1 ...    -9999    -9999    -9999]
 [      40    20677        1 ...    -9999    -9999    -9999]
 [      40    22061        1 ...    -9999    -9999    -9999]
 ...
 [      40  9482373        4 ...    34200 59161876    -9999]
 [      40  9706953        1 ...    -9999    -9999    -9999]
 [      40  9706957        1 ...    -9999    -9999    -9999]]
['ticker', 'order_id', 'event_type', 'direction', 'price_abs', 'price', 'fill_size', 'remain_size', 'delta_t_s', 'delta_t_ns', 'time_s', 'time_ns', 'old_id', 'price_ref', 'fill_size_ref', 'time_s_ref', 'time_ns_re




In [6]:
# init new book under nasdaq agent
nasdaq_agent = FakeExchangeAgent()

# create a dictionary of order books based on each symbol in symbols
print("Creating order books for symbols:", symbols)
order_books = {}
for symbol in symbols:
    order_books[symbol] = OrderBook(nasdaq_agent, symbol)

# empty book
assert order_books[tickers[proc_messages[0][0]]].bids == order_books[tickers[proc_messages[0][0]]].asks == []

Creating order books for symbols: ['AAPL']


In [7]:
# first message is missing in proc_messages, so we'll use the raw message file to start the book
print(first_message)

# insert bid order
bid_order = LimitOrder(
    order_id=first_message['id'],
    agent_id=1, # world agent, leave alone for now
    time_placed=first_message['time'],
    symbol=symbols[0],
    quantity=int(first_message['size']),
    side=Side.BID if first_message['side'] == 0 else Side.ASK,
    limit_price=int(first_message['price']*100),
)
order_books[symbols[0]].handle_limit_order(bid_order)

print("L3 bid data:", order_books[symbols[0]].get_l3_bid_data())
print("L3 ask data:", order_books[symbols[0]].get_l3_ask_data())
nasdaq_agent.messages

time        14400006432545
type                     A
id                   13301
side                     1
size                  18.0
price               207.85
cancSize               NaN
execSize               NaN
oldId                  NaN
oldSize                NaN
oldPrice               NaN
mpid                   NaN
Name: 0, dtype: object
L3 bid data: []
L3 ask data: [(20785, [18])]


[(1,
  OrderAcceptedMsg(message_id=1, order=(Agent 1 @ 1970-01-01 04:00:00) : ASK 18 AAPL @ $207.85))]

In [8]:
# INIT ORDER BOOKS FROM PROCESSED CONTEXT DATA

# [ "ticker", "order_id",
# "event_type", "direction", "price_abs", "price", "fill_size", "remain_size",
# "delta_t_s", "delta_t_ns", "time_s", "time_ns", "old_id",
# "price_ref", "fill_size_ref", "time_s_ref", "time_ns_ref", "old_price_abs"]

# init variables to keep track of previous time, price, etc.
prev_time = first_message['time']
prev_price = int(first_message['price']*100)

# iterate through messages and update order books
for msg in proc_messages:
    # print(msg)
    symbol = tickers[msg[0]]
    # print("Symbol:", symbol)
    order_id = msg[1]
    # print("Order ID:", order_id)
    event_type = msg[2]
    # print("Event Type:", event_type)
    price = msg[4]
    # print("Price:", price)

    # verify time correctness
    assert prev_time + (msg[8]*1000000000) + msg[9] == (msg[10] * 1000000000) + msg[11]
    time = prev_time + (msg[8]*1000000000) + msg[9]
    # print("Time:", time)

    # handle order based on event type
    if event_type == 1:
        # ADD LIMIT ORDER
        direction = Side.BID if msg[3] == 0 else Side.ASK
        # print("Direction:", direction)
        fill_size = msg[6]
        # print("Fill Size:", fill_size)
        order = LimitOrder(
            order_id=order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=time,
            symbol=symbol,
            quantity=fill_size,
            side=direction,
            limit_price=price,
        )
        order_books[symbol].handle_limit_order(order)
    elif event_type == 2:
        # EXECUTE ORDER
        fill_size = msg[6]
        # print("Fill Size:", fill_size)
        direction = Side.BID if msg[3] == 1 else Side.ASK # opposite of direction in non-execution messages
        # print("Direction:", direction)
        order = MarketOrder(
            order_id=order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=time,
            symbol=symbol,
            quantity=fill_size,
            side=direction, # Buy Order if Side.BID (remove liquidity from ask side), Sell Order if Side.ASK (remove liquidity from bid side)
        )
        order_books[symbol].handle_market_order(order)
    elif event_type == 3:
        # EXECUTE ORDER WITH PRICE DIFFERENT THAN DISPLAY
        # This order type is most likely an execution of a price-to-comply order, which is handled by the simulator
        # but this not encoded in the ITCH data beforehand, so we cannot know whether an order is price-to-comply at the time of submission
        # therefore, we handle this event type as a modifed order and then regular execution order (for now, until we revise the data processing)

        # modfify the matched limit order
        direction = Side.BID if msg[3] == 0 else Side.ASK
        ref_order_time = (msg[15] * 1000000000) + msg[16]
        ref_order_size = msg[14]
        ref_order_price = msg[17] # old_price_abs (not mid_price so we cannot calculate using price_ref msg[13])
        # define original order
        original_order = LimitOrder(
            order_id=order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=ref_order_time,
            symbol=symbol,
            quantity=ref_order_size,
            side=direction,
            limit_price=ref_order_price,
        )
        # define modified order
        # modified_price = msg[4]
        modified_order = LimitOrder(
            order_id=order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=ref_order_time,
            symbol=symbol,
            quantity=ref_order_size,
            side=direction,
            limit_price=price,
        )
        order_books[symbol].modify_order(original_order, modified_order)
        # execute the modified order
        fill_size = msg[6]
        # print("Fill Size:", fill_size)
        direction = Side.BID if msg[3] == 1 else Side.ASK # opposite of direction in non-execution messages
        # print("Direction:", direction)
        order = MarketOrder(
            order_id=order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=time,
            symbol=symbol,
            quantity=fill_size,
            side=direction, # Buy Order if Side.BID (remove liquidity from ask side), Sell Order if Side.ASK (remove liquidity from bid side)
        )
        order_books[symbol].handle_market_order(order)
    elif event_type == 4:
        # CANCEL ORDER
        direction = Side.BID if msg[3] == 0 else Side.ASK
        # print("Direction:", direction)
        ref_order_time = (msg[15] * 1000000000) + msg[16]
        if msg[7] == 0:
            # FULL DELETION
            fill_size = msg[6]
            # print("Cancel Size:", fill_size)
            order = LimitOrder(
                order_id=order_id,
                agent_id=1, # world agent, leave alone for now
                time_placed=ref_order_time,
                symbol=symbol,
                quantity=fill_size,
                # quantity=msg[14], # total size of order when placed
                side=direction,
                limit_price=price,
            )
            order_books[symbol].cancel_order(order)
        else:
            # PARTIAL CANCELLATION
            cancel_size = msg[6]
            # print("Partial Cancel Size:", cancel_size)
            ref_order_size = msg[7] + cancel_size # total size of order before partial cancel
            order = LimitOrder(
                order_id=order_id,
                agent_id=1, # world agent, leave alone for now
                time_placed=ref_order_time,
                symbol=symbol,
                quantity=ref_order_size,
                side=direction,
                limit_price=price,
            )
            order_books[symbol].partial_cancel_order(order, cancel_size)
    elif event_type == 5:
        # REPLACE ORDER
        direction = Side.BID if msg[3] == 0 else Side.ASK
        # print("Direction:", direction)
        old_order_id = msg[12]
        old_order_time = (msg[15] * 1000000000) + msg[16]
        old_order_size = msg[14]
        old_order_price = msg[17] # old_price_abs (not mid_price so we cannot calculate using price_ref msg[13])
        # define old order
        old_order = LimitOrder(
            order_id=old_order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=old_order_time,
            symbol=symbol,
            quantity=old_order_size,
            side=direction,
            limit_price=old_order_price,
        )
        new_order_size = msg[6]
        # define new order
        new_order = LimitOrder(
            order_id=order_id,
            agent_id=1, # world agent, leave alone for now
            time_placed=time,
            symbol=symbol,
            quantity=new_order_size,
            side=direction,
            limit_price=price,
        )
        order_books[symbol].replace_order(1, old_order, new_order) # first arg is agent_id (world agent)
    else:
        raise NotImplementedError("Event type not implemented")

    # update previous time and price
    prev_time = time
    prev_price = price

# print end result
print("L3 bid data:", order_books[symbols[0]].get_l3_bid_data())
print("L3 ask data:", order_books[symbols[0]].get_l3_ask_data())
nasdaq_agent.messages

L3 bid data: [(18870, [20, 27, 1, 25, 4, 7, 13, 100, 119, 6, 150, 65, 10, 6, 100]), (18869, [100, 1]), (18868, [6]), (18865, [100, 100]), (18863, [15, 80]), (18862, [10]), (18858, [2]), (18849, [31]), (18837, [100]), (18823, [100]), (18821, [400]), (18812, [200]), (18811, [50]), (18810, [500]), (18805, [200]), (18802, [100, 300]), (18800, [4, 1, 1, 1, 1, 1, 1, 50, 100, 100, 200]), (18793, [200]), (18787, [100]), (18785, [100]), (18782, [200]), (18780, [500, 500]), (18777, [100]), (18760, [100]), (18757, [50]), (18753, [500]), (18751, [100, 50]), (18750, [10, 100, 9]), (18731, [100]), (18723, [200]), (18721, [200]), (18710, [50]), (18706, [500]), (18700, [10, 12, 3, 8, 1, 14]), (18688, [50]), (18683, [300]), (18682, [100]), (18680, [10]), (18678, [11]), (18666, [100]), (18660, [50]), (18650, [3]), (18627, [200]), (18625, [300]), (18610, [2]), (18602, [300]), (18600, [10, 2, 5, 10, 1, 1, 2, 300]), (18573, [25]), (18510, [50]), (18502, [300]), (18501, [5]), (18500, [10, 100, 14, 1]), (184

[(1,
  OrderAcceptedMsg(message_id=1, order=(Agent 1 @ 1970-01-01 04:00:00) : ASK 18 AAPL @ $207.85)),
 (1,
  OrderAcceptedMsg(message_id=2, order=(Agent 1 @ 1970-01-01 04:00:00) : BID 100 AAPL @ $129.33)),
 (1,
  OrderAcceptedMsg(message_id=3, order=(Agent 1 @ 1970-01-01 04:00:00) : BID 1 AAPL @ $114.94)),
 (1,
  OrderAcceptedMsg(message_id=4, order=(Agent 1 @ 1970-01-01 04:00:00) : BID 1 AAPL @ $98.39)),
 (1,
  OrderAcceptedMsg(message_id=5, order=(Agent 1 @ 1970-01-01 04:00:03) : ASK 300 AAPL @ $192.70)),
 (1,
  OrderAcceptedMsg(message_id=6, order=(Agent 1 @ 1970-01-01 04:00:03) : ASK 85 AAPL @ $214.90)),
 (1,
  OrderAcceptedMsg(message_id=7, order=(Agent 1 @ 1970-01-01 04:00:03) : ASK 150 AAPL @ $225.00)),
 (1,
  OrderAcceptedMsg(message_id=8, order=(Agent 1 @ 1970-01-01 04:00:03) : ASK 5 AAPL @ $225.00)),
 (1,
  OrderAcceptedMsg(message_id=9, order=(Agent 1 @ 1970-01-01 04:00:05) : BID 25 AAPL @ $185.73)),
 (1,
  OrderAcceptedMsg(message_id=10, order=(Agent 1 @ 1970-01-01 04:00:1

In [9]:
# INIT MODEL
if init_from == 'resume':
    # init from a model saved in a specific directory
    ckpt_path = os.path.join(out_dir, 'ckpt.pt')
    checkpoint = torch.load(ckpt_path, map_location=device)
    gptconf = GPTConfig(**checkpoint['model_args'])
    model = GPT(gptconf)
    state_dict = checkpoint['model']
    unwanted_prefix = '_orig_mod.'
    for k,v in list(state_dict.items()):
        if k.startswith(unwanted_prefix):
            state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
    model.load_state_dict(state_dict)

model.eval()
model.to(device)

number of parameters: 94.57M


GPT(
  (transformer): ModuleDict(
    (wte): Embedding(12515, 768)
    (wpe): Embedding(10367, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x Block(
        (ln_1): LayerNorm()
        (attn): CausalSelfAttention(
          (c_attn): Linear(in_features=768, out_features=2304, bias=False)
          (c_proj): Linear(in_features=768, out_features=768, bias=False)
          (attn_dropout): Dropout(p=0.0, inplace=False)
          (resid_dropout): Dropout(p=0.0, inplace=False)
        )
        (ln_2): LayerNorm()
        (mlp): MLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=False)
          (gelu): GELU(approximate='none')
          (c_proj): Linear(in_features=3072, out_features=768, bias=False)
          (dropout): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm()
  )
  (lm_head): Linear(in_features=768, out_features=12515, bias=False)
)

In [10]:
# encode the context data
vocab = itch_encoding.Vocab()
# take the last 'num_context_msgs' messages as context
X_raw = proc_messages[num_context_msgs:]
print("X_raw[-1]:", X_raw[-1])
X = itch_encoding.encode_msgs(X_raw, vocab.ENCODING)
print("X.shape:", X.shape)
print("X:", X)
# ex. decode the context data (will be missing order id, price_abs, old_id, and old_price_abs)
print("decoded X (last msg):", itch_encoding.decode_msg(X[-1], vocab.ENCODING))
time = itch_encoding.decode_msg(X[-1], vocab.ENCODING)[10] * 1000000000 + itch_encoding.decode_msg(X[-1], vocab.ENCODING)[11]
print("current simulation time:", time)

encoded_tok_len = X.shape[1]
print("encoded_tok_len:", encoded_tok_len)

# prepare context tensor
x = (torch.tensor(X.reshape(-1), dtype=torch.long, device=device)[None, ...])
print("x.shape:", x.shape)
print("x:", x)

X_raw[-1]: [       40   9706957         1         0     18682      -189       100
     -9999         0      9069     34200 606673490     -9999     -9999
     -9999     -9999     -9999     -9999]
X.shape: (11949, 24)
X: [[12051  1006 12010 ...   355   463   461]
 [12051  1003 12010 ...     2     2     2]
 [12051  1006 12011 ...   344   395   206]
 ...
 [12051  1006 12010 ...    62   164   879]
 [12051  1003 12011 ...     2     2     2]
 [12051  1003 12010 ...     2     2     2]]
decoded X (last msg): [       40     -9999         1         0     -9999      -189       100
     -9999         0      9069     34200 606673490     -9999     -9999
     -9999     -9999     -9999     -9999]
current simulation time: 34200606673490
encoded_tok_len: 24
x.shape: torch.Size([1, 286776])
x: tensor([[12051,  1006, 12010,  ...,     2,     2,     2]], device='cuda:0')


In [11]:
# run generation
with torch.no_grad():
    with ctx:
        for k in range(num_samples):
            # y = model.generate(x, max_new_tokens, temperature=temperature, top_k=top_k)
            y = model.generate(x, max_new_tokens*encoded_tok_len, temperature=temperature, top_k=top_k)
            # print(decode(y[0].tolist()))
            # print("last generated msg:", y[0][-1].tolist())
            # print(y[0].tolist())
            print('---------------')
        # print("new sequence", y[0].tolist())
        print("new sequence", y)

---------------
new sequence tensor([[12051,  1006, 12010,  ...,     2,     2,     2]], device='cuda:0')


In [12]:
# print the last message in the generated sequence
print("last generated msg:", y[0][-24:].tolist())

# print(y[0].tolist())
print("y:", y)

# decode the generated sequence
# print("decoded y:", itch_encoding.decode_msg(y[0][-24:].tolist(), vocab.ENCODING))
# print("decoded msg:", itch_encoding.decode_msg(np.array(y[0][-24:].tolist()), vocab.ENCODING))
decoded_msg = itch_encoding.decode_msg(np.array(y[0][-24:].tolist()), vocab.ENCODING)
print("decoded msg:", decoded_msg)
print([ "ticker", "NA_VAL",
        "event_type", "direction", "NA_VAL", "price", "fill_size", "remain_size",
        "delta_t_s", "delta_t_ns", "time_s", "time_ns",
        "NA_VAL", "price_ref", "fill_size_ref", "time_s_ref", "time_ns_ref", "NA_VAL"])


last generated msg: [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
y: tensor([[12051,  1006, 12010,  ...,     2,     2,     2]], device='cuda:0')
decoded msg: [-9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999
 -9999 -9999 -9999 -9999 -9999 -9999]
['ticker', 'NA_VAL', 'event_type', 'direction', 'NA_VAL', 'price', 'fill_size', 'remain_size', 'delta_t_s', 'delta_t_ns', 'time_s', 'time_ns', 'NA_VAL', 'price_ref', 'fill_size_ref', 'time_s_ref', 'time_ns_ref', 'NA_VAL']


In [13]:
# X_true = np.array(np.load(proc_data_dir, mmap_mode='r')[0:num_context_msgs+max_new_tokens])
X_true = np.array(np.load(proc_data_dir, mmap_mode='r')[0:12349+max_new_tokens])
print("X_true:", X_true)

print("true last msg:", X_true[-1])

X_true: [[       40     15969         1 ...     -9999     -9999     -9999]
 [       40     20677         1 ...     -9999     -9999     -9999]
 [       40     22061         1 ...     -9999     -9999     -9999]
 ...
 [       40   9706953         1 ...     -9999     -9999     -9999]
 [       40   9706957         1 ...     -9999     -9999     -9999]
 [       40   9629209         4 ...     34200 398533020     -9999]]
true last msg: [       40   9629209         4         1     18884        13       100
         0         0    109144     34200 606782634     -9999        13
       100     34200 398533020     -9999]


In [14]:
print("decoded msg:", decoded_msg)
print("predicted symbol:", tickers[decoded_msg[0]])
print("predicted event type:", decoded_msg[2])
print("predicted price:", decoded_msg[5])
print("predicted fill size:", decoded_msg[6])
print("predicted remain size:", decoded_msg[7])
print("predicted time:", decoded_msg[10], decoded_msg[11])
print("predicted price ref:", decoded_msg[13])
print("predicted fill size ref:", decoded_msg[14])
print("predicted time ref:", decoded_msg[15], decoded_msg[16])

decoded_msg

decoded msg: [-9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999 -9999
 -9999 -9999 -9999 -9999 -9999 -9999]


KeyError: -9999

In [None]:
# # INIT ORDER BOOKS FROM PROCESSED CONTEXT DATA

# # [ "ticker", "order_id", "event_type", "direction", "price_abs", "price",
# #  "fill_size", "remain_size", "delta_t_s", "delta_t_ns", "time_s", "time_ns",
# #  "old_id", "price_ref", "fill_size_ref", "time_s_ref", "time_ns_ref", "old_price_abs"]

# # init variables to keep track of previous time, price, etc.
# prev_time = first_message['time']
# prev_price = int(first_message['price']*100)

# # iterate through messages and update order books
# for msg in proc_messages:
#     # print(msg)
#     symbol = tickers[msg[0]]
#     # print("Symbol:", symbol)
#     order_id = msg[1]
#     # print("Order ID:", order_id)
#     event_type = msg[2]
#     # print("Event Type:", event_type)
#     price = msg[4]
#     # print("Price:", price)

#     # verify time correctness
#     assert prev_time + (msg[8]*1000000000) + msg[9] == (msg[10] * 1000000000) + msg[11]
#     time = prev_time + (msg[8]*1000000000) + msg[9]
#     # print("Time:", time)

#     # handle order based on event type
#     if event_type == 1:
#         # ADD LIMIT ORDER
#         direction = Side.BID if msg[3] == 0 else Side.ASK
#         # print("Direction:", direction)
#         fill_size = msg[6]
#         # print("Fill Size:", fill_size)
#         order = LimitOrder(
#             order_id=order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=time,
#             symbol=symbol,
#             quantity=fill_size,
#             side=direction,
#             limit_price=price,
#         )
#         order_books[symbol].handle_limit_order(order)
#     elif event_type == 2:
#         # EXECUTE ORDER
#         fill_size = msg[6]
#         # print("Fill Size:", fill_size)
#         direction = Side.BID if msg[3] == 1 else Side.ASK # opposite of direction in non-execution messages
#         # print("Direction:", direction)
#         order = MarketOrder(
#             order_id=order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=time,
#             symbol=symbol,
#             quantity=fill_size,
#             side=direction, # Buy Order if Side.BID (remove liquidity from ask side), Sell Order if Side.ASK (remove liquidity from bid side)
#         )
#         order_books[symbol].handle_market_order(order)
#     elif event_type == 3:
#         # EXECUTE ORDER WITH PRICE DIFFERENT THAN DISPLAY
#         # This order type is most likely an execution of a price-to-comply order, which is handled by the simulator
#         # but this not encoded in the ITCH data beforehand, so we cannot know whether an order is price-to-comply at the time of submission
#         # therefore, we handle this event type as a modifed order and then regular execution order (for now, until we revise the data processing)

#         # modfify the matched limit order
#         direction = Side.BID if msg[3] == 0 else Side.ASK
#         ref_order_time = (msg[15] * 1000000000) + msg[16]
#         ref_order_size = msg[14]
#         ref_order_price = msg[17] # old_price_abs (not mid_price so we cannot calculate using price_ref msg[13])
#         # define original order
#         original_order = LimitOrder(
#             order_id=order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=ref_order_time,
#             symbol=symbol,
#             quantity=ref_order_size,
#             side=direction,
#             limit_price=ref_order_price,
#         )
#         # define modified order
#         # modified_price = msg[4]
#         modified_order = LimitOrder(
#             order_id=order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=ref_order_time,
#             symbol=symbol,
#             quantity=ref_order_size,
#             side=direction,
#             limit_price=price,
#         )
#         order_books[symbol].modify_order(original_order, modified_order)
#         # execute the modified order
#         fill_size = msg[6]
#         # print("Fill Size:", fill_size)
#         direction = Side.BID if msg[3] == 1 else Side.ASK # opposite of direction in non-execution messages
#         # print("Direction:", direction)
#         order = MarketOrder(
#             order_id=order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=time,
#             symbol=symbol,
#             quantity=fill_size,
#             side=direction, # Buy Order if Side.BID (remove liquidity from ask side), Sell Order if Side.ASK (remove liquidity from bid side)
#         )
#         order_books[symbol].handle_market_order(order)
#     elif event_type == 4:
#         # CANCEL ORDER
#         direction = Side.BID if msg[3] == 0 else Side.ASK
#         # print("Direction:", direction)
#         ref_order_time = (msg[15] * 1000000000) + msg[16]
#         if msg[7] == 0:
#             # FULL DELETION
#             fill_size = msg[6]
#             # print("Cancel Size:", fill_size)
#             order = LimitOrder(
#                 order_id=order_id,
#                 agent_id=1, # world agent, leave alone for now
#                 time_placed=ref_order_time,
#                 symbol=symbol,
#                 quantity=fill_size,
#                 # quantity=msg[14], # total size of order when placed
#                 side=direction,
#                 limit_price=price,
#             )
#             order_books[symbol].cancel_order(order)
#         else:
#             # PARTIAL CANCELLATION
#             cancel_size = msg[6]
#             # print("Partial Cancel Size:", cancel_size)
#             ref_order_size = msg[7] + cancel_size # total size of order before partial cancel
#             order = LimitOrder(
#                 order_id=order_id,
#                 agent_id=1, # world agent, leave alone for now
#                 time_placed=ref_order_time,
#                 symbol=symbol,
#                 quantity=ref_order_size,
#                 side=direction,
#                 limit_price=price,
#             )
#             order_books[symbol].partial_cancel_order(order, cancel_size)
#     elif event_type == 5:
#         # REPLACE ORDER
#         direction = Side.BID if msg[3] == 0 else Side.ASK
#         # print("Direction:", direction)
#         old_order_id = msg[12]
#         old_order_time = (msg[15] * 1000000000) + msg[16]
#         old_order_size = msg[14]
#         old_order_price = msg[17] # old_price_abs (not mid_price so we cannot calculate using price_ref msg[13])
#         # define old order
#         old_order = LimitOrder(
#             order_id=old_order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=old_order_time,
#             symbol=symbol,
#             quantity=old_order_size,
#             side=direction,
#             limit_price=old_order_price,
#         )
#         new_order_size = msg[6]
#         # define new order
#         new_order = LimitOrder(
#             order_id=order_id,
#             agent_id=1, # world agent, leave alone for now
#             time_placed=time,
#             symbol=symbol,
#             quantity=new_order_size,
#             side=direction,
#             limit_price=price,
#         )
#         order_books[symbol].replace_order(1, old_order, new_order) # first arg is agent_id (world agent)
#     else:
#         raise NotImplementedError("Event type not implemented")

#     # update previous time and price
#     prev_time = time
#     prev_price = price

# # print end result
# print("L3 bid data:", order_books[symbols[0]].get_l3_bid_data())
# print("L3 ask data:", order_books[symbols[0]].get_l3_ask_data())
# nasdaq_agent.messages