In [2]:
import order_book as bk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
# configure root logging: INFO and above, each record prefixed with timestamp and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# column names are supplied explicitly via `names`, so the first line of the file is read as data
cols = ['time', 'type', 'order_id', 'shares', 'price', 'direction']
data = pd.read_csv(
    "data/AAPL_2012-06-21_34200000_57600000_message_5.csv",
    names=cols,
)

In [3]:
# create order book object
book = bk.Book()
book_list = []
events = []

# number of leading messages to replay into the book
N_EVENTS = 100

# iterate through event messages and read into book
# the loop is bounded by the file length so a short message file cannot raise a KeyError,
# and rows are taken by position (iloc) because `i` is a positional counter
for i in range(min(N_EVENTS, len(data))):
    event = data.iloc[i]
    book.handleEvent(event, i)
    events.append(event)
    # record the levels reported by the book after this event
    book_list.append(book.getAllLevels())

2024-04-09 15:36:09,893 - INFO - 0 New Order Submission
2024-04-09 15:36:09,896 - INFO - Adding ID 16113575.0 at 5853300.0, vol 18.0, in buy tree
2024-04-09 15:36:09,898 - INFO - limit 5853300.0 does not exist
2024-04-09 15:36:09,900 - INFO - Creating new limit
2024-04-09 15:36:09,902 - INFO - $5853300.0 limit created, ID: 16113575.0 is head
2024-04-09 15:36:09,906 - INFO - Total vol at 5853300.0 has been increased by 18.0
2024-04-09 15:36:09,913 - INFO - Num orders at 5853300.0 has been increased by 1
2024-04-09 15:36:09,916 - INFO - [[[5853300.0, 18.0, 1]], []]
2024-04-09 15:36:09,924 - INFO - 1 New Order Submission
2024-04-09 15:36:09,930 - INFO - Adding ID 16113584.0 at 5853200.0, vol 18.0, in buy tree
2024-04-09 15:36:09,932 - INFO - limit 5853200.0 does not exist
2024-04-09 15:36:09,943 - INFO - Creating new limit
2024-04-09 15:36:09,944 - INFO - $5853200.0 limit created, ID: 16113584.0 is head
2024-04-09 15:36:09,946 - INFO - Total vol at 5853200.0 has been increased by 18.0
202

In [4]:
# here we trim the output to make sure it is symmetric:
# every snapshot ends up with exactly N_LEVELS levels per side, best price first
# if longer than N_LEVELS levels, trim; if shorter, add [0, 0, 0] as filler
# at the deepest levels
N_LEVELS = 5


def _fit_depth(levels, depth):
    """Return a new list of exactly `depth` levels.

    `levels` is expected to be ordered best price first. Extra levels are
    dropped from the end; missing levels are filled at the end with
    [0, 0, 0]. The input list is not modified.
    """
    fitted = list(levels[:depth])
    fitted.extend([0, 0, 0] for _ in range(depth - len(fitted)))
    return fitted


symetric_book = []
for x in book_list:
    # x is [bids, asks]; the bid side is stored with the best bid last
    # (the trimming keeps the last levels and reverses them), so reverse it
    # BEFORE padding -- otherwise the filler would end up at Bid_1
    bids_best_first = x[0][::-1]
    asks_best_first = x[1]
    symetric_book.append([
        _fit_depth(asks_best_first, N_LEVELS),
        _fit_depth(bids_best_first, N_LEVELS),
    ])

In [5]:
# build a DataFrame from the symmetric book whose column layout matches the LOBSTER output
# each snapshot is a nested [asks, bids] list, so it is flattened into one row per snapshot
colnames = [
    'Ask_1', 'Ask_1_Vol', 'Bid_1', 'Bid_1_Vol',
    'Ask_2', 'Ask_2_Vol', 'Bid_2', 'Bid_2_Vol',
    'Ask_3', 'Ask_3_Vol', 'Bid_3', 'Bid_3_Vol',
    'Ask_4', 'Ask_4_Vol', 'Bid_4', 'Bid_4_Vol',
    'Ask_5', 'Ask_5_Vol', 'Bid_5', 'Bid_5_Vol',
]

df_rows = []
for asks, bids in symetric_book:
    # interleave ask and bid level by level, keeping only the first two fields of each level
    pairs = [
        side[depth][:2]
        for depth in range(len(asks))
        for side in (asks, bids)
    ]
    df_rows.append(list(np.array(pairs).flatten()))

my_output = pd.DataFrame(df_rows, columns=colnames).astype('int64')
my_output[27:37]


Unnamed: 0,Ask_1,Ask_1_Vol,Bid_1,Bid_1_Vol,Ask_2,Ask_2_Vol,Bid_2,Bid_2_Vol,Ask_3,Ask_3_Vol,Bid_3,Bid_3_Vol,Ask_4,Ask_4_Vol,Bid_4,Bid_4_Vol,Ask_5,Ask_5_Vol,Bid_5,Bid_5_Vol
27,5857400,40,5857300,20,5857500,82,5857000,50,5857800,45,5856900,20,5858000,4,5856500,5,5858200,5,5856400,20
28,5857500,82,5857300,20,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
29,5857500,57,5857300,20,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
30,5857500,57,5857300,19,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
31,5857500,57,5857300,9,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
32,5857500,32,5857300,9,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
33,5857500,27,5857300,9,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
34,5857500,20,5857300,9,5857800,45,5857000,50,5858000,4,5856900,20,5858200,5,5856500,5,5859300,100,5856400,20
35,5857800,45,5857300,9,5858000,4,5857000,50,5858200,5,5856900,20,5859300,100,5856500,5,0,0,5856400,20
36,5857800,20,5857300,9,5858000,4,5857000,50,5858200,5,5856900,20,5859300,100,5856500,5,0,0,5856400,20


In [6]:
# load the LOBSTER order book output to use as the reference for our constructed book
# NOTE(review): the file name ends in "_orderbook_1" while `colnames` has 20 entries, and
# only the first four columns are populated in the displayed slice -- confirm this is the
# intended file for a 5-level comparison
lobster_orderbook = pd.read_csv(
    "data/AAPL_2012-06-21_34200000_57600000_orderbook_1.csv",
    names=colnames,
)
lobster_orderbook.iloc[6:16]

Unnamed: 0,Ask_1,Ask_1_Vol,Bid_1,Bid_1_Vol,Ask_2,Ask_2_Vol,Bid_2,Bid_2_Vol,Ask_3,Ask_3_Vol,Bid_3,Bid_3_Vol,Ask_4,Ask_4_Vol,Bid_4,Bid_4_Vol,Ask_5,Ask_5_Vol,Bid_5,Bid_5_Vol
6,5857400,40,5857300,20,,,,,,,,,,,,,,,,
7,5857500,82,5857300,20,,,,,,,,,,,,,,,,
8,5857500,57,5857300,20,,,,,,,,,,,,,,,,
9,5857500,57,5857300,19,,,,,,,,,,,,,,,,
10,5857500,57,5857300,9,,,,,,,,,,,,,,,,
11,5857500,32,5857300,9,,,,,,,,,,,,,,,,
12,5857500,27,5857300,9,,,,,,,,,,,,,,,,
13,5857500,20,5857300,9,,,,,,,,,,,,,,,,
14,5857800,45,5857300,9,,,,,,,,,,,,,,,,
15,5857800,20,5857300,9,,,,,,,,,,,,,,,,


In [7]:
# here we test to see if our L1 output matches the lobster output
# the two books are aligned on the row labels where the displayed slices line up
# (label 27 in our book, label 6 in the LOBSTER book)
# label-based slicing keeps this cell idempotent: re-running it does not drop further rows
# NOTE(review): assumes both frames still carry their default integer row labels
my_output = my_output.loc[27:]
lobster_orderbook = lobster_orderbook.loc[6:]

nbbo_errors = 0
# only walk rows that exist in both frames
for x in range(min(len(my_output), len(lobster_orderbook))):
    lob_ob = lobster_orderbook.iloc[x].iloc[:4]
    my_ob = my_output.iloc[x].iloc[:4]
    # compare column by column: a set comparison would ignore which column a value
    # sits in and collapse repeated values, hiding real mismatches
    if (my_ob.to_numpy() != lob_ob.to_numpy()).any():
        print(x,"\n","My Output:\n", my_ob, "\nLobster Output:\n", lob_ob)
        nbbo_errors += 1

13 
 My Output:
 Ask_1        5859300
Ask_1_Vol        100
Bid_1        5857300
Bid_1_Vol          9
Name: 40, dtype: int64 
Lobster Output:
 Ask_1        5858300.0
Ask_1_Vol          7.0
Bid_1        5857300.0
Bid_1_Vol          9.0
Name: 19, dtype: float64
22 
 My Output:
 Ask_1        5859300
Ask_1_Vol         63
Bid_1        5857400
Bid_1_Vol         50
Name: 49, dtype: int64 
Lobster Output:
 Ask_1        5859300.0
Ask_1_Vol         63.0
Bid_1        5857700.0
Bid_1_Vol         18.0
Name: 28, dtype: float64
23 
 My Output:
 Ask_1        5859300
Ask_1_Vol         63
Bid_1        5857700
Bid_1_Vol         18
Name: 50, dtype: int64 
Lobster Output:
 Ask_1        5859300.0
Ask_1_Vol        163.0
Bid_1        5857700.0
Bid_1_Vol         18.0
Name: 29, dtype: float64
24 
 My Output:
 Ask_1        5859300
Ask_1_Vol         63
Bid_1        5857700
Bid_1_Vol         18
Name: 51, dtype: int64 
Lobster Output:
 Ask_1        5859300.0
Ask_1_Vol        159.0
Bid_1        5857700.0
Bid_1_Vol   

In [8]:
# number of compared rows whose top-of-book values differ between our book and LOBSTER
nbbo_errors

52

In [9]:
# looking at the first error (position 13 in the comparison output), we see that our
# best ask price and volume are different from the LOBSTER values
# let's look at the rows around it in our book to see what the problem is

my_output[11:15]

Unnamed: 0,Ask_1,Ask_1_Vol,Bid_1,Bid_1_Vol,Ask_2,Ask_2_Vol,Bid_2,Bid_2_Vol,Ask_3,Ask_3_Vol,Bid_3,Bid_3_Vol,Ask_4,Ask_4_Vol,Bid_4,Bid_4_Vol,Ask_5,Ask_5_Vol,Bid_5,Bid_5_Vol
38,5858000,4,5857300,9,5858200,5,5857000,50,5859300,100,5856900,20,0,0,5856500,5,0,0,5856400,20
39,5858200,5,5857300,9,5859300,100,5857000,50,0,0,5856900,20,0,0,5856500,5,0,0,5856400,20
40,5859300,100,5857300,9,0,0,5857000,50,0,0,5856900,20,0,0,5856500,5,0,0,5856400,20
41,5859300,100,5857300,9,0,0,5857000,50,0,0,5856900,20,0,0,5856500,5,0,0,5856400,20


In [10]:
# the same positional rows from the LOBSTER book, for side-by-side comparison
lobster_orderbook[11:15]

Unnamed: 0,Ask_1,Ask_1_Vol,Bid_1,Bid_1_Vol,Ask_2,Ask_2_Vol,Bid_2,Bid_2_Vol,Ask_3,Ask_3_Vol,Bid_3,Bid_3_Vol,Ask_4,Ask_4_Vol,Bid_4,Bid_4_Vol,Ask_5,Ask_5_Vol,Bid_5,Bid_5_Vol
17,5858000,4,5857300,9,,,,,,,,,,,,,,,,
18,5858200,5,5857300,9,,,,,,,,,,,,,,,,
19,5858300,7,5857300,9,,,,,,,,,,,,,,,,
20,5859300,100,5857300,9,,,,,,,,,,,,,,,,


In [None]:
# looking at our book, there is no ask price of 5858300 at any of the other levels
# this means we have either read a message incorrectly or the LOBSTER sample output
# is initialised with the book from the previous trading session

In [34]:
# let's go through the message data and look at the message types per order ID
# if we find types 2, 3, 4, or 5 before type 1 for a given order ID,
# we will know that our output cannot match the lobster output
# start by dropping order id of 0
order_ids = (
    data.loc[data['order_id'] != 0]
    .set_index(['order_id', 'type'])
    .sort_values(by=['order_id'])
)
order_ids.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,time,shares,price,direction
order_id,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
28825,4,55188.54079,10,5800000,1
29948,4,55188.54079,6,5800000,1
877345,4,56011.637079,5,5787000,1
877362,4,56053.360052,10,5780100,1
877365,4,49309.25074,7,5817800,1
877370,4,55188.569133,45,5800000,1
877370,4,55188.54079,84,5800000,1
877390,4,55191.82677,20,5799900,1
877391,4,34291.033125,10,5846900,1
877394,4,55188.569133,1,5800000,1


In [40]:
# here we can see that we have order ids with only type 4 (visible execution)
# looking at another order id we can see the submission (type 1), execution (type 4)
# and cancellation (type 3)
# Therefore, we won't be able to use the LOBSTER sample message file to perfectly
# recreate the sample output
# the selection is bound to its own name instead of overwriting `order_ids`, so
# re-running this cell does not fail with a KeyError on an already-reduced frame
EXAMPLE_ORDER_ID = 287143079
example_order = order_ids.loc[EXAMPLE_ORDER_ID]
example_order

Unnamed: 0_level_0,time,shares,price,direction
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,57599.289452,100,5775500,1
4,57599.355373,89,5775500,1
3,57599.383357,11,5775500,1
