In [1]:
import sys
sys.path.append('../src')
from order_book import Book
#from order_book import Format
from event import Event
from datetime import datetime
import pandas as pd
import numpy as np

# Logger used to track events; one message is emitted to confirm it is wired up.
import log
logger = log.get_logger('Main')
logger.info('testing logger from module Main')

# Load the LOBSTER message file; column names are supplied here via `names`.
cols = ['time', 'type', 'id', 'shares', 'price', 'direction']
data = pd.read_csv("../data/lobster/AAPL_2012-06-21_34200000_37800000_message_50.csv", names=cols)
# Re-scale the raw price column by a factor of 1/10000.
data['price'] = data['price'] / 10000
# Keep only messages stamped during market hours: 09:30 to 16:00 expressed in
# seconds, both bounds inclusive.
data = data[data['time'].between(9.5*60*60, 16*60*60)]
# Report the number of remaining rows, then preview the first few.
print(len(data))
data.head()

91997


Unnamed: 0,time,type,id,shares,price,direction
0,34200.004241,1,16113575,18,585.33,1
1,34200.004261,1,16113584,18,585.32,1
2,34200.004447,1,16113594,18,585.31,1
3,34200.025552,1,16120456,18,585.91,-1
4,34200.02558,1,16120480,18,585.92,-1


In [2]:
# Count how many messages carry each order ID.
element_count = data['id'].value_counts()
# Partition the IDs: those seen exactly once versus those seen more than once.
single_event = element_count.loc[element_count.eq(1)]
multi_event = element_count.loc[element_count.gt(1)]

# Show the one-off IDs: 457 different IDs have only one event in the message data.
single_event

73148432    1
18491035    1
72382240    1
46624554    1
74158838    1
           ..
73146379    1
2125484     1
65572988    1
16339076    1
21526067    1
Name: id, Length: 457, dtype: int64

In [3]:
# Flag every message according to whether its ID occurs more than once.
# `multi_event` is indexed by ID, so membership is tested against its index;
# `isin` does that in one vectorised pass instead of a Python-level loop.
data['multi_event'] = data['id'].isin(multi_event.index)
# Count the event types among the messages whose ID occurs only once.
data.loc[~data['multi_event'], 'type'].value_counts()
# Here we can see 72 type 3 and 6 type 4 events.
# Because we are counting single-event IDs, these IDs have no corresponding type 1 event to cancel or execute against.
# Therefore we must ignore them and accept that our order book will not exactly match the LOBSTER output.

1    379
3     72
4      6
Name: type, dtype: int64

In [4]:
# read the data into our order book
# create order book object
book = Book()
events = []

# Replay the message rows into the book - test using (at most) the first 20k events.
# Rows are taken by position (`iloc`): `data` was filtered by time without resetting its
# index, so its labels are not guaranteed to be the contiguous 0..N-1 range that a
# label lookup (`loc[i]`) would need.
n_events = min(20000, len(data))
for i in range(n_events):
    event = Event(data.iloc[i])
    # the running position is handed to the book alongside the event
    book.handleEvent(event, i)
    events.append(event)

In [5]:
# Gather the visible executions recorded by the book into a frame indexed by 'Time',
# then separate them by direction (-1 -> sells, 1 -> buys).
trade_columns = ['Time', 'Price', 'Shares', 'Direction']
trades = pd.DataFrame(book.trades, columns=trade_columns).set_index(['Time'])
buys = trades[trades['Direction'].eq(1)]
sells = trades[trades['Direction'].eq(-1)]

In [6]:
# Gather the hidden executions recorded by the book into a frame indexed by 'Time',
# then separate them by direction (-1 -> sells, 1 -> buys).
hidden_trade_columns = ['Time', 'Price', 'Shares', 'Direction']
hidden_trades = pd.DataFrame(book.hidden_trades, columns=hidden_trade_columns).set_index(['Time'])
hidden_buys = hidden_trades[hidden_trades['Direction'].eq(1)]
hidden_sells = hidden_trades[hidden_trades['Direction'].eq(-1)]

In [7]:
# Get the book snapshots and format them to match the LOBSTER output.
# We split the bid and ask side of the book to make it easier to plot below.
# NOTE(review): this comment used to say "start from the 10th row to remove some empty levels", but no row is skipped in this cell - confirm whether that trimming happens inside Book.formatBook or was dropped.
# Presumably the two 5s are the number of levels kept per side - TODO confirm against Book.formatBook.
book_formatted = book.formatBook(5, 5)
bids, asks = book.splitBidsAsks(book_formatted)

In [8]:
# Mid price: the average of the level-1 ask and level-1 bid, held in a one-column frame named 'Price'.
mid = pd.DataFrame({'Price': (asks['Ask_1'] + bids['Bid_1']) / 2})

In [9]:
import plotly.graph_objects as go
import plotly.io as pio
# set up all line traces for different features of our data set

# Midpoint price drawn as a black line against the index of `mid`
mid_trace = go.Scatter(
    name='Mid',
    mode='lines',
    x=mid.index,
    y=mid['Price'],
    line={'color': 'black'},
)

# Visible sells and buys drawn as markers. Marker size is 5 * log(shares) and the
# hover text lists the price and the volume of each execution.
sell_hover = 'Price:  ' + sells['Price'].astype(str) + '<br>Volume: ' + sells['Shares'].astype(str)
sell_trace = go.Scatter(
    name='Sell',
    mode='markers',
    x=sells.index,
    y=sells['Price'],
    marker={'color': 'rgba(226,76,79,255)', 'size': np.log(sells['Shares'])*5},
    text=sell_hover,
)

buy_hover = 'Price: ' + buys['Price'].astype(str) + '<br>Volume: ' + buys['Shares'].astype(str)
buy_trace = go.Scatter(
    name='Buy',
    mode='markers',
    x=buys.index,
    y=buys['Price'],
    marker={'color': 'rgba(48,194,150,255)', 'size': np.log(buys['Shares'])*5},
    text=buy_hover,
)

# Scatter plots for hidden sells and buys (yellow squares with a black outline).
# Each marker is sized as 2 * log(shares) of the hidden execution it represents, so the
# size array must come from the same frame as x and y (hidden_sells / hidden_buys),
# not from the visible trades.
hidden_sell_trace = go.Scatter(x=hidden_sells.index, y=hidden_sells.Price, mode='markers',
                        marker=dict(symbol='square', color='Yellow', size=np.log(hidden_sells.Shares)*2, opacity=1, line=dict( color='Black',width=1)),
                        name='Hidden Sell',
                        text = 'Price:  ' + hidden_sells['Price'].astype(str) + '<br>' + 'Volume: ' + hidden_sells['Shares'].astype(str))
hidden_buy_trace = go.Scatter(x=hidden_buys.index, y=hidden_buys.Price, mode='markers',
                       marker=dict(symbol='square', color='Yellow', size=np.log(hidden_buys.Shares)*2, opacity=1, line=dict( color='Black',width=1)),
                       name='Hidden Buy',
                       text = 'Price: ' + hidden_buys['Price'].astype(str) + '<br>' + 'Volume: ' + hidden_buys['Shares'].astype(str))

# Blue line joining the prices of the visible trades
trade_trace = go.Scatter(
    name='Trades',
    mode='lines',
    x=trades.index,
    y=trades['Price'],
    line={'color': 'blue'},
)

# Line plots for the five ask and bid levels in the book.
def _level_trace(side, prefix, level, color, name):
    """Build the line trace for one price level of one side of the book.

    side   -- frame holding the '<prefix>_<level>' price column and the matching
              '<prefix>_<level>_Vol' volume column
    prefix -- column prefix, 'Ask' or 'Bid'
    level  -- level number used in the column names
    color  -- line colour
    name   -- legend label of the trace

    Returns a go.Scatter in 'lines' mode plotted against the frame's index, with
    hover text listing the price and the volume at that level.
    """
    price_col = prefix + '_' + str(level)
    vol_col = price_col + '_Vol'
    hover = 'Price:  ' + side[price_col].astype(str) + '<br>' + 'Volume: ' + side[vol_col].astype(str)
    return go.Scatter(x=side.index, y=side[price_col], mode='lines',
                      line=dict(color=color),
                      name=name,
                      text=hover)

# Level 1 uses red/green; levels 2-5 share one colour per level across both sides.
ask_line_trace = _level_trace(asks, 'Ask', 1, 'red', 'Best Ask Line')
bid_line_trace = _level_trace(bids, 'Bid', 1, 'green', 'Best Bid Line')

ask_2_line_trace = _level_trace(asks, 'Ask', 2, 'Purple', '2 Ask Line')
bid_2_line_trace = _level_trace(bids, 'Bid', 2, 'Purple', '2 Bid Line')

ask_3_line_trace = _level_trace(asks, 'Ask', 3, 'Blue', '3 Ask Line')
bid_3_line_trace = _level_trace(bids, 'Bid', 3, 'Blue', '3 Bid Line')

ask_4_line_trace = _level_trace(asks, 'Ask', 4, 'Orange', '4 Ask Line')
bid_4_line_trace = _level_trace(bids, 'Bid', 4, 'Orange', '4 Bid Line')

ask_5_line_trace = _level_trace(asks, 'Ask', 5, 'Pink', '5 Ask Line')
bid_5_line_trace = _level_trace(bids, 'Bid', 5, 'Pink', '5 Bid Line')

# For the comparison with LOBSTER we reset the index of the bid and ask frames,
# because the LOBSTER output does not have this index; the traces below are
# therefore plotted against row position.
# NOTE: this rebinds `asks` and `bids`, so the traces built from them earlier in
# this cell must be created before this point.
asks = asks.reset_index()
bids = bids.reset_index()

# Hover text for the level-1 lines: price, volume and order count.
my_ask_hover = ('Price:  ' + asks['Ask_1'].astype(str)
                + '<br>Volume: ' + asks['Ask_1_Vol'].astype(str)
                + '<br>Orders: ' + asks['Ask_1_Ord'].astype(str))
my_bid_hover = ('Price:  ' + bids['Bid_1'].astype(str)
                + '<br>Volume: ' + bids['Bid_1_Vol'].astype(str)
                + '<br>Orders: ' + bids['Bid_1_Ord'].astype(str))

my_ask_line_trace = go.Scatter(
    name='My Ask Line',
    mode='lines',
    x=asks.index,
    y=asks['Ask_1'],
    line={'color': 'red'},
    text=my_ask_hover,
)

my_bid_line_trace = go.Scatter(
    name='My Bid Line',
    mode='lines',
    x=bids.index,
    y=bids['Bid_1'],
    line={'color': 'green'},
    text=my_bid_hover,
)

In [10]:
# Traces shown on the executions chart: visible and hidden executions plus the trade line.
graph_execution_data = [
    sell_trace,
    buy_trace,
    hidden_sell_trace,
    hidden_buy_trace,
    trade_trace,
]

# Chart layout: title, labelled axes with black axis lines, fixed height.
layout = go.Layout(
    title='AAPL Executions',
    xaxis={'title': 'Time', 'linecolor': 'black'},
    yaxis={'title': 'Price', 'linecolor': 'black'},
    height=750,
)

# Assemble the figure from the traces and the layout.
fig = go.Figure(data=graph_execution_data, layout=layout)

# Image export and inline display are left disabled:
#pio.write_image(fig, 'AAPL_Executions.png', format='png', scale=1, width=1400, height=800)
#fig.show()

Sequence of visible buy and sell executions, together with the hidden executions:

!['Executions'](../data/images/AAPL_Executions.png)

A closer look:

!['Zoomed Executions'](../data/images/AAPL_Executions_Zoom.png)

In [11]:

# Traces shown on the depth chart: five ask/bid level lines plus the mid price.
graph_bbo_data = [
    ask_line_trace, bid_line_trace,
    ask_2_line_trace, bid_2_line_trace,
    ask_3_line_trace, bid_3_line_trace,
    ask_4_line_trace, bid_4_line_trace,
    ask_5_line_trace, bid_5_line_trace,
    mid_trace,
]

# Chart layout: title, labelled axes with black axis lines, fixed height.
layout = go.Layout(
    title='AAPL Full Depth',
    xaxis={'title': 'Time', 'linecolor': 'black'},
    yaxis={'title': 'Price', 'linecolor': 'black'},
    height=750,
)

# Assemble the figure from the traces and the layout.
fig = go.Figure(data=graph_bbo_data, layout=layout)

# Image export and inline display are left disabled:
#pio.write_image(fig, 'AAPL_Full_Depth.png', format='png', scale=1, width=1400, height=800)
#fig.show()

All 5 bid and ask levels:

!['All 5 bid and ask levels'](../data/images/AAPL_Full_Depth.png)

A closer look:

!['Closer Look'](../data/images/AAPL_Full_Depth_Zoom.png)

In [12]:
# compare to LOBSTER Orderbook file

from plotly.subplots import make_subplots

# NOTE: this rebinds `data`, replacing the message frame loaded in the first cell with the
# LOBSTER order book frame. Re-run the notebook from the top before re-executing any
# earlier cell that reads `data`.
data = pd.read_csv("../data/lobster/AAPL_2012-06-21_34200000_57600000_orderbook_1.csv", names=['Ask_1', 'Ask_1_Vol', 'Bid_1', 'Bid_1_Vol'])
# re-scale the price columns by 1/10000 (same scaling as the message prices)
data.Ask_1 = data.Ask_1/10000
data.Bid_1 = data.Bid_1/10000
# trim to roughly the same time frame
data = data[:8750]
# one frame per side, each keeping only that side's price and volume columns
lb_bids = data.drop(columns=['Ask_1', 'Ask_1_Vol'])
lb_asks = data.drop(columns=['Bid_1', 'Bid_1_Vol'])

# Line plots for asks and bids
lb_ask_line_trace = go.Scatter(x=lb_asks.index, y=lb_asks.Ask_1, mode='lines',
                            line=dict(color='red'),
                            name='Lob Ask Line',
                            text = 'Price:  ' + lb_asks['Ask_1'].astype(str) + '<br>' + 'Volume: ' + lb_asks['Ask_1_Vol'].astype(str))


lb_bid_line_trace = go.Scatter(x=lb_bids.index, y=lb_bids.Bid_1, mode='lines',
                            line=dict(color='green'),
                            name='Lob Bid Line',
                            text = 'Price:  ' + lb_bids['Bid_1'].astype(str) + '<br>' + 'Volume: ' + lb_bids['Bid_1_Vol'].astype(str))

# `graph_data` and `layout` are not consumed by the side-by-side figure below.
graph_data = [lb_ask_line_trace, lb_bid_line_trace]

# Configure layout
layout = go.Layout(title='AAPL BBO LOBSTER Output',
                   xaxis=dict(title='Time', linecolor='black'),
                   yaxis=dict(title='Price',linecolor='black'),
                   height=750)


# get side by side view
fig = make_subplots(rows=1, cols=2, subplot_titles=('Lobster Output', 'My Output'),horizontal_spacing=0.1)
# lobster traces
fig.add_trace(lb_ask_line_trace, row=1,col=1)
fig.add_trace(lb_bid_line_trace, row=1,col=1)
# my order book output traces
fig.add_trace(my_ask_line_trace, row=1,col=2)
fig.add_trace(my_bid_line_trace, row=1,col=2)

# The subplot figure never receives the Layout configured above, so set its height
# and axis labels directly. Both panels are plotted against row position (the x values
# are the integer indexes of the frames), hence the 'Row' label rather than 'Time'.
fig.update_layout(height=750)
fig.update_xaxes(title_text='Row', linecolor='black')
fig.update_yaxes(title_text='Price', linecolor='black')

#pio.write_image(fig, 'AAPL_BBO_LOBSTER_Comparison.png', format='png', scale=1, width=1400, height=800)
#fig.show()

LOBSTER order book file compared with the order book we reconstructed from the message file:

!['Lobster orderbook file compared with our orderbook that we constructed based on the message file'](../data/images/AAPL_BBO_LOBSTER_Comparison.png)