In [1]:
import sys
sys.path.append('../src')
from order_book import Book
from event import Event

from scipy.linalg import block_diag
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import numpy as np
import plotly.graph_objects as go

import warnings
warnings.simplefilter(action='ignore', category=UserWarning)

# Load the LOBSTER message file; the CSV is read without a header row, so the
# column names are supplied here.
cols = ['time', 'type', 'id', 'shares', 'price', 'direction']
data = pd.read_csv(
    "../data/lobster/MSFT_2012-06-21_34200000_37800000_message_50.csv",
    names=cols,
)
# Rescale the price column by 1/10,000 (presumably the file stores prices as
# integers in units of 1e-4 dollars -- confirm against the LOBSTER spec).
data['price'] = data['price'] / 10000
# Keep only messages stamped inside regular market hours; `time` is compared
# against seconds after midnight (09:30 -> 16:00), both bounds inclusive.
market_open, market_close = 9.5*60*60, 16*60*60
in_session = (data['time'] >= market_open) & (data['time'] <= market_close)
data = data[in_session]

print(len(data))
data.head()

141507


Unnamed: 0,time,type,id,shares,price,direction
0,34200.013994,3,16085616,100,31.04,-1
1,34200.013994,1,16116348,100,31.05,-1
2,34200.015248,1,16116658,100,31.04,-1
3,34200.015442,1,16116704,100,31.05,-1
4,34200.015789,1,16116752,100,31.06,-1


In [2]:
# Replay the leading messages through a fresh Book to rebuild the order book.
book = Book()

# NOTE(review): rows are fetched by index *label* (`data.loc`), which only
# coincides with row position while the market-hours filter above has not
# dropped any of the first rows.
n_events = 5_100
for row_label in range(n_events):
    book.handleEvent(Event(data.loc[row_label]), row_label)

In [3]:
# Turn the reconstructed book into a flat table with one price level per side.
# `start_from=100` presumably skips the first 100 snapshots (5,100 events in,
# 5,000 rows out) -- confirm against Book.formatBook.
order_book = book.formatBook(
    levels=1,
    start_from=100,
)
print(len(order_book))
order_book.head()

5000


Unnamed: 0,Ask_1,Ask_1_Vol,Ask_1_Ord,Bid_1,Bid_1_Vol,Bid_1_Ord,Time
0,31.0,6700.0,2.0,30.95,300.0,1.0,03:30:00.197858
1,31.0,6700.0,2.0,30.95,300.0,1.0,03:30:00.197889
2,31.0,6700.0,2.0,30.95,300.0,1.0,03:30:00.197889
3,31.0,6700.0,2.0,30.95,300.0,1.0,03:30:00.197919
4,31.0,6700.0,2.0,30.95,300.0,1.0,03:30:00.197919


In [15]:
# Plot the order-queue length (number of resting orders) at the best bid and
# best ask. The x axis is the row index of `order_book`.
queue_specs = [
    ('Bid_1_Ord', 'green', 'Bid'),
    ('Ask_1_Ord', 'red', 'Ask'),
]
traces = [
    go.Scatter(
        x=order_book.index,
        y=order_book[column],
        mode='lines',
        line=dict(color=colour),
        name=label,
    )
    for column, colour, label in queue_specs
]
fig = go.Figure(data=traces)
fig.update_layout(
    title='MSFT BBO Order Queue Length',
    xaxis_title='Time',
    yaxis_title='Queue Length',
    width=1440,
    height=800,
    hovermode='x',
)
fig.show()

!['Q'](../data/images/q_lens.png)

In [5]:
# Set up the model variables, the discrete Markov state space
# (spread x imbalance bucket) and the symmetrized sample.
order_book['Mid'] = 0.5 * (order_book['Bid_1'] + order_book['Ask_1'])
# Imbalance is the bid share of the resting order *count* at the best quotes.
order_book['Imbalance'] = order_book['Bid_1_Ord'] / (order_book['Bid_1_Ord'] + order_book['Ask_1_Ord'])
order_book['Spread'] = (order_book['Ask_1'] - order_book['Bid_1'])
order_book['Weighted_Mid'] = (order_book['Imbalance']*order_book['Ask_1']) + ((1-order_book['Imbalance'])*order_book['Bid_1'])

# Discretise the imbalance into quantile buckets labelled 0 .. n_imbalance-1.
n_imbalance = 10
order_book['Imbalance_Bucket'] = pd.qcut(order_book['Imbalance'], n_imbalance, labels=False)

# Tick size = smallest strictly positive spread, rounded to cents. Locked or
# crossed rows (spread <= 0) are ignored here; otherwise a single such row
# would make tick_size <= 0 and break every division by it further down.
tick_size = np.round(order_book.loc[order_book['Spread'] > 0, 'Spread'].min() * 100) / 100
n_spread = 2
# Keep rows whose spread is 1 .. n_spread whole ticks. The comparison is done
# on the rounded tick count because
#   * the previous mask `(a <= b) & spread > 0` parsed as `((a <= b) & spread) > 0`
#     (`&` binds tighter than `>`), so "spread > 0" was never actually enforced;
#   * raw float spreads such as 31.01 - 30.99 = 0.0200000000000031 compare
#     greater than 2 * 0.01 and were silently dropped.
spread_in_ticks = np.round(order_book['Spread'] / tick_size)
order_book = order_book.loc[(spread_in_ticks >= 1) & (spread_in_ticks <= n_spread)].copy()
# Snap the spread to whole cents so it can be used as an exact grouping key.
order_book['Spread'] = np.round(order_book['Spread'] * 100) / 100

# Attach the state observed `dt` rows later (rows, not clock time).
dt = 1
order_book['Next_Mid'] = order_book['Mid'].shift(-dt)
order_book['Next_Spread'] = order_book['Spread'].shift(-dt)
order_book['Next_Time'] = order_book['Time'].shift(-dt)
order_book['Next_Imbalance_Bucket'] = order_book['Imbalance_Bucket'].shift(-dt)
# Mid-price change rounded to the nearest half tick.
order_book['Delta_Mid'] = np.round((order_book['Next_Mid'] - order_book['Mid'])/tick_size*2)*tick_size/2
# Keep moves of at most one tick; the trailing rows whose Next_Mid is NaN fail
# both comparisons and are dropped here as well.
order_book = order_book.loc[(order_book['Delta_Mid'] <= tick_size*1.1) & (order_book['Delta_Mid'] >= -tick_size*1.1)].copy()

# Mirror image of the sample: buckets are reflected (b -> n_imbalance-1-b) and
# the mid and its change are negated.
order_book_sym = order_book.copy(deep=True)
order_book_sym['Imbalance_Bucket'] = n_imbalance - 1 - order_book_sym['Imbalance_Bucket']
order_book_sym['Next_Imbalance_Bucket'] = n_imbalance - 1 - order_book_sym['Next_Imbalance_Bucket']
order_book_sym['Delta_Mid'] = -order_book_sym['Delta_Mid']
order_book_sym['Mid'] = -order_book_sym['Mid']

# Stack original and mirrored rows under a fresh 0..N-1 index.
symmetrized_data = pd.concat([order_book, order_book_sym], ignore_index=True)

In [6]:
# Estimate Q (moves between imbalance buckets while the mid is unchanged) and
# R1 (moves into one of the four non-zero mid changes) from transition counts.
# NOTE(review): np.resize repeats or truncates data silently, so the layout
# below is only right when every (spread, bucket, next-state) cell is present
# in the pivot tables.
no_change = symmetrized_data.loc[symmetrized_data['Delta_Mid'] == 0]
no_change_counts = no_change.pivot_table(
    index=['Next_Imbalance_Bucket'],
    columns=['Spread', 'Imbalance_Bucket'],
    values='Time',
    fill_value=0,
    aggfunc='count',
).unstack()

# One (n_imbalance x n_imbalance) block per spread, laid out on the diagonal.
flat_counts = np.array(no_change_counts)
block_size = n_imbalance * n_imbalance
spread_blocks = [
    np.resize(flat_counts[k * block_size:(k + 1) * block_size], (n_imbalance, n_imbalance))
    for k in range(n_spread)
]
q_counts = block_diag(*spread_blocks)

change = symmetrized_data.loc[symmetrized_data['Delta_Mid'] != 0]
change_counts = change.pivot_table(
    index=['Delta_Mid'],
    columns=['Spread', 'Imbalance_Bucket'],
    values='Time',
    fill_value=0,
    aggfunc='count',
).unstack()
# 4 columns: Delta_Mid was rounded to half ticks and capped at one tick, so
# the non-zero values are -1, -1/2, +1/2 and +1 tick.
r_counts = np.resize(np.array(change_counts), (n_imbalance * n_spread, 4))

# Turn each row of [Q | R1] counts into probabilities that sum to one.
q_and_r_counts = np.concatenate((q_counts, r_counts), axis=1).astype(float)
q_and_r_counts = q_and_r_counts / q_and_r_counts.sum(axis=1, keepdims=True)

n_states = n_imbalance * n_spread
Q = q_and_r_counts[:, :n_states]
R1 = q_and_r_counts[:, n_states:]

In [7]:
# Estimate R2: given that the mid moved, which (spread, bucket) state follows.
change = symmetrized_data.loc[symmetrized_data['Delta_Mid'] != 0]
change_counts = change.pivot_table(
    index=['Spread', 'Imbalance_Bucket'],
    columns=['Next_Spread', 'Next_Imbalance_Bucket'],
    values='Time',
    fill_value=0,
    aggfunc='count',
)
n_states = n_imbalance * n_spread
r2_counts = np.resize(np.array(change_counts), (n_states, n_states))

# Normalise the rows of [Q | R2] jointly, then keep only the R2 half.
q_and_r2_counts = np.concatenate((q_counts, r2_counts), axis=1).astype(float)
q_and_r2_counts = q_and_r2_counts / q_and_r2_counts.sum(axis=1, keepdims=True)
R2 = q_and_r2_counts[:, n_states:]

In [8]:
# G1 = (I - Q)^-1 R1 K   and   B = (I - Q)^-1 R2
# K lists the mid-price change attached to each of R1's four columns.
K = np.array([-0.01, -0.005, 0.005, 0.01])
# The inverse is shared by both products, so it is formed once.
inv_i_minus_q = np.linalg.inv(np.eye(n_imbalance * n_spread) - Q)
G1 = inv_i_minus_q.dot(R1).dot(K)
B = inv_i_minus_q.dot(R2)

In [9]:
# Partial sums G_k = G1 + B G1 + B^2 G1 + ... + B^(k-1) G1 for k = 2 .. 6.
# Powers of B are built left to right, (((B B) B) ...), one factor per step.
partial_sums = []
running_total = G1
b_power = None
for _ in range(5):
    b_power = B if b_power is None else np.dot(b_power, B)
    running_total = running_total + np.dot(b_power, G1)
    partial_sums.append(running_total)
G2, G3, G4, G5, G6 = partial_sums

In [10]:
# Compare the G6 adjustment per imbalance bucket with two reference lines:
# a flat line at zero ("Mid Price") and a straight line from -0.005 to +0.005
# ("Weighted Mid").
imb = list(range(n_imbalance))
weighted_mid = np.linspace(-0.005, 0.005, n_imbalance)
mid_price = weighted_mid * 0
# The first n_imbalance entries of G6 belong to the first spread state, the
# remaining entries to the second.
G6_spread_1, G6_spread_2 = G6[:n_imbalance], G6[n_imbalance:]

fig = go.Figure()
for y_values, label in (
    (mid_price, 'Mid Price'),
    (weighted_mid, 'Weighted Mid'),
    (G6_spread_1, 'G6: Spread = 1 tick adj'),
    (G6_spread_2, 'G6: Spread = 2 tick adj'),
):
    fig.add_trace(go.Scatter(x=imb, y=y_values, name=label))
fig.update_layout(
    title='Adjustments',
    xaxis_title='Imbalance Bucket',
    yaxis_title='Price Change',
    height=700,
    width=800,
)
fig.show()

!['q_adj'](../data/images/q_adj.png)

In [11]:
# Raise B to the 100th power and plot its first row, split by spread state,
# as the stationary distribution over imbalance buckets.
w = np.linalg.matrix_power(B, 100)
first_row = w[0]
spread_1, spread_2 = first_row[:n_imbalance], first_row[n_imbalance:]

fig = go.Figure()
for y_values, label in (
    (spread_1, 'Spread = 1 tick adj'),
    (spread_2, 'Spread = 2 tick adj'),
):
    fig.add_trace(go.Scatter(x=imb, y=y_values, name=label))
fig.update_layout(
    title='Stationary Distribution',
    xaxis_title='Imbalance Bucket',
    yaxis_title='Frequency',
    height=700,
    width=800,
)
fig.show()

!['q_counts'](../data/images/q_counts.png)

In [12]:
# Micro-price = mid + G6 adjustment of the row's (spread, imbalance bucket) state.
#
# G6 is ordered spread-major: entries [0, n_imbalance) belong to a 1-tick
# spread, the next n_imbalance entries to a 2-tick spread, and so on. The
# state index is computed for all rows at once instead of looking each row up
# with `.iloc` in a Python loop, and it is derived from `tick_size` and
# `n_imbalance` rather than from a float-keyed dict hard-coded for a 0.01 tick
# and 10 buckets.
spread_in_ticks = np.round(order_book['Spread'] / tick_size).astype(int)
state = (spread_in_ticks - 1) * n_imbalance + order_book['Imbalance_Bucket'].astype(int)

# Fail loudly on a state outside G6: NumPy would otherwise wrap negative
# indices around and return the adjustment of an unrelated state.
if ((state < 0) | (state >= len(G6))).any():
    raise ValueError('order_book contains a (Spread, Imbalance_Bucket) state outside G6')

micro_price = order_book['Mid'].to_numpy() + G6[state.to_numpy()]
order_book['Micro_Price'] = micro_price

In [14]:
# Overlay best bid/ask, mid, weighted mid and micro-price on one chart.
# The x axis is the row index of `order_book`.
price_specs = [
    ('Bid_1', 'green', 'Bid'),
    ('Ask_1', 'red', 'Ask'),
    ('Mid', 'blue', 'Mid'),
    ('Weighted_Mid', 'black', 'W-Mid'),
    ('Micro_Price', 'purple', 'Micro'),
]
traces = [
    go.Scatter(
        x=order_book.index,
        y=order_book[column],
        mode='lines',
        line=dict(color=colour),
        name=label,
    )
    for column, colour, label in price_specs
]
fig = go.Figure(data=traces)
fig.update_layout(
    title='MSFT Bid / Ask',
    xaxis_title='Time',
    yaxis_title='Price',
    width=1440,
    height=800,
    hovermode='x',
)
fig.show()

!['q_micro'](../data/images/q_micro.png)