In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the original data: load the raw order-event CSV into `df`.
# Later cells read its `market_state`, `reason`, `side`, `price` and
# `book_change` columns.
df = pd.read_csv('./data/20170417_AEM_original.csv')
# Preview the first rows as the cell output.
df.head()

When the market state is
* "nan" : The market is closed and only receives orders.
* "Pre-open" : From 7am to 9:30am. Orders may be entered, but will not be executed. The COP is displayed and continuously updated.
* "Opening" :  Market on Open (MOO) at 9:30am. All matching orders are executed at a single opening trade price with any remaining orders carrying through to the continuous limit order book.
* "Open" : Continuous Trading from 9:30 to 4pm - All regular order types are accepted.
* 'MOC Imbalance' : MOC Imbalance market from 3:40pm to 4pm.
* 'CCP Determination' : Calculated Closing Price (“CCP”) Determination at 4pm.
* 'Extended Hours CXLs' : Post Market Cancel Session from 4:10pm to 4:15pm - During this session, open orders may be cancelled by the dealer.
* 'Extended Hours Open' : Extended Trading Session from 4:15pm to 5pm - Orders at the last sale price are accepted.

See https://www.tsx.com/trading/calendars-and-trading-hours/trading-hours

In [None]:
# List the distinct market-state labels that occur in the data.
df['market_state'].unique()

Order types :
* "BOD" : Orders sent when the market is closed.
* "Booked" : Booked limit orders.
* "CANCELLED" : Cancelled limit orders.
* "TRADE" : Market orders.
* "Undisclosed" : Compensator of an unfilled market order.
* "PRICE_CHANGE:COP", "PRICE_CHANGE:AssignLimit" : Orders during pre-open when calculating the opening price.

See https://www.tsx.com/resource/en/133

In [None]:
# List the distinct order-type labels (`reason` column) that occur in the data.
df['reason'].unique()

When the market is Pre-open, we group the orders by price and generate a sequence of all the bids and offers.

In [None]:
def nonrecursive_order_book(position, df):
    """Aggregate every event up to `position` into per-price resting volume.

    Parameters
    ----------
    position : index label
        Last row label to include; `df.loc[:position]` is label-based and
        therefore inclusive of `position`.
    df : pandas.DataFrame
        Event table with `side`, `price` and `book_change` columns.

    Returns
    -------
    (lob_buy, lob_sell) : tuple of pandas.DataFrame
        One frame per side, indexed by `price`, with a single `book_change`
        column holding the summed change. Only price levels whose summed
        change is strictly positive are kept.
    """
    history = df.loc[:position]

    def _net_volume(side):
        # Sum the book changes per price for one side, then drop the levels
        # that are not left with a positive total.
        events = history.loc[history.side == side, ['price', 'book_change']]
        totals = events.groupby('price').sum()
        return totals[totals['book_change'] > 0]

    return _net_volume('Buy'), _net_volume('Sell')


# Depth of the limit order book: number of price levels reported per side
N = 5

# Label of the row that follows the last 'Pre-open' row. The helper slices with
# `.loc[:start]`, which is inclusive, so this row is aggregated as well.
start = df[df['market_state'] == 'Pre-open'].index[-1] + 1
lob_buy, lob_sell = nonrecursive_order_book(start, df)

# Flatten each side of the book into a {price: volume} dictionary
lob_sell_dict = dict(zip(lob_sell.index.values, lob_sell['book_change'].values))
lob_buy_dict = dict(zip(lob_buy.index.values, lob_buy['book_change'].values))

After the market is open, we first build a limit order book of depth $N$ using the current highest bid price $p_b$ and lowest ask price $p_a$.
The ask side of the book ranges from $p_a$ to $p_a + (N - 1) \times ticksize$. The bid side ranges from $p_b - (N-1) \times ticksize$ to $p_b$.
If the price level $p_a + i \times ticksize$ is already in the sequence, we record its volume; if not, we set the volume to 0. The bid side is handled the same way. We update the book whenever a new order comes in.

In [None]:
def update_dict(d, price, bookChange):
    """Accumulate `bookChange` into the volume stored for `price` in `d`.

    If `price` is already a key, its stored volume is increased by
    `bookChange`; otherwise a new entry `price -> bookChange` is created.
    The dictionary is modified in place and nothing is returned.
    """
    if price in d:
        d[price] = bookChange + d[price]
    else:
        d[price] = bookChange

def update_lob(row):
    """Apply one event row to the global book dictionaries and snapshot the book.

    Routing of `row['book_change']` at `row['price']`:

    * market state 'Opening': only rows whose reason is 'TRADE' are applied,
      and they hit the opposite side (a 'Sell' updates `lob_buy_dict`,
      a 'Buy' updates `lob_sell_dict`);
    * any other market state: a 'Buy' updates `lob_buy_dict` and a 'Sell'
      updates `lob_sell_dict`.

    Rows with any other `side` value leave both dictionaries untouched.
    Returns `generate_lob` evaluated on the two dictionaries with their
    zero-volume price levels removed.
    """
    during_opening = row['market_state'] == 'Opening'

    if during_opening and row['reason'] != 'TRADE':
        # Non-trade events are ignored while the market is opening.
        target = None
    else:
        if during_opening:
            routing = {'Sell': lob_buy_dict, 'Buy': lob_sell_dict}
        else:
            routing = {'Buy': lob_buy_dict, 'Sell': lob_sell_dict}
        target = routing.get(row.side)

    # `is not None` rather than truthiness: an empty book is still a valid target.
    if target is not None:
        update_dict(target, row['price'], row['book_change'])

    # Price levels whose volume has netted out to zero are not part of the book.
    bids = {level: volume for level, volume in lob_buy_dict.items() if volume != 0}
    asks = {level: volume for level, volume in lob_sell_dict.items() if volume != 0}

    return generate_lob(asks, bids)
    
def generate_lob(a, b, depth=None, tick=0.01, decimals=2):
    """Snapshot the top price levels of the book as a single-row array.

    Parameters
    ----------
    a : dict
        Ask side, mapping price -> volume.
    b : dict
        Bid side, mapping price -> volume.
    depth : int, optional
        Number of levels per side. Defaults to the module-level `N`.
    tick : float, optional
        Price increment between consecutive levels (default 0.01).
    decimals : int, optional
        Number of decimals level prices are rounded to before they are looked
        up in `a` / `b` (default 2, matching the default tick).

    Returns
    -------
    numpy.ndarray of shape (1, 4 * depth)
        For each level i in turn: ask price, bid price, ask volume, bid
        volume (pa_i, pb_i, va_i, vb_i). Ask levels start at the lowest ask
        and go up by one tick; bid levels start at the highest bid and go
        down by one tick. A level that is not a key of the book has volume 0.
        If a side is empty its prices are NaN and its volumes 0, instead of
        `min()` / `max()` raising ValueError on an empty sequence.
    """
    levels = N if depth is None else depth

    # The best quotes do not change inside the loop, so compute them once.
    best_ask = min(a) if a else np.nan
    best_bid = max(b) if b else np.nan

    snapshot = []
    for i in range(levels):
        # Round so the computed level compares equal to the price keys.
        ask_price = np.round(best_ask + i * tick, decimals)
        bid_price = np.round(best_bid - i * tick, decimals)
        snapshot.extend((ask_price, bid_price,
                         a.get(ask_price, 0), b.get(bid_price, 0)))

    return np.array(snapshot).reshape(1, 4 * levels)

In [None]:
# Snapshot of the book built from the pre-open dictionaries, taken before any
# later event is applied
original_lob = generate_lob(lob_sell_dict, lob_buy_dict)

# Replay the remaining events in order; every call updates the book and yields
# one (1, 4*N) snapshot.
# NOTE: update_lob mutates lob_buy_dict / lob_sell_dict, so re-running this cell
# without rebuilding those dictionaries applies the same events a second time.
df_open = df.loc[start + 1:].reset_index(drop=True)
result = [update_lob(df_open.iloc[i]) for i in range(len(df_open))]

Concatenate the limit order book data frame with the original data frame.

In [None]:
# Stack the pre-open snapshot on top of the per-event snapshots, one row each
snapshots = np.concatenate((original_lob, np.concatenate(result)))

# generate_lob emits pa, pb, va, vb for each level in turn, so column 4*i + k
# carries the k-th of those four names for level i
col = {
    4 * i + k: prefix + str(i)
    for i in range(N)
    for k, prefix in enumerate(('pa', 'pb', 'va', 'vb'))
}
lob = pd.DataFrame(snapshots).rename(columns=col)

# `lob` has one more row than `df_open` (the leading pre-open snapshot), so row i
# pairs event i with the book as it stood before that event was applied, and the
# final row carries a book snapshot with no event attached.
merge_lob = pd.concat([df_open, lob], axis=1).reindex(lob.index)
merge_lob.head()