In [2]:
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

import db_lob as lob

In [58]:
# Step 1: load a list of messages
# The parsed `messages` object is measured with len() and sliced by the
# reconstruction loop, so it must behave like a sequence.
messages_file_path = '2505133.csv'  # relative path: resolved against the kernel's working directory
print("Reading messages...")
messages = lob.parse_FullMessages(messages_file_path)
print("Messages read!")

Reading messages...
Messages read!


In [59]:
# Step 2: create the limit order book that the messages will be replayed into
# NOTE(review): the meaning and units of both arguments are defined by db_lob, not
# in this notebook -- presumably `volume_threshold` is the traded volume that closes
# a bar and `ticksize` the minimum price increment; confirm against db_lob.
volume_threshold = 1000000
ticksize = 0.0001
book = lob.LimitOrderBook(volume_threshold, ticksize)

In [65]:
# Step 3: loop over the messages and feed them to the lob
# Just for testing, only the first `pct` fraction of the messages is replayed.
pct = 0.9
n_msg = int(len(messages) * pct)
for msg in tqdm(messages[:n_msg], desc="Reconstructing the book"):
    bars = book.generic_incremental_update(msg)

    # The update may return None; otherwise the result is indexed as bars[0] and
    # bars[1], and each part is converted to a DataFrame from records.
    # NOTE(review): vbars/fbars are rebound on every non-None result, so after the
    # loop they hold only the most recent conversion, and they are never defined if
    # no update returns bars. Confirm whether the returned bars are cumulative or
    # whether they should be accumulated across iterations.
    if bars is not None:
        vbars = pd.DataFrame.from_records(bars[0])
        fbars = pd.DataFrame.from_records(bars[1])

Reconstructing the book: 100%|██████| 1484028/1484028 [02:46<00:00, 8916.29it/s]


In [67]:
def df_from_book(book):
    """Snapshot both sides of a limit order book into a single DataFrame.

    Parameters
    ----------
    book : object
        Must expose ``askTree`` and ``bidTree`` mappings whose ``values()`` yield
        price levels carrying ``price`` and ``totalVolume`` attributes.

    Returns
    -------
    pandas.DataFrame
        Columns ``ask_volume``, ``ask_price``, ``bid_volume``, ``bid_price`` (in
        that order). Row ``i`` pairs the i-th ask level with the i-th bid level,
        following the iteration order of each tree. When one side has fewer
        levels than the other, its columns are padded with NaN rather than the
        deeper side being cut short.
    """
    def side_frame(tree, side):
        # One row per price level of a single side of the book.
        levels = list(tree.values())
        return pd.DataFrame({
            f'{side}_volume': [level.totalVolume for level in levels],
            f'{side}_price': [level.price for level in levels],
        })

    # Each side is built independently and aligned on the row index: zipping the
    # two sides together would silently drop every level beyond the shallower side.
    return pd.concat(
        [side_frame(book.askTree, 'ask'), side_frame(book.bidTree, 'bid')],
        axis=1,
    )

In [68]:
# Persist the current book snapshot as CSV under `output_dir`.
df = df_from_book(book)
output_dir = 'data_cleaned'  # named so it does not shadow the builtin dir()
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(os.path.join(output_dir, 'LOB.csv'), index=False)

In [69]:
# Show the book snapshot; the rich repr elides the middle rows of a long frame.
df

Unnamed: 0,ask_volume,ask_price,bid_volume,bid_price
0,700000,15708000000,250000,15706000000
1,700000,15710000000,700000,15704000000
2,250000,15712000000,250000,15702000000
3,500000,15713999999,700000,15700000000
4,500000,15716000000,400000,15697999999
...,...,...,...,...
280,50000,40000000000,1000000,13612000000
281,10000,43000000000,500000,13610000000
282,20000,50000000000,2000000,13600000000
283,130000,200000000000,200000,13590000000


In [56]:
# Show the second bar table produced by the reconstruction loop.
# NOTE(review): this cell's execution count (56) is lower than the loop cell's (65),
# so the output captured here comes from an earlier run; re-run top to bottom.
# NOTE(review): in the captured output several `time` values are far smaller than
# their neighbours and best bid/ask prices are identical with zero spread --
# possibly uninitialised or truncated fields; verify in db_lob.
fbars

Unnamed: 0,best_bid_price,best_ask_price,best_bid_volume,best_ask_volume,bid_ask_spread,mid_price,VWAP,order_flow_imbalance,time,total_ask_volume,total_bid_volume,ask_moment_1,bid_moment_1,ask_moment_2,bid_moment_2,ask_moment_3,bid_moment_3,entropy_on_ask,entropy_on_bid
0,157,157,700000,500000,0,157,157.388333,0.0,1586159169049650171,423199981,880509980,1.894521e+10,-1.287277e+10,3.889607e+20,1.752443e+20,1.210850e+31,-2.448350e+30,6.800906,7.604482
1,157,157,700000,500000,0,157,157.388333,0.0,94752437745008,423199981,880509980,1.894521e+10,-1.287277e+10,3.889607e+20,1.752443e+20,1.210850e+31,-2.448350e+30,6.800906,7.604482
2,157,157,700000,500000,0,157,157.388333,0.0,94752437745008,423199981,880509980,1.894521e+10,-1.287277e+10,3.889607e+20,1.752443e+20,1.210850e+31,-2.448350e+30,6.800906,7.604482
3,157,157,700000,500000,0,157,157.388333,0.0,94752437745008,423199981,880509980,1.894521e+10,-1.287277e+10,3.889607e+20,1.752443e+20,1.210850e+31,-2.448350e+30,6.800906,7.604482
4,157,157,700000,500000,0,157,157.388333,0.0,1586159169049780605,423199981,880509980,1.894521e+10,-1.287277e+10,3.889607e+20,1.752443e+20,1.210850e+31,-2.448350e+30,6.800906,7.604482
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,157,157,700000,500000,0,157,157.388333,0.0,1586159172527707215,423519981,880509980,1.894319e+10,-1.287277e+10,3.888669e+20,1.752443e+20,1.210260e+31,-2.448350e+30,6.805411,7.604482
206,157,157,700000,500000,0,157,157.388333,0.0,1586159172539778320,423519981,880509980,1.894319e+10,-1.287277e+10,3.888669e+20,1.752443e+20,1.210260e+31,-2.448350e+30,6.805411,7.604482
207,157,157,700000,500000,0,157,157.388333,0.0,1586159172539870383,423519981,880509980,1.894319e+10,-1.287277e+10,3.888669e+20,1.752443e+20,1.210260e+31,-2.448350e+30,6.805411,7.604482
208,157,157,700000,500000,0,157,157.388333,0.0,1586159172539883657,423519981,880509980,1.894319e+10,-1.287277e+10,3.888669e+20,1.752443e+20,1.210260e+31,-2.448350e+30,6.805411,7.604482
