This notebook contains auxiliary functions used to read and plot data.

In [9]:
import matplotlib.pyplot as plt
import random
import pandas as pd
import numpy as np

In [None]:
# Functions used to read the LOB data.
# Generates the column names for the orderbook.
def gen_orderbook_colnames(n_levels):
    """Return the orderbook column names for ``n_levels`` book levels.

    Every name has the form ``<side>_<attr>_<level>`` where side is
    "ask" or "bid", attr is "p" or "s" and level counts up from 1.
    Names are ordered by level first, then side (ask before bid), then
    attribute ("p" before "s"), i.e. four names per level.
    """
    sides = ("ask", "bid")
    attrs = ("p", "s")
    return [
        f"{side}_{attr}_{depth}"
        for depth in range(1, n_levels + 1)
        for side in sides
        for attr in attrs
    ]
# Reads the CSV files, appending different tickers and dates.
def read_files(tickers,dates,root,start_time
                ,end_time,n_levels,file_type):
    """Load one header-less CSV per (ticker, date) pair and stack them.

    Each file is looked up at
    ``root + "<ticker>_<date>_<start_time>_<end_time>_<file_type>_<n_levels>.csv"``
    (``root`` is prepended verbatim, so it must already end with a path
    separator). The result has the columns "date" and "ticker" followed by
    the data columns of the requested file type. The per-file row index is
    kept as is, so index values repeat when several files are read.

    Raises:
        ValueError: if ``file_type`` is neither "message" nor "orderbook".
    """
    if file_type == "message":
        data_cols = ["time", "type", "order_id", "size", "price",
                     "trade_direction", "trade_type"]
        col_dtypes = {"trade_type": "str"}
    elif file_type == "orderbook":
        data_cols = gen_orderbook_colnames(n_levels)
        col_dtypes = {}
    else:
        raise ValueError("type should be 'message' or 'orderbook'!")

    ordered_cols = ["date", "ticker"] + data_cols
    # Everything in the file name after "<ticker>_<date>" is loop-invariant.
    name_tail = [str(start_time), str(end_time), file_type, str(n_levels)]

    frames = []
    for ticker in tickers:
        for date in dates:
            csv_path = root + "_".join([ticker, date] + name_tail) + ".csv"
            frame = pd.read_csv(csv_path, names=data_cols, dtype=col_dtypes)
            # Tag the rows with their origin and move the tags to the front.
            tagged = frame.assign(date=date, ticker=ticker)
            frames.append(tagged[ordered_cols])
    return pd.concat(frames)

In [None]:
# Lookup tables translating the numeric ids found in the "type" and
# "trade_direction" message columns into readable names.
type_dict = {
    1: "new_limit",
    2: "partial_cancellation",
    3: "total_deletion",
    4: "visible_execution",
    5: "hidden_execution",
    7: "halt",
}
trade_direction_dict = {
    1: "buy",
    -1: "sell",
}

# Function that returns ask and bid prices and their sizes separately.
def get_prices_sizes(df, idx, n_levels):
    """Split one orderbook row into ask/bid prices and sizes.

    The row at position ``idx`` is read positionally. The first two columns
    are skipped; the following ``4 * n_levels`` columns are taken to repeat
    the pattern (ask price, ask size, bid price, bid size) once per level.
    Prices are divided by 10000, sizes are returned unchanged.

    Returns:
        Tuple ``(ask_prices, ask_sizes, bid_prices, bid_sizes)`` of arrays
        with one entry per level.
    """
    first_col = 2
    stop_col = first_col + 4 * n_levels

    def strided_values(offset):
        # Every fourth column starting at the given offset inside the
        # per-level group of four.
        cols = list(range(first_col + offset, stop_col, 4))
        return df.iloc[idx, cols].values

    ask_prices = strided_values(0) / 10000
    ask_sizes = strided_values(1)
    bid_prices = strided_values(2) / 10000
    bid_sizes = strided_values(3)
    return ask_prices, ask_sizes, bid_prices, bid_sizes
# Draws one orderbook snapshot as a bar chart on the given axes.
def _plot_orderbook_state(ax, a_p, a_s, b_p, b_s, a_changed, b_changed,
                          title, ticks, bar_width=0.005):
    """Plot ask/bid volumes per price and highlight the changed levels.

    ``a_changed`` / ``b_changed`` are boolean arrays (one entry per level);
    levels flagged True are over-drawn in a darker colour at full height.
    """
    ax.bar(a_p, a_s, width=bar_width, color="lightcoral", label="ask")
    ax.bar(b_p, b_s, width=bar_width, color="lightgreen", label="bid")
    # Multiplying by the mask gives zero-height bars for unchanged levels.
    ax.bar(a_p, a_changed * a_s, width=bar_width, color="darkred",
           label="change_ask")
    ax.bar(b_p, b_changed * b_s, width=bar_width, color="darkgreen",
           label="change_bid")
    ax.set_xlabel("price")
    ax.set_ylabel("volume")
    ax.set_title(title)
    ax.set_xticks(ticks)
    ax.set_xticklabels(ticks, rotation=45)


# Visually explains the relationship between the message and orderbook.
def explain_orderbook(m_df, o_df, type, trade_direction, n_levels):
    """Plot the orderbook right before and right after a random message.

    A message whose "type" and "trade_direction" equal the given values is
    picked at random from ``m_df``. The orderbook rows of ``o_df`` at the
    previous position and at the same position are drawn side by side, with
    the levels whose size differs between the two states highlighted. The
    figure is saved to "lob_plot.pdf" and shown. A message is printed instead
    when no suitable order exists.

    Args:
        m_df: message frame with "type", "trade_direction", "price" and
            "size" columns.
        o_df: orderbook frame; assumed to be row-aligned with ``m_df``
            (row i is the book after message i) -- TODO confirm with caller.
        type: numeric order type id (name kept for backward compatibility,
            although it shadows the builtin).
        trade_direction: numeric direction id.
        n_levels: number of book levels stored in ``o_df``.
    """
    is_match = ((m_df["type"] == type) &
                (m_df["trade_direction"] == trade_direction)).to_numpy()
    # Work with row positions, not index labels: the rows are read through
    # .iloc below, and labels need not be unique or equal to positions.
    positions = np.flatnonzero(is_match)
    if positions.size == 0:
        print(f"no combination order type={type} and \n"
              f"        trade_direction={trade_direction} was found!")
        return

    # Position 0 has no earlier state; idx1 - 1 would be -1 and .iloc would
    # silently wrap around to the last row of the orderbook.
    candidates = positions[positions > 0].tolist()
    if not candidates:
        print("matching orders were found, but none has a previous "
              "orderbook state to compare with!")
        return

    # Random sample of the orderbook and the state before it.
    idx1 = random.sample(candidates, k=1)[0]
    idx0 = idx1 - 1
    a_p0, a_s0, b_p0, b_s0 = get_prices_sizes(o_df, idx0, n_levels)
    a_p1, a_s1, b_p1, b_s1 = get_prices_sizes(o_df, idx1, n_levels)

    order = m_df.iloc[idx1]
    price = order["price"]
    size = order["size"]
    # Fall back to the raw id for values missing from the lookup tables.
    type_name = type_dict.get(type, type)
    direction_name = trade_direction_dict.get(trade_direction,
                                              trade_direction)
    print(f"type {type_name}, direction: {direction_name},\n"
          f"price: {price}, size: {size}")

    # Plots the states of the orderbook before and after the order.
    plt.rcParams.update({"font.size": 11})
    tick_interval = 0.01
    # One shared tick grid spanning both states keeps the panels comparable
    # and covers prices that only appear in one of them.
    lowest = min(min(b_p0), min(b_p1))
    highest = max(max(a_p0), max(a_p1))
    ticks = np.round(
        np.arange(lowest, highest + tick_interval, tick_interval), 2)
    ask_changed = a_s0 != a_s1
    bid_changed = b_s0 != b_s1

    fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 5))
    _plot_orderbook_state(ax0, a_p0, a_s0, b_p0, b_s0,
                          ask_changed, bid_changed, r"$t$", ticks)
    _plot_orderbook_state(ax1, a_p1, a_s1, b_p1, b_s1,
                          ask_changed, bid_changed, r"$t + 1$", ticks)
    fig.tight_layout()
    # As before, the legend is drawn on the right-hand panel only.
    ax1.legend()
    fig.savefig("lob_plot.pdf", format="pdf", dpi=300, bbox_inches="tight")
    plt.show()