In [5]:
from __future__ import annotations
import polars as pl
import seaborn as sns
import tqdm
from databento_dbn import FIXED_PRICE_SCALE, UNDEF_PRICE
# plot price of each over time
import plotly.graph_objs as go
import plotly.offline as pyo
from helpers import * 


In [2]:
# Load the MBP parquet dump and narrow it down in a fixed order:
#   1. keep only GOOGL / GOOG rows,
#   2. drop rows whose price is the UNDEF_PRICE sentinel,
#   3. rebase ts_event to an offset from the earliest remaining event,
#   4. keep the first 5M rows,
#   5. apply the price ceiling.
# The order matters: the minimum in step 3 is taken after steps 1-2, and the
# row cap in step 4 is applied before the ceiling in step 5.
data = (
    pl.read_parquet("/Users/danny/trading/databento/mbp.parquet")
    .filter((pl.col("symbol") == "GOOGL") | (pl.col("symbol") == "GOOG"))
    .filter(pl.col("price") != UNDEF_PRICE)
    .with_columns(pl.col("ts_event") - pl.col("ts_event").min())
    .head(5_000_000)
    # The ceiling is compared against the price column as stored; no
    # FIXED_PRICE_SCALE division has been applied at this point.
    .filter(pl.col("price") < 200370000000.0)
)
data.shape

(4999993, 75)

In [None]:
import pandas as pd
import numpy as np
import datetime as dt  # For plotting x-axis as dates
import matplotlib.pyplot as plt
import statsmodels.api as sm

from arbitragelab.trading import BasicCopulaTradingRule
import arbitragelab.copula_approach.copula_calculation as ccalc
from arbitragelab.copula_approach.archimedean import (Gumbel, Clayton, Frank, Joe, N13, N14)
from arbitragelab.copula_approach.elliptical import (StudentCopula, GaussianCopula)

In [6]:

def plot_price(data: pl.DataFrame, symbols: list[str]):
    """Plot per-second log returns of the mean price for each symbol.

    For every symbol the rows are bucketed by whole seconds of ``ts_event``,
    the mean ``price`` of each bucket is taken, and the log return between
    consecutive buckets is drawn as one trace. When at least two symbols are
    given, the distance correlation and the Pearson correlation of the first
    two return series are printed.

    Args:
        data: Frame with ``symbol``, ``ts_event`` and ``price`` columns.
            ``ts_event`` must support ``.dt.total_seconds()``.
        symbols: Symbols to plot, one trace per symbol.

    Note:
        The two correlated series are paired by position, not by timestamp.
        NOTE(review): if the symbols do not have a bucket in exactly the same
        seconds, paired values refer to different times — confirm whether a
        join on ``ts_event`` is wanted.
    """
    traces = []
    vals = []
    for symbol in symbols:
        symbol_data = (
            data.filter(pl.col("symbol") == symbol)
            .with_columns(pl.col("ts_event").dt.total_seconds())
            .group_by("ts_event")
            .agg(pl.col("price").mean())
            # group_by gives no row-order guarantee; shift(1) below only means
            # "previous second" once the buckets are sorted by time.
            .sort("ts_event")
            # The fixed-point price scale cancels in the ratio, so the log
            # return is not divided by FIXED_PRICE_SCALE.
            .with_columns((pl.col("price") / pl.col("price").shift(1)).log())
        )
        merged = symbol_data.to_pandas()
        vals.append(merged["price"].values)

        traces.append(
            go.Scatter(
                # Plot against the bucket's second rather than the row index so
                # the x axis really is time.
                x=merged["ts_event"],
                y=merged["price"],
                mode='lines',
                name=symbol
            )
        )

    # The correlations need two series; with fewer symbols just draw the plot.
    if len(vals) >= 2:
        length = min(len(val) for val in vals)
        # Index 0 has no previous bucket, so its return is undefined; the start
        # index of 2 is kept from the original implementation.
        vals = [val[2:length] for val in vals]
        print(distance_correlation(vals[0], vals[1]))
        print(np.corrcoef(vals[0], vals[1])[0, 1])

    layout = go.Layout(
        title='Interactive Plot of Per-Second Log Returns',
        xaxis=dict(title='Time (s)'),
        yaxis=dict(title='Log return'),
        hovermode='closest'
    )

    fig = go.Figure(data=traces, layout=layout)
    pyo.iplot(fig)

# Compare the two Alphabet share classes present in `data`.
plot_price(data=data, symbols=["GOOGL", "GOOG"])

In [None]:
from fasterbook.lob import LimitOrderBook, Order

# Empty limit order book; the replay loop further down in this cell feeds it
# via book.process(). Note that a later cell rebinds `book` to `Book()`.
book = LimitOrderBook()


def row_to_order(row: dict) -> Order:
    """Build an ``Order`` from one named row of the frame.

    Args:
        row: Mapping of column name to value, as yielded by
            ``DataFrame.iter_rows(named=True)``. The keys ``order_id``,
            ``price``, ``side``, ``size`` and ``ts_recv`` are read.

    Returns:
        An ``Order`` whose ``is_bid`` flag is true exactly when ``side`` equals
        ``"B"``.
    """
    # NOTE(review): this reads an `order_id` key — confirm the loaded frame
    # actually has that column, otherwise this raises KeyError.
    return Order(
        uid=row["order_id"], 
        price=row["price"],
        is_bid=row["side"] == "B",
        size=row["size"],
        timestamp=row["ts_recv"]
    )

# Replay every add ("A") row of `data` into the book.
# No per-order print here: `data` can hold millions of rows, and printing each
# order floods the cell output and fights with the tqdm progress bar.
prev_item = None
for row in tqdm.tqdm(data.iter_rows(named=True), total=data.shape[0]):
    if row["action"] != "A":
        continue
    order = row_to_order(row)

    # NOTE(review): the previous_item chain is built on `order`, but the book
    # is fed a second, freshly built Order below, so the chain never reaches
    # the book. Kept as-is because fasterbook may manage previous_item itself;
    # confirm which object should be processed.
    order.previous_item = prev_item
    prev_item = order
    book.process(row_to_order(row))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Histogram of the values in the `depth` column (100 bins). The figure is
# labelled so it can be read on its own, and the trailing ';' on the last line
# keeps the cell from echoing the return value of the final call.
plt.hist(data["depth"].to_numpy(), bins=100)
plt.xlabel("depth")
plt.ylabel("rows")
plt.title("Distribution of depth");

In [None]:
# Row count per distinct `depth` value, ordered by depth.
data.get_column("depth").value_counts().sort("depth")

In [None]:
# Preview of the first 20 rows whose action is not "R".
# NOTE: this is not the last statement of the cell (a function definition
# follows), so the notebook does not display its result.
data.filter(pl.col("action") != "R").head(20)

def prepare_mbp(df: pl.DataFrame, symbol=None) -> pl.DataFrame:
    """Drop unusable rows and convert ``price`` from fixed-point to a float.

    Args:
        df: Frame with ``action`` and ``price`` columns (and ``symbol`` when a
            symbol filter is requested).
        symbol: Optional symbol to keep. ``None`` (the default) keeps every
            symbol.

    Returns:
        A new frame without rows whose action is ``"R"`` or whose price is
        ``UNDEF_PRICE``, with ``price`` divided by ``FIXED_PRICE_SCALE``.
    """
    if symbol is not None:
        df = df.filter(pl.col("symbol") == symbol)
    return (
        df.filter(pl.col("action") != "R")
        # The sentinel must be removed before scaling, while prices are still
        # in the units UNDEF_PRICE is expressed in.
        .filter(pl.col("price") != UNDEF_PRICE)
        # alias() belongs on the expression; a DataFrame has no alias().
        .with_columns((pl.col("price") / FIXED_PRICE_SCALE).alias("price"))
    )

In [None]:
def prepare_symbol(data: pl.DataFrame, date: str, symbol: str):
    """Return one symbol's book-changing rows for a single calendar day.

    Steps, in order: keep rows of ``symbol`` with a non-zero ``size``; drop
    the bookkeeping columns; cast ``ts_event`` to ``pl.Datetime``; remove
    rows whose action is ``"T"`` or ``"F"``; keep rows whose ``ts_event``
    date equals ``date``; cast ``size`` to ``pl.Int16``.

    Args:
        data: Source frame; it is not modified.
        date: Day to keep, as a string parsed by ``str.to_date()``
            (e.g. ``"2024-01-31"``).
        symbol: Value of the ``symbol`` column to keep.

    Returns:
        The filtered frame.

    NOTE(review): ``size`` is cast to Int16 — confirm no size exceeds 32767.
    NOTE(review): the date filter assumes ``ts_event`` still holds absolute
    timestamps; the load cell rebases ``ts_event`` to an offset from the first
    event, which would make the cast dates meaningless — confirm the input.
    """
    bookkeeping_columns = [
        "__index_level_0__",
        "ts_recv",
        "channel_id",
        "publisher_id",
        "rtype",
        "instrument_id",
        "flags",
        "sequence",
        "ts_in_delta",
    ]
    # Temporary boolean marker column; added, filtered on, then dropped again.
    on_requested_day = (
        pl.col("ts_event").cast(pl.Date) == pl.lit(date).str.to_date()
    ).alias("is_date")

    return (
        data.filter((pl.col("symbol") == symbol) & (pl.col("size") != 0))
        .drop(bookkeeping_columns)
        .with_columns(pl.col("ts_event").cast(pl.Datetime))
        .filter((pl.col("action") != "T") & (pl.col("action") != "F"))
        .with_columns(on_requested_day)
        .filter(pl.col("is_date"))
        .drop(["is_date"])
        .with_columns(pl.col("size").cast(pl.Int16))
    )

In [None]:
# Replay the first 10,001 rows of one GOOG session through a fresh book and
# record the best bid / best ask as seen *before* each row is applied.
# NOTE(review): the first bbo() call happens on an empty book — confirm that
# Book.bbo() returns objects with .price/.size in that state.
book = Book()
df = prepare_symbol(data, "2024-01-31", "GOOG")
num_rows = df.shape[0]
best_bids = []
best_asks = []
# The loop stops after index 10000, so the progress bar total is capped to match.
for i, row in enumerate(tqdm.tqdm(df.iter_rows(named=True), total=min(num_rows, 10_001))):
    best_bid, best_ask = book.bbo()
    best_bids.append({"price": best_bid.price, "size": best_bid.size})
    best_asks.append({"price": best_ask.price, "size": best_ask.size})
    book.apply(row)
    if i == 10000:
        break


# FIXED_PRICE_SCALE is a price scale, so it is applied to the price column
# only. Dividing the whole frame by it also divided the sizes.
best_bids = (
    pl.DataFrame(best_bids)
    .with_columns(pl.col("price") / FIXED_PRICE_SCALE)
    .rename({"price": "best_bid_price", "size": "best_bid_size"})
)
best_asks = (
    pl.DataFrame(best_asks)
    .with_columns(pl.col("price") / FIXED_PRICE_SCALE)
    .rename({"price": "best_ask_price", "size": "best_ask_size"})
)
# Side-by-side frame: one row per replayed update, bid columns then ask columns.
merged = pl.concat([best_bids, best_asks], how="horizontal")

In [None]:

# import matplotlib.pyplot as plt

# limit = len(merged)
# start = 0
# limit = 10**5

# plt.figure(figsize=(20, 10))
# plt.plot(merged["best_bid_price"][start:limit], label="Best Bid Price")
# plt.plot(merged["best_ask_price"][start:limit], label="Best Ask Price")
# plt.legend()
# plt.xlabel('Time')
# plt.ylabel('Price')
# plt.title('Interactive Plot of Bid and Ask Prices')
# plt.show()


In [None]:
import plotly.graph_objs as go
import plotly.offline as pyo

# Plot best-bid size (positive) against best-ask size (mirrored below zero)
# for the rows in [start, limit).
start = 0
limit = min(len(merged), 10**5)

# `merged` is a polars frame (built with pl.concat) and has no `.index`, so the
# x axis is built from explicit row positions. Slicing a column and calling
# .to_numpy() also works unchanged if `merged` is converted to pandas.
bid_sizes = merged["best_bid_size"][start:limit].to_numpy()
ask_sizes = (-merged["best_ask_size"][start:limit]).to_numpy()
row_positions = list(range(start, start + len(bid_sizes)))

trace1 = go.Scatter(
    x=row_positions,
    y=bid_sizes,
    mode='lines',
    name='Best Bid Size'
)

trace2 = go.Scatter(
    x=row_positions,
    y=ask_sizes,
    mode='lines',
    name='Best Ask Size'
)

layout = go.Layout(
    title='Interactive Plot of Best Bid and Ask Sizes',
    xaxis=dict(title='Row index'),
    yaxis=dict(title='Size (asks shown negative)'),
    hovermode='closest'
)

fig = go.Figure(data=[trace1, trace2], layout=layout)
pyo.iplot(fig)
