In [None]:
%cd /home/stefano/dev/active/spreads-arb
%load_ext line_profiler

import itertools
import logging
import os
import pickle
from datetime import date, datetime, timedelta

import cryptomart as cm
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import requests
import vectorbt as vbt

import argparse
import os
import glob
import pickle

import cryptomart as cm
import pandas as pd
from app.data_prep.dummy import dummy_bid_ask_spreads
from app.data_prep.real import all_bid_ask_spreads, all_funding_rates, all_ohlcv, get_fee_info
from app.feeds import Spread
import app
from IPython.display import display, HTML
from functools import cached_property

# app.data_prep.real.aggregate_tardis_quotes()


In [None]:
class SpreadViewer:
    """Inspect per-exchange quote files and the spreads between two exchanges.

    On construction the viewer indexes every file matching
    ``<path>/<exchange>/<symbol>.<ext>``. Quotes are read with
    ``pandas.read_parquet`` and are expected to be indexed by timestamp and to
    carry the columns ``bid_price``, ``ask_price``, ``mid_price``,
    ``bid_amount``, ``ask_amount`` and ``filled``.
    """

    # Default location of the aggregated quote files.
    DEFAULT_PATH = "/home/stefano/dev/active/spreads-arb/data/tick_quotes_1m_agg"
    # Rows before this timestamp are dropped by ``hide_daily`` and by the stats methods.
    # NOTE(review): presumably the point where the data switches from daily to
    # 1-minute resolution -- confirm against the data pipeline.
    INTRADAY_START = "2023-04-10 00:00:00"
    # Row-count window of the quote z-score (30 days' worth of rows if every row is one minute).
    ZSCORE_WINDOW_ROWS = 30 * 24 * 60
    # Time-based window of the spread z-score.
    SPREAD_ZSCORE_WINDOW = "30D"
    # |z-score| above which ``load_spread`` fills ``real_price`` / ``real_amount``;
    # also the half-height of the grey band drawn on the z-score axis.
    ZSCORE_BAND = 1

    def __init__(self, path=None):
        """Index the quote files found under ``path``.

        Args:
            path: Root directory laid out as ``<path>/<exchange>/<symbol>.<ext>``.
                Defaults to ``DEFAULT_PATH``.
        """
        self.path = self.DEFAULT_PATH if path is None else path

        # Collect in a dict first (last file wins for a repeated key) and build
        # the Series once instead of enlarging it entry by entry.
        found = {}
        for filepath in sorted(glob.glob(os.path.join(self.path, "*", "*"))):
            *_, exchange, filename = filepath.split(os.path.sep)
            symbol = os.path.splitext(filename)[0]
            found[(exchange, symbol)] = filepath

        index = pd.MultiIndex.from_arrays(
            [[exchange for exchange, _ in found], [symbol for _, symbol in found]],
            names=["exchange", "symbol"],
        )
        self.filepath_map = pd.Series(list(found.values()), index=index, dtype=object)

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _missing_fraction(flags):
        """Return the share of truthy entries in ``flags`` (NaN when ``flags`` is empty)."""
        return flags.sum() / len(flags) if len(flags) else float("nan")

    @staticmethod
    def _thirds(frame):
        """Split ``frame`` by position into three consecutive, non-overlapping parts."""
        n = len(frame)
        first_cut, second_cut = n // 3, 2 * n // 3
        return frame.iloc[:first_cut], frame.iloc[first_cut:second_cut], frame.iloc[second_cut:]

    def _intraday(self, frame):
        """Return the rows of ``frame`` at or after ``INTRADAY_START`` in chronological order."""
        # Label slicing needs a sorted index; caller-supplied frames may not have one.
        if not frame.index.is_monotonic_increasing:
            frame = frame.sort_index()
        return frame.loc[self.INTRADAY_START:]

    def _quote_stats_from_frame(self, quote):
        """Compute the ``quote_stats`` summary for an already loaded quote frame."""
        quote = self._intraday(quote)
        start, middle, end = self._thirds(quote)
        return pd.Series(
            {
                "missing_rows": quote.filled.sum(),
                "missing_pct": self._missing_fraction(quote.filled),
                "mid_price_variance": quote.mid_price.var(),
                "bid_size_variance": quote.bid_amount.var(),
                "ask_size_variance": quote.ask_amount.var(),
                "start_missing": self._missing_fraction(start.filled),
                "middle_missing": self._missing_fraction(middle.filled),
                "end_missing": self._missing_fraction(end.filled),
            }
        )

    def _build_figure(self, frame, price_columns, amount_columns, filled_traces, zscore_opacity=None, title=None):
        """Build the three-row figure shared by ``plot_quote`` and ``plot_spread``.

        Row 1 shows ``price_columns`` plus the z-score on a secondary y-axis,
        row 2 shows ``amount_columns`` and row 3 shows the ``filled`` flags.

        Args:
            frame: Data to plot; its index is used as the x-axis.
            price_columns: Column names drawn in row 1, in trace order.
            amount_columns: Column names drawn in row 2, in trace order.
            filled_traces: Mapping of trace name -> boolean column drawn in row 3.
            zscore_opacity: Opacity of the z-score trace (``None`` keeps plotly's default).
            title: Optional figure title.
        """
        fig = make_subplots(
            rows=3,
            cols=1,
            shared_xaxes=True,
            vertical_spacing=0.1,
            row_heights=[0.5, 0.3, 0.2],
            specs=[[{"secondary_y": True}], [{"secondary_y": False}], [{"secondary_y": False}]],
        )

        for column in price_columns:
            fig.add_trace(go.Scatter(x=frame.index, y=frame[column], mode="lines", name=column), row=1, col=1)
        fig.add_trace(
            go.Scatter(x=frame.index, y=frame["30_day_zscore"], mode="lines", name="30_day_zscore", opacity=zscore_opacity),
            row=1,
            col=1,
            secondary_y=True,
        )
        for column in amount_columns:
            fig.add_trace(go.Scatter(x=frame.index, y=frame[column], mode="lines", name=column), row=2, col=1)
        for trace_name, column in filled_traces.items():
            fig.add_trace(
                go.Scatter(x=frame.index, y=frame[column].astype(int), mode="lines", name=trace_name),
                row=3,
                col=1,
            )

        # Grey band across the full width between -ZSCORE_BAND and +ZSCORE_BAND;
        # "y2" is the secondary (z-score) axis of the first row.
        fig.add_shape(
            type="rect",
            xref="paper",
            yref="y2",
            x0=0,
            y0=self.ZSCORE_BAND,
            x1=1,
            y1=-self.ZSCORE_BAND,
            fillcolor="gray",
            opacity=0.2,
            layer="below",
            line_width=0,
        )

        fig.update_layout(
            title=title,
            height=800,
            width=1800,
            xaxis_showticklabels=True,
            xaxis2_showticklabels=True,
            hovermode="x unified",
            hoverlabel={"namelength": -1},
            yaxis2=dict(range=[-3, 3]),
        )
        return fig

    # --------------------------------------------------------------- public API

    def list_exchanges_for_symbol(self, symbol):
        """Return the exchanges that have a quote file for ``symbol`` (``[]`` if none)."""
        return [exchange for exchange, known_symbol in self.filepath_map.index if known_symbol == symbol]

    def load_quote(self, exchange, symbol):
        """Read one quote file and append a rolling z-score of ``mid_price``.

        The z-score window is ``ZSCORE_WINDOW_ROWS`` rows, so the column is NaN
        until that many rows are available.

        Raises:
            KeyError: If no file was indexed for ``(exchange, symbol)``.
        """
        filepath = self.filepath_map.loc[exchange, symbol]
        df = pd.read_parquet(filepath)
        window = df.mid_price.rolling(self.ZSCORE_WINDOW_ROWS)
        df["30_day_zscore"] = (df.mid_price - window.mean()) / window.std()
        return df

    def plot_quote(self, exchange=None, symbol=None, hide_daily=True, quote=None):
        """Plot a quote and display its summary statistics.

        Args:
            exchange: Exchange to load; ignored for loading when ``quote`` is given.
            symbol: Symbol to load; ignored for loading when ``quote`` is given.
            hide_daily: If true, plot only rows from ``INTRADAY_START`` onwards.
            quote: Optional pre-built quote frame to plot instead of loading one.

        The statistics always describe the frame being plotted (cut at
        ``INTRADAY_START``), so passing only ``quote`` works without an
        exchange/symbol pair.
        """
        full_quote = quote if quote is not None else self.load_quote(exchange, symbol)
        plotted = full_quote.loc[self.INTRADAY_START:] if hide_daily else full_quote

        title = f"{exchange} {symbol}" if exchange is not None and symbol is not None else None
        fig = self._build_figure(
            plotted,
            price_columns=["bid_price", "ask_price", "mid_price"],
            amount_columns=["bid_amount", "ask_amount"],
            filled_traces={"filled": "filled"},
            title=title,
        )
        fig.show()
        display(self._quote_stats_from_frame(full_quote))

    def quote_stats(self, exchange, symbol):
        """Summarise missing data and variance of a quote from ``INTRADAY_START`` onwards.

        ``missing_*`` values are based on the ``filled`` column. The
        ``start``/``middle``/``end`` entries are the filled share within three
        consecutive, non-overlapping thirds of the rows.
        """
        return self._quote_stats_from_frame(self.load_quote(exchange, symbol))

    def load_spread(self, symbol, exchange_0, exchange_1):
        """Build the ``exchange_1 - exchange_0`` spread for ``symbol``.

        The two quotes are inner-joined on their index. Prices are differences
        (``exchange_1`` minus ``exchange_0``), amounts are averages, and
        ``30_day_zscore`` is a ``SPREAD_ZSCORE_WINDOW`` time-based rolling
        z-score of the mid-price spread.

        ``real_price`` / ``real_amount`` are filled only where the z-score is
        outside ``+/-ZSCORE_BAND``:

        * z-score below the band: ``ask`` of ``exchange_1`` minus ``bid`` of ``exchange_0``
        * z-score above the band: ``bid`` of ``exchange_1`` minus ``ask`` of ``exchange_0``

        with the amount being the smaller of the two sizes involved.

        Raises:
            AssertionError: If either exchange has no quote file for ``symbol``.
        """
        exchange_list = self.list_exchanges_for_symbol(symbol)
        assert exchange_0 in exchange_list, f"no {symbol} quote for {exchange_0}"
        assert exchange_1 in exchange_list, f"no {symbol} quote for {exchange_1}"

        df_0 = self.load_quote(exchange_0, symbol)
        df_1 = self.load_quote(exchange_1, symbol)
        # Overlapping columns get the default suffixes: _x = exchange_0, _y = exchange_1.
        spread = df_0.merge(df_1, left_index=True, right_index=True)

        spread["bid_price"] = spread["bid_price_y"] - spread["bid_price_x"]
        spread["ask_price"] = spread["ask_price_y"] - spread["ask_price_x"]
        spread["mid_price"] = spread["mid_price_y"] - spread["mid_price_x"]
        spread["bid_amount"] = (spread["bid_amount_y"] + spread["bid_amount_x"]) / 2
        spread["ask_amount"] = (spread["ask_amount_y"] + spread["ask_amount_x"]) / 2

        window = spread["mid_price"].rolling(self.SPREAD_ZSCORE_WINDOW)
        spread["30_day_zscore"] = (spread["mid_price"] - window.mean()) / window.std()

        below = spread["30_day_zscore"] < -self.ZSCORE_BAND
        above = spread["30_day_zscore"] > self.ZSCORE_BAND

        price_below = spread["ask_price_y"] - spread["bid_price_x"]
        price_above = spread["bid_price_y"] - spread["ask_price_x"]
        # np.minimum yields NaN when either size is NaN.
        amount_below = np.minimum(spread["ask_amount_y"], spread["bid_amount_x"])
        amount_above = np.minimum(spread["bid_amount_y"], spread["ask_amount_x"])

        # NaN inside the band; the two masks are mutually exclusive.
        spread["real_price"] = price_below.where(below, price_above.where(above))
        spread["real_amount"] = amount_below.where(below, amount_above.where(above))

        return spread[
            [
                "bid_price",
                "bid_amount",
                "ask_price",
                "ask_amount",
                "mid_price",
                "filled_x",
                "filled_y",
                "30_day_zscore",
                "real_price",
                "real_amount",
            ]
        ]

    def plot_spread(self, symbol, exchange_0, exchange_1, hide_daily=True):
        """Plot the spread between two exchanges for ``symbol``.

        Args:
            symbol: Symbol to load on both exchanges.
            exchange_0: Exchange whose values are subtracted.
            exchange_1: Exchange whose values are subtracted from.
            hide_daily: If true, plot only rows from ``INTRADAY_START`` onwards.
        """
        spread = self.load_spread(symbol, exchange_0, exchange_1)
        if hide_daily:
            spread = spread.loc[self.INTRADAY_START:]

        fig = self._build_figure(
            spread,
            price_columns=["bid_price", "ask_price", "mid_price", "real_price"],
            amount_columns=["bid_amount", "ask_amount", "real_amount"],
            filled_traces={"filled_0": "filled_x", "filled_1": "filled_y"},
            zscore_opacity=0.4,
            title=f"{symbol} spread: {exchange_1} - {exchange_0}",
        )
        fig.show()

    def spread_stats(self, symbol, exchange_0, exchange_1):
        """Summarise missing data and variance of a spread from ``INTRADAY_START`` onwards.

        Keys suffixed ``_0`` / ``_1`` refer to ``exchange_0`` / ``exchange_1``.
        The ``start``/``middle``/``end`` entries are the filled share within
        three consecutive, non-overlapping thirds of the rows.
        """
        spread = self._intraday(self.load_spread(symbol, exchange_0, exchange_1))
        start, middle, end = self._thirds(spread)
        return pd.Series(
            {
                "missing_rows_0": spread.filled_x.sum(),
                "missing_pct_0": self._missing_fraction(spread.filled_x),
                "missing_rows_1": spread.filled_y.sum(),
                "missing_pct_1": self._missing_fraction(spread.filled_y),
                "mid_price_variance": spread.mid_price.var(),
                "bid_size_variance": spread.bid_amount.var(),
                "ask_size_variance": spread.ask_amount.var(),
                "start_missing_0": self._missing_fraction(start.filled_x),
                "middle_missing_0": self._missing_fraction(middle.filled_x),
                "end_missing_0": self._missing_fraction(end.filled_x),
                "start_missing_1": self._missing_fraction(start.filled_y),
                "middle_missing_1": self._missing_fraction(middle.filled_y),
                "end_missing_1": self._missing_fraction(end.filled_y),
            }
        )

    @cached_property
    def all_quote_stats(self):
        """``quote_stats`` for every indexed (exchange, symbol), one row per pair.

        Reads every indexed file once; the result is cached on the instance.
        """
        return pd.DataFrame(
            {
                (exchange, symbol): self.quote_stats(exchange, symbol)
                for exchange, symbol in self.filepath_map.index
            }
        ).T

# Shared viewer instance used by the cells below; construction indexes the quote files on disk.
sv = SpreadViewer()

In [None]:
# Exchanges for which a DOGE quote file was indexed.
sv.list_exchanges_for_symbol(symbol="DOGE")

In [None]:
# Binance DOGE quote (with the rolling z-score column added by load_quote).
x = sv.load_quote(exchange="binance", symbol="DOGE")

In [None]:
# First row of each day between 2023-03-12 and 2023-04-09, without the z-score column.
(
    x.loc["2023-03-12":"2023-04-09 23:59"]
    .resample("1d")
    .first()
    .drop(columns="30_day_zscore")
)

In [None]:
# Full history of the Binance DOGE quote, including rows before 2023-04-10.
sv.plot_quote(exchange="binance", symbol="DOGE", hide_daily=False)

In [None]:
# Full history of the Binance DOGE quote, including rows before 2023-04-10.
sv.plot_quote(exchange="binance", symbol="DOGE", hide_daily=False)

In [None]:
# Plot the DOGE quote with the 2023-03-11 .. 2023-04-09 rows collapsed to one row per day,
# followed by every row outside that window.
daily_window = x.loc["2023-03-11":"2023-04-09 23:59"]
sv.plot_quote(
    quote=pd.concat([daily_window.resample("1d").first(), x.drop(daily_window.index)]),
    hide_daily=False,
)

In [None]:
# Full history of the Binance DOGE quote, including rows before 2023-04-10.
sv.plot_quote(exchange="binance", symbol="DOGE", hide_daily=False)

In [None]:
# DOGE spread, bitmex minus binance. NOTE: this rebinds `x`, which held the Binance DOGE quote above.
x = sv.load_spread(symbol="DOGE", exchange_0="binance", exchange_1="bitmex")

In [None]:
# Missing-data and variance summary of the DOGE spread from 2023-04-10 onwards.
sv.spread_stats(symbol="DOGE", exchange_0="binance", exchange_1="bitmex")

In [None]:
# DOGE spread between binance and bitmex, from 2023-04-10 onwards (hide_daily defaults to True).
sv.plot_spread(symbol="DOGE", exchange_0="binance", exchange_1="bitmex")

In [None]:
# Binance BTC quote.
quote = sv.load_quote(exchange="binance", symbol="BTC")

In [None]:
# Number of rows flagged as filled in the Binance BTC quote.
quote["filled"].astype(int).sum()

In [None]:
# The 50 (exchange, symbol) pairs with the highest share of filled rows.
sv.all_quote_stats.sort_values(by="missing_pct", ascending=False).head(n=50)

In [None]:
# Bybit AUDIO quote from 2023-04-10 onwards (hide_daily defaults to True).
sv.plot_quote(exchange="bybit", symbol="AUDIO")

In [None]:
# Missing-data and variance summary of the Binance BTC quote from 2023-04-10 onwards.
sv.quote_stats(exchange="binance", symbol="BTC")