In [None]:
import asyncio
import json
import time
import pandas as pd
import numpy as np
import datetime
from datetime import timedelta

In [None]:
# Read the raw Bybit trade/orderbook CSV; the first CSV column is used as the index.
df = pd.read_csv(
    './data/bybit_trade_orderbook_2021-06-30.csv',
    index_col=0,
)
df

In [None]:
# Normalise the raw frame:
#   * drop the `_id` column,
#   * rename `size` -> `quantity` and `time` -> `created_at`,
#   * convert `created_at` from epoch milliseconds to datetimes floored to the second.
# The lowercase "s" alias is used because the uppercase "S" alias is deprecated
# in recent pandas releases.
df = (
    df
    .drop(columns=['_id'])
    .rename(columns={'size': 'quantity', 'time': 'created_at'})
    .assign(
        created_at=lambda frame: pd.to_datetime(frame['created_at'], unit='ms').dt.floor("s")
    )
)
df

In [None]:
# Trade-only view used for the footprint chart: the `orderbook` column is
# dropped and rows are indexed by their `created_at` timestamp.
# `drop` already returns a new frame, so `df` itself is left untouched.
fp_df = df.drop(columns=['orderbook']).set_index("created_at")
fp_df

In [None]:
# Show the first and last timestamps present in the trade index.
first_ts, last_ts = fp_df.index[0], fp_df.index[-1]
print(first_ts)
print(last_ts)

In [None]:
import math
import matplotlib.pyplot as plt
#%matplotlib inline
%matplotlib qt

# parameters
freq = 5  # candle timeframe in minutes
num_candlesticks = 35  # number of loop iterations, i.e. number of candlesticks drawn

# Layout presets that depend on the timeframe:
#   cvd_bar_width   - width passed to the CVD bar plot
#   vp_max          - upper bound (vmax) of the volume-profile colour scale
#   footprint_buy_x - minutes added to the candle start for the buy text column
#   vp_x_adjust     - minutes added to the candle start for the volume-profile markers
if freq == 5:
    cvd_bar_width = 250.0
    vp_max = 1500000
    footprint_buy_x = 2
    vp_x_adjust = 2
elif freq == 15:
    cvd_bar_width = 550.0
    vp_max = 3500000
    footprint_buy_x = 5
    vp_x_adjust = 3
else:
    # Fail here instead of with a NameError deep inside the plotting loop.
    raise ValueError(
        "unsupported freq {!r}: only 5 and 15 minute candles have layout presets".format(freq)
    )


# Running state for the cumulative volume delta (CVD) panel.
delta = 0
cvd_bottom = 0

# Suffixes for successive powers of 1000: none, thousand, million, billion, trillion.
millnames = ['', 'K', 'M', 'B', 'T']

def millify(n):
    """Format a number as a short string with a thousands suffix.

    The value is scaled by the largest power of 1000 not exceeding its
    magnitude (capped at the last entry of ``millnames``), rounded to zero
    decimals and suffixed, e.g. ``1234 -> '1K'``, ``1200000 -> '1M'``.

    Parameters
    ----------
    n : anything accepted by ``float()``

    Returns
    -------
    str
    """
    n = float(n)
    last_idx = len(millnames) - 1
    millidx = max(0, min(last_idx, int(math.floor(0 if n == 0 else math.log10(abs(n)) / 3))))
    scaled = n / 10 ** (3 * millidx)
    # Rounding can push the mantissa up to 1000 (e.g. 999999 -> "1000K");
    # roll over to the next suffix so the result reads "1M" instead.
    if millidx < last_idx and abs(round(scaled)) >= 1000:
        millidx += 1
        scaled = n / 10 ** (3 * millidx)
    return '{:.0f}{}'.format(scaled, millnames[millidx])


# Two stacked panels with a 3:1 height ratio; ax1 is the upper panel, ax2 the lower.
fig = plt.figure(figsize=(24, 18), constrained_layout=True)
gs = fig.add_gridspec(nrows=2, ncols=1, height_ratios=[3, 1])
ax1 = fig.add_subplot(gs[0, :])
ax2 = fig.add_subplot(gs[1, :])
# Previous single-axes setup, kept for reference:
# plt.figure(figsize = (24,18))

# Axis limits; the y bounds start at extremes so the first comparison replaces them.
xlim_start, xlim_end = 0, 0
ylim_low, ylim_high = float('inf'), 0


# Lookup table keyed '100%', '200%', ..., '2000%'. For a ratio of i:1 between
# the two sides it stores [plus, minus] = [(i - 1) / 1, (1 - i) / i].
# NOTE(review): nothing in the visible code reads this table - confirm it is still needed.
imbalance_threshold = {}
for i in range(1, 21):
    A = 1  # current buy
    B = i  # previous sell
    plus = (B - A) / A
    minus = (A - B) / B  # intended to express e.g. "300%"; note that 1/4 = 0.25
    imbalance_threshold['{}%'.format(i * 100)] = [plus, minus]


def separate_buy_sell(side):
    """Return the rows of the global ``fp_df_groupby`` for one trade side.

    ``side`` is interpolated into a ``query`` expression matched against the
    ``side`` column. The result is an independent copy with the ``side``
    column removed.
    """
    side_filter = "side == '{}'".format(side)
    matching_rows = fp_df_groupby.query(side_filter).copy(deep=True)
    return matching_rows.drop(columns=['side'])


# Anchor time for the chart. The plotting loop advances `snapshot_start` by
# `freq` minutes before using it, so the first candle begins one `freq` after
# this anchor and the anchor itself becomes the left x-limit.
snapshot_start = datetime.datetime(2021, 6, 29, 6, freq, 0)
# Was `start + timedelta(...)`: `start` is never defined, so a fresh kernel
# raised NameError here.
snapshot_end = snapshot_start + timedelta(minutes=freq)
xlim_start = snapshot_start.timestamp()

# Each footprint row aggregates this many consecutive 0.5-wide price levels
# (20 levels = $10, 40 = $20, so 60 = $30 per row).
ROWS_PER_BUCKET = 60
# Imbalance cut-offs applied to (buy of the row below - sell) / sell.
BUY_IMBALANCE_MIN = 2
SELL_IMBALANCE_MAX = -0.67

for n in range(num_candlesticks):  # one iteration per candlestick
    snapshot_start += timedelta(minutes=freq)
    snapshot_end = snapshot_start + timedelta(minutes=freq)

    # Trades belonging to this candle. The window is half-open so a trade
    # stamped exactly on a boundary is counted in one candle only (the
    # previous inclusive upper bound counted it in two).
    in_window = (fp_df.index >= snapshot_start) & (fp_df.index < snapshot_end)
    fp_df_new = fp_df.loc[in_window]

    # The CVD bar for this candle starts where the previous bars ended.
    cvd_bottom += delta
    if fp_df_new.empty:
        # No trades in this window: nothing to draw and no contribution to CVD.
        delta = 0
        continue

    # Total traded quantity per (price, side). Only `quantity` is summed so
    # unrelated columns can never leak into the merges below.
    # `fp_df_groupby` stays a module-level name because separate_buy_sell() reads it.
    fp_df_groupby = fp_df_new.groupby(['price', 'side'], as_index=False)['quantity'].sum()

    # Split into buy and sell volume per price.
    trade_buy = separate_buy_sell("Buy").rename(columns={'quantity': 'buy'})
    trade_sell = separate_buy_sell("Sell").rename(columns={'quantity': 'sell'})

    # Volume delta of this candle: buy volume minus sell volume.
    delta = trade_buy['buy'].sum() - trade_sell['sell'].sum()

    # Continuous price grid in 0.5 steps; `+ 0.5` keeps the highest traded
    # price, which np.arange would otherwise exclude.
    # NOTE(review): assumes prices are multiples of 0.5 so the merge keys match exactly.
    price_range = np.arange(
        fp_df_groupby["price"].min(),
        fp_df_groupby["price"].max() + 0.5,
        0.5,
    )
    footprint_df = (
        pd.DataFrame({'price': price_range})
        .merge(trade_sell, on='price', how='left')
        .merge(trade_buy, on='price', how='left')
        .fillna(0)
    )

    # Downsample: sum every ROWS_PER_BUCKET consecutive levels and label the
    # bucket with its lowest price. Grouping by integer bucket id also keeps
    # the final, partially filled bucket at the top of the range.
    bucket_id = np.arange(len(footprint_df)) // ROWS_PER_BUCKET
    footprint_df = (
        footprint_df
        .groupby(bucket_id)
        .agg({'price': 'first', 'sell': 'sum', 'buy': 'sum'})
        .reset_index(drop=True)
    )

    # Imbalance colouring, computed on the downsampled rows so that colour i
    # belongs to plotted row i. Each row's sell volume is compared with the
    # buy volume of the row below it; the first row has nothing below it,
    # yields NaN and therefore stays black.
    buy_below = footprint_df['buy'].shift()
    imbalance = (buy_below - footprint_df['sell']) / footprint_df['sell']
    buy_color = np.where(imbalance >= BUY_IMBALANCE_MIN, 'blue', 'black')
    sell_color = np.where(imbalance <= SELL_IMBALANCE_MAX, 'red', 'black')

    # Track the overall lowest / highest plotted price label for the y-limits.
    ylim_low = min(ylim_low, footprint_df['price'].min())
    ylim_high = max(ylim_high, footprint_df['price'].max())

    # Candlestick body: first trade price to last trade price in the window.
    # NOTE(review): assumes fp_df rows are in chronological order - confirm.
    candle_x = snapshot_start.timestamp()
    open_price = fp_df_new["price"].iloc[0]
    close_price = fp_df_new["price"].iloc[-1]
    if close_price > open_price:
        candle_color = 'green'
    elif close_price < open_price:
        candle_color = 'red'
    else:
        candle_color = 'black'
    ax1.vlines(
        x=candle_x,
        ymin=min(open_price, close_price),
        ymax=max(open_price, close_price),
        color=candle_color,
        linewidth=4,
    )

    # Footprint text: sell volume at the candle start, buy volume
    # `footprint_buy_x` minutes to its right.
    x_sell = snapshot_start.timestamp()
    x_buy = (snapshot_start + timedelta(minutes=footprint_buy_x)).timestamp()
    y = footprint_df['price'].values
    txt_buy = footprint_df['buy'].astype(int).values
    txt_sell = footprint_df['sell'].astype(int).values
    for i in range(len(y)):
        ax1.text(x_buy, y[i], millify(txt_buy[i]), c=buy_color[i])
        ax1.text(x_sell, y[i], millify(txt_sell[i]), c=sell_color[i])

    # Volume profile: one horizontal marker per row, shaded by total volume.
    vp = txt_buy + txt_sell
    vp_x = (snapshot_start + timedelta(minutes=vp_x_adjust)).timestamp()
    vp_x_arr = np.full(len(y), vp_x)
    ax1.scatter(vp_x_arr, y, marker='_', vmin=0, vmax=vp_max, c=vp, cmap='Blues', s=700, linewidths=10, alpha=0.6)

    # CVD: this candle's delta stacked on top of the running total.
    ax2.bar(x_buy, delta, width=cvd_bar_width, bottom=cvd_bottom, color='green' if delta > 0 else 'red')
    

# Right x-limit is the end of the last candle window.
xlim_end = snapshot_end.timestamp()
# Set the x-range on both panels explicitly. `plt.xlim` only touches the
# current axes (the last one created, ax2), which left the upper panel
# autoscaled and out of line with the CVD bars below it.
ax1.set_xlim(xlim_start, xlim_end)
ax2.set_xlim(xlim_start, xlim_end)
# ylim_low is still +inf when no candle produced data; set_ylim rejects
# non-finite limits, so only apply the bounds once they are meaningful.
if ylim_low <= ylim_high:
    ax1.set_ylim(ylim_low, ylim_high)
ax1.grid()
plt.show()