In [None]:
import os

import pandas as pd

from config import Config

# Root directory of the local Binance data, as configured for the project.
_DIR = Config.BINANCE_DATA_DIR

# Both market types share the same layout: <root>/candle_parquet_fixed/<market>/1h
SPOT_DIR, USDT_FUTURES_DIR = (
    os.path.join(_DIR, 'candle_parquet_fixed', market, '1h')
    for market in ('spot', 'usdt_futures')
)

print(SPOT_DIR)

In [None]:
def read(type_, symbol):
    """Load the candle file of one symbol and keep only its traded OHLCV rows.

    Parameters
    ----------
    type_ : str
        Market type; ``'spot'`` reads from ``SPOT_DIR`` and ``'usdt_futures'``
        reads from ``USDT_FUTURES_DIR``.
    symbol : str
        File stem of the parquet file, i.e. ``<symbol>.pqt`` is read.

    Returns
    -------
    pandas.DataFrame
        The columns ``candle_begin_time, open, high, low, close, volume`` for the
        rows whose ``volume`` is strictly positive. The index is whatever the
        parquet file stores.

    Raises
    ------
    ValueError
        If ``type_`` is not one of the supported market types.
    """
    # Resolve the directory first so an unknown market type fails with a clear
    # error instead of an unbound local variable further down.
    if type_ == 'spot':
        input_dir = SPOT_DIR
    elif type_ == 'usdt_futures':
        input_dir = USDT_FUTURES_DIR
    else:
        raise ValueError(f'{type_} is not supported')

    df = pd.read_parquet(os.path.join(input_dir, f'{symbol}.pqt'))

    # Candles without any traded volume are dropped.
    df = df[df['volume'] > 0]
    return df[['candle_begin_time', 'open', 'high', 'low', 'close', 'volume']]


# Spot-check: first VIDTUSDT spot rows from the label 2022-10-31 02:00 UTC onwards.
(
    read('spot', 'VIDTUSDT')
    .loc['2022-10-31 02:00:00+00:00':]
    .head()
)

In [None]:
def check(df):
    """Find gaps between consecutive candles and measure data completeness.

    A row is treated as following a gap when the difference between its
    ``candle_begin_time`` and the previous row's is larger than the smallest
    such difference found in the frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Candles with at least the columns ``candle_begin_time``, ``open`` and
        ``close``. The index must be datetime-like and subtractable from
        ``candle_begin_time``.
        NOTE(review): presumably the index holds the candle end time - confirm
        against the parquet files.
        The frame is not modified.

    Returns
    -------
    (pandas.DataFrame, float)
        * One row per gap with the columns ``delist_time`` (index label of the
          last row before the gap), ``relist_time`` (``candle_begin_time`` of the
          first row after it), ``time_gap`` (their difference) and
          ``price_change`` (open after the gap / close before it - 1).
        * Completeness: number of rows divided by the number of whole hours
          between the first ``candle_begin_time`` and the last index label.
          ``0.0`` for an empty frame, ``nan`` when that span covers no whole hour.
    """
    candle_interval = pd.Timedelta(hours=1)

    begin = df['candle_begin_time']
    step = begin.diff()
    # The first diff is always NaT and can never be a gap, so drop it; element i
    # of the mask then says whether row i + 1 follows a gap after row i.
    follows_gap = (step > step.min()).to_numpy()[1:]

    before = df.iloc[:-1][follows_gap]  # last row before each gap
    after = df.iloc[1:][follows_gap]    # first row after each gap

    # Re-index everything 0..k-1 so the columns line up positionally.
    delist_time = pd.Series(before.index)
    relist_time = after['candle_begin_time'].reset_index(drop=True)
    last_close = before['close'].reset_index(drop=True)
    next_open = after['open'].reset_index(drop=True)

    gaps = pd.DataFrame({
        'delist_time': delist_time,
        'relist_time': relist_time,
        'time_gap': relist_time - delist_time,
        'price_change': next_open / last_close - 1,
    })

    if df.empty:
        # No candles at all: report zero completeness instead of raising.
        completeness = 0.0
    else:
        expected = (df.index[-1] - begin.iloc[0]) // candle_interval
        # Guard the division: the ratio is undefined when no whole hour is covered.
        completeness = len(df) / expected if expected > 0 else float('nan')

    return gaps, completeness

def check_gaps(type_, symbol):
    """Run the gap check for one symbol and label the report with its origin.

    Reads the symbol's candles with ``read(type_, symbol)``, passes them to
    ``check`` and returns ``(gap_report, completeness)``, where the gap report
    carries two extra columns, ``type`` and ``symbol``, holding the arguments.
    """
    gap_report, completeness = check(read(type_, symbol))
    labelled = gap_report.assign(type=type_, symbol=symbol)
    return labelled, completeness


# Gap report for a single spot symbol: completeness ratio first, then the gaps.
df_check, completeness = check_gaps(type_='spot', symbol='VIDTUSDT')
print(completeness)
display(df_check)

In [None]:
# Same report for VENUSDT, with the gaps ordered by price change (ascending).
df_check, completeness = check_gaps(type_='spot', symbol='VENUSDT')
print(completeness)
display(df_check.sort_values(by='price_change'))

# The underlying candles, for comparison with the report above.
read(type_='spot', symbol='VENUSDT')

In [None]:
# Symbols that filter_symbols always removes under the "stablecoin" label.
STABLECOINS = {'BKRWUSDT', 'USDCUSDT', 'USDPUSDT', 'TUSDUSDT', 'BUSDUSDT', 'FDUSDUSDT', 'DAIUSDT', 'EURUSDT', 'GBPUSDT',
               'USBPUSDT', 'SUSDUSDT', 'PAXGUSDT', 'AEURUSDT', 'USDSUSDT', 'USDSBUSDT'}

# Additional symbols to exclude by hand. This must be a set: a bare `{}` literal
# creates an empty dict, which only happens to work as an argument of set.union.
BLACKLIST = set()

def filter_symbols(symbols):
    """Return the sorted, de-duplicated symbols that pass all exclusion rules.

    A symbol is dropped when it
    * does not end with ``USDT``,
    * ends with one of the leveraged-token suffixes ``UPUSDT``, ``DOWNUSDT``,
      ``BEARUSDT`` or ``BULLUSDT`` (``JUPUSDT`` is explicitly exempt), or
    * is listed in ``STABLECOINS`` or ``BLACKLIST``.
    """
    leveraged_suffixes = ('UPUSDT', 'DOWNUSDT', 'BEARUSDT', 'BULLUSDT')

    # Start from the static exclusion lists, then add the rule-based matches.
    dropped = set(STABLECOINS)
    dropped.update(BLACKLIST)
    for name in symbols:
        if not name.endswith('USDT'):
            dropped.add(name)
        elif name != 'JUPUSDT' and name.endswith(leveraged_suffixes):
            dropped.add(name)

    kept = set(symbols).difference(dropped)
    return sorted(kept)

def get_filtered_symbols(type_):
    """List the symbols available for a market type after filtering.

    Parameters
    ----------
    type_ : str
        ``'spot'`` lists ``SPOT_DIR``; ``'usdt_futures'`` lists
        ``USDT_FUTURES_DIR``.

    Returns
    -------
    list of str
        File names of the directory with their extension stripped, passed
        through ``filter_symbols`` (which returns them sorted).

    Raises
    ------
    ValueError
        If ``type_`` is not one of the supported market types.
    """
    if type_ == 'spot':
        input_dir = SPOT_DIR
    elif type_ == 'usdt_futures':
        input_dir = USDT_FUTURES_DIR
    else:
        # Format the message here: ValueError does not interpolate
        # logging-style ('%s', arg) arguments.
        raise ValueError(f'{type_} is not supported')

    # No pre-sorting needed: filter_symbols already returns a sorted list.
    symbols = [os.path.splitext(x)[0] for x in os.listdir(input_dir)]
    return filter_symbols(symbols)

# Preview the first five spot symbols that survive filtering.
get_filtered_symbols(type_='spot')[:5]

In [None]:
# Screen every filtered spot symbol. A symbol is reported when its completeness
# is below 0.95 or when any gap shows an absolute price change above 0.06.
symbols = get_filtered_symbols('spot')

for symbol in symbols:
    df_check, completeness = check_gaps('spot', symbol)
    if not (completeness < 0.95 or df_check['price_change'].abs().max() > 0.06):
        continue
    print(symbol, completeness, len(df_check))
    # Only the gaps with a large price jump are shown.
    display(df_check[df_check['price_change'].abs() > 0.06])

# NOTE(review): the disabled draft below treats the result of check_gaps as a
# single frame, but check_gaps returns (frame, completeness); adapt it before
# re-enabling.
# dfs = [check_gaps('spot',  symbol) for symbol in symbols]
# dfs = [df for df in dfs if len(df)]
# df_gap = pd.concat(dfs, ignore_index=True)

# threshold = pd.Timedelta(days=2)

# df_gap_short = df_gap[df_gap['time_gap'] <  threshold]
# df_gap_long = df_gap[df_gap['time_gap'] >= threshold].reset_index(drop=True)

# print(df_gap_long.to_markdown(index=False))

In [None]:
# For every flagged spot symbol (completeness below 0.95 or an absolute price
# change above 0.06), show the five gaps with the largest absolute price change.
symbols = get_filtered_symbols('spot')

for symbol in symbols:
    df_check, completeness = check_gaps('spot', symbol)
    # Disabled alternative: print a summary line for anything below full completeness.
    # if completeness < 1:
        # print(symbol, completeness, len(df_check))
    if not (completeness < 0.95 or df_check['price_change'].abs().max() > 0.06):
        continue
    # Sorted ascending by |price_change|, so the tail holds the largest jumps.
    display(df_check.sort_values(by='price_change', key=abs).tail())


In [None]:
# USDT-futures screening: every symbol whose completeness is below 1 gets a
# summary line plus its five gaps with the largest absolute price change.
symbols = get_filtered_symbols('usdt_futures')

for symbol in symbols:
    df_check, completeness = check_gaps('usdt_futures', symbol)
    if not completeness < 1:
        continue
    print(symbol, completeness, len(df_check))
    # Disabled alternative filter (same thresholds as the spot screening):
    # if completeness < 0.95 or df_check['price_change'].abs().max() > 0.06:
    display(df_check.sort_values(by='price_change', key=abs).tail())
