In [1]:
import os

# Switch the working directory to the parent of the directory the kernel started in
# (presumably the project root, so the `config` / `generate` / `util` imports below
# resolve — confirm against the repo layout).
# The starting directory is remembered on first execution so that re-running this
# cell lands in the same place instead of climbing one more level each time.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

In [None]:
import logging
import pickle
from pathlib import Path

import pandas as pd
import polars as pl
from dateutil.parser import parse as date_parse

from config import BINANCE_DATA_DIR, TradeType
from config.config import _BASE_DIR
from generate.resample import polars_calc_resample
from generate.util import list_results_kline_symbols
from util.time import convert_interval_to_timedelta
from util.ts_manager import TSManager

# Display option: number of rows polars renders when a frame is shown in a cell output.
MAX_TABLE_ROWS = 30
pl.Config.set_tbl_rows(MAX_TABLE_ROWS)

In [None]:
# Inspect the BTCUSDT 1m futures klines stored under `parsed_data`, restricted to a
# ten-minute window around 2024-10-28 20:00 UTC.
parsed_btc_dir = BINANCE_DATA_DIR / 'parsed_data' / 'um_futures' / 'klines' / 'BTCUSDT' / '1m'
btc_ts_mgr = TSManager(parsed_btc_dir)

# Both bounds are inclusive (polars `is_between` default).
window_begin = date_parse('2024-10-28 19:55:00+00:00')
window_end = date_parse('2024-10-28 20:05:00+00:00')
in_window = pl.col('candle_begin_time').is_between(window_begin, window_end)

# Last expression of the cell: rendered as the cell output.
btc_ts_mgr.read_all().filter(in_window)

In [None]:
# Load the BTCUSDT 1m futures klines stored under `results_data` and look at the same
# ten-minute window around 2024-10-28 20:00 UTC.
um_futures_1m_kline_dir = BINANCE_DATA_DIR / "results_data" / "um_futures" / "klines" / "1m"
print(um_futures_1m_kline_dir)

symbol = 'BTCUSDT'
btc_1m_path = um_futures_1m_kline_dir / f'{symbol}.pqt'
df_btc_1m = pl.read_parquet(btc_1m_path)
display(df_btc_1m.tail())

# Both bounds are inclusive (polars `is_between` default).
window_begin = date_parse('2024-10-28 19:55:00+00:00')
window_end = date_parse('2024-10-28 20:05:00+00:00')
in_window = pl.col('candle_begin_time').is_between(window_begin, window_end)

# Last expression of the cell: rendered as the cell output.
df_btc_1m.filter(in_window)

In [None]:
# Load the resampled 1h BTCUSDT futures klines.
# NOTE(review): the '0m' sub-directory presumably selects the resample offset — confirm.
um_futures_1h_kline_dir = BINANCE_DATA_DIR / "results_data" / "um_futures" / "resampled_klines" / "1h"
btc_1h_path = um_futures_1h_kline_dir / '0m' / 'BTCUSDT.pqt'
df_btc_1h = pl.read_parquet(btc_1h_path)
display(df_btc_1h.tail())

In [None]:
# Load the Quantclass preprocessed 1h data and pick out the BTC-USDT frame.
quantclass_dir = _BASE_DIR / 'quantclass_data'
preprocess_dir = quantclass_dir / 'coin-binance-spot-swap-preprocess-pkl-1h-2025-03-06'

# SECURITY: unpickling can execute arbitrary code — only load files from a trusted source.
# The file is opened in a `with` block so the handle is closed right after loading.
with open(preprocess_dir / 'swap_dict.pkl', 'rb') as swap_pkl:
    # NOTE(review): despite its name, `spot_dict` holds the contents of swap_dict.pkl;
    # the name is kept so any other cell that references it keeps working.
    spot_dict = pickle.load(swap_pkl)

df_btc_qtc: pd.DataFrame = spot_dict['BTC-USDT']
df_btc_qtc.tail()

In [None]:
# Compare the locally resampled 1h BTCUSDT klines ("AWS") against the Quantclass
# frame, column by column, over the time range both sources cover.

# Absolute difference above which two values are reported as a mismatch.
DIFF_TOLERANCE = 1e-4

df_aws = df_btc_1h.to_pandas().copy()
df_qtc = df_btc_qtc.copy()
# Make the Quantclass timestamps tz-aware (UTC) so they can be compared with the AWS ones.
# NOTE(review): assumes the Quantclass timestamps are tz-naive; tz_localize raises otherwise.
df_qtc['candle_begin_time'] = df_qtc['candle_begin_time'].dt.tz_localize('UTC')
# Align the column name with the AWS frame.
df_qtc = df_qtc.rename(columns={'funding_fee': 'funding_rate'})

print(f'Time {df_aws["candle_begin_time"].min()} -- {df_aws["candle_begin_time"].max()} AWS')

print(f'Time {df_btc_qtc["candle_begin_time"].min()} -- {df_btc_qtc["candle_begin_time"].max()} Quantclass')

# Restrict both frames to the overlapping time range (bounds inclusive).
begin_ts = max(df_aws['candle_begin_time'].min(), df_qtc['candle_begin_time'].min())
end_ts = min(df_aws['candle_begin_time'].max(), df_qtc['candle_begin_time'].max())
print(f'Time {begin_ts} -- {end_ts}')

df_aws = df_aws[df_aws['candle_begin_time'].between(begin_ts, end_ts)]
print(f'Trimmed shape {df_aws.shape} AWS')

df_qtc = df_qtc[df_qtc['candle_begin_time'].between(begin_ts, end_ts)]
print(f'Trimmed shape {df_qtc.shape} Quantclass')

ts_intersect = set(df_aws['candle_begin_time']).intersection(set(df_qtc['candle_begin_time']))
print(f'Intersecion num candle_begin_time {len(ts_intersect)}')

# Left join on the timestamp: every AWS row is kept; Quantclass columns whose name
# collides with an AWS column get the '_qtc' suffix. AWS timestamps with no Quantclass
# counterpart end up with NaN in the '_qtc' columns.
df = df_aws.join(df_qtc.set_index('candle_begin_time'), on='candle_begin_time', rsuffix='_qtc')

cols = [
    'open', 'high', 'low', 'close', 'volume', 'quote_volume', 'trade_num', 'taker_buy_base_asset_volume',
    'taker_buy_quote_asset_volume', 'funding_rate'
]

# Timestamp of the largest difference in the last column that exceeded the tolerance.
error_begin_time = None
for c in cols:
    # Work on local Series instead of adding/dropping scratch columns on `df`.
    diff = df[c] - df[f'{c}_qtc']
    diff_abs = diff.abs()
    mismatch = diff_abs > DIFF_TOLERANCE
    max_diff = diff_abs.max()
    diff_num = mismatch.sum()
    print(f'Column: {c}, max diff {max_diff}, diff num {diff_num}')

    # NaN differences are skipped by both `.max()` and the `>` comparison above, so
    # report them explicitly rather than letting unmatched rows pass silently.
    nan_num = diff.isna().sum()
    if nan_num > 0:
        print(f'Column: {c}, {nan_num} rows not comparable (missing value on either side)')

    if max_diff > DIFF_TOLERANCE:
        display(df.loc[mismatch, ['candle_begin_time', c, f'{c}_qtc']].assign(diff=diff.loc[mismatch]))
        error_begin_time = df.loc[diff_abs == max_diff, 'candle_begin_time'].iloc[0]

# Show the full AWS row (first) and Quantclass row (second) at the recorded timestamp.
if error_begin_time is not None:
    df_err = pd.concat([
        df_aws.loc[df_aws['candle_begin_time'] == error_begin_time, cols],
        df_qtc.loc[df_qtc['candle_begin_time'] == error_begin_time, cols]
    ])
    print(error_begin_time)
    display(df_err)