In [None]:
from datetime import timedelta
from pathlib import Path

import polars as pl

from bdt_common.enums import TradeType
from bhds.holo_kline.merger import Holo1mKlineMerger
from bhds.holo_kline.resampler import HoloKlineResampler

# Let polars render up to 100 rows per table so the display() calls below are not truncated early.
pl.Config.set_tbl_rows(100)

In [None]:
# Root of the local Binance data tree, located under the current user's home directory.
binance_data_dir = Path.home().joinpath('crypto_data', 'binance_data')

# Sub-directories of the data root that the notebook refers to.
parsed_dir, holo_1m_dir, resampled_dir = (
    binance_data_dir / sub_dir_name
    for sub_dir_name in ('parsed_data', 'holo_1m_klines', 'resampled_klines')
)

In [None]:
# Build a 1-minute kline merger for TradeType.um_futures that reads from
# parsed_dir, with the VWAP and funding options switched on.
merger = Holo1mKlineMerger(
    trade_type=TradeType.um_futures,
    base_dir=parsed_dir,
    include_vwap=True,
    include_funding=True,
)

# NOTE(review): hard-coded POSIX temp location; consider making it configurable.
kline_1m_file = Path("/tmp/btc_1m.parquet")

# generate() hands back an object that is executed with collect(). The collected
# result is discarded and the file is read back right after, so presumably the
# execution itself writes kline_1m_file -- TODO confirm against Holo1mKlineMerger.
ldf = merger.generate("BTCUSDT", kline_1m_file)
ldf.collect()

df = pl.read_parquet(kline_1m_file)

# Preview the first rows whose funding rate is meaningfully non-zero (|rate| > 1e-8).
nonzero_funding = pl.col("funding_rate").abs() > 1e-8
display(df.filter(nonzero_funding).head())

In [None]:
# Report whether the 1-minute kline file is present before scanning it.
print(f"{kline_1m_file} exists: {kline_1m_file.exists()}")

# The resampler takes both the lazy frame and the parquet schema.
schema = pl.read_parquet_schema(kline_1m_file)
ldf = pl.scan_parquet(kline_1m_file)

# Resample with a "1h" resampler and a '30m' offset
# (presumably hourly candles shifted by 30 minutes -- confirm against HoloKlineResampler).
resampler = HoloKlineResampler("1h")
ldf_resample = resampler.resample(ldf, offset='30m', schema=schema)

# Materialize the result and attach each candle's time span as 'duration'.
one_hour = timedelta(hours=1)
candle_span = pl.col('candle_end_time') - pl.col('candle_begin_time')
df_res = ldf_resample.collect().with_columns(candle_span.alias('duration'))

# Drop the final row unless it spans a full hour; every other row is kept as is.
keep_condition = pl.col("duration") == one_hour
is_last_row = pl.col("_idx") == pl.col("_idx").max()
df_res = (
    df_res.with_row_index(name="_idx")
    .filter(~is_last_row | keep_condition)
    .drop("_idx")
)

# Show, in order: any remaining candles that are not exactly one hour long,
# then the first and the last rows of the resampled frame.
display(
    df_res.filter(pl.col('duration') != one_hour),
    df_res.head(),
    df_res.tail(),
)

In [None]:
# Pre-computed resampled klines for BTCUSDT under futures_um/1h/5m
# (presumably 1h candles with a 5m offset -- confirm against the directory layout).
resampled_file = resampled_dir.joinpath("futures_um/1h/5m/BTCUSDT.parquet")
print(f"{resampled_file} exists: {resampled_file.exists()}")

df_res = pl.read_parquet(resampled_file)

# First output: the first two and last two rows stacked into one frame.
# Second output: the last rows that carry a funding rate.
display(
    pl.concat([df_res.head(2), df_res.tail(2)]),
    df_res.filter(pl.col("funding_rate").is_not_null()).tail(),
)


In [None]:
# Pre-computed resampled klines for BTCUSD_PERP under futures_cm/1h/5m
# (presumably 1h candles with a 5m offset -- confirm against the directory layout).
resampled_file = resampled_dir.joinpath("futures_cm/1h/5m/BTCUSD_PERP.parquet")
print(f"{resampled_file} exists: {resampled_file.exists()}")

df_res = pl.read_parquet(resampled_file)

# First output: the first two and last two rows stacked into one frame.
# Second output: the last rows that carry a funding rate.
display(
    pl.concat([df_res.head(2), df_res.tail(2)]),
    df_res.filter(pl.col("funding_rate").is_not_null()).tail(),
)
