In [None]:
import os

# Make the parent of the notebook's directory the working directory.
# The starting directory is captured only the first time this cell runs in a
# kernel session, so re-executing the cell does not climb one level higher
# each time (a bare relative `os.chdir('..')` would).
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_START_DIR))

In [None]:
import time

import pandas as pd
import polars as pl

from bhds import aws_kline
from config import Config
from constant import TradeType

In [None]:
import bhds.aws_basics
import bhds.polars_kline

# Build the directory under BINANCE_DATA_DIR/aws_data from the path tokens
# returned for 'um_futures'.
um_futures_tokens = bhds.aws_basics.get_kline_path_tokens('um_futures')
d = Config.BINANCE_DATA_DIR / 'aws_data' / aws_kline.get_aws_dir(um_futures_tokens)

# A single BTCUSDT 5m archive used as the sample input.
p = d / 'BTCUSDT' / '5m' / 'BTCUSDT-5m-2024-10-28.zip'
print(p)

# NOTE(review): the return value is neither stored nor the cell's last
# expression, so it is not displayed — presumably only a "does it parse" check.
bhds.polars_kline.read_aws_kline_csv(p)

# Last expression of the cell: its value is what the notebook renders.
aws_kline.find_kline_missing_dts_all_symbols(TradeType.um_futures, '1m')

In [None]:
# Benchmark: time how long pandas needs to turn the raw kline rows into a
# typed DataFrame with a UTC datetime column.
# NOTE(review): `klines` is not assigned in any cell visible in this section;
# it must already exist in the kernel — confirm before Restart & Run All.

# Field order of one raw kline row (12 fields).
columns = [
    'candle_begin_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume', 'trade_num',
    'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
]

# Explicit 64-bit dtypes. The builtin `int` resolves to the platform C long,
# which is 32-bit on Windows with NumPy < 2.0 and cannot hold millisecond
# epoch timestamps; 'int64'/'float64' are the same on every platform.
types = {
    'candle_begin_time': 'int64',
    'open': 'float64',
    'high': 'float64',
    'low': 'float64',
    'close': 'float64',
    'volume': 'float64',
    'quote_volume': 'float64',
    'trade_num': 'int64',
    'taker_buy_base_asset_volume': 'float64',
    'taker_buy_quote_asset_volume': 'float64'
}

t_start = time.perf_counter_ns()

# Single chain, no in-place mutation: build, drop the two unused fields,
# cast, then replace the integer timestamps with tz-aware (UTC) datetimes.
df = (
    pd.DataFrame(klines, columns=columns)
    .drop(columns=['ignore', 'close_time'])
    .astype(types)
    .assign(
        candle_begin_time=lambda frame: pd.to_datetime(frame['candle_begin_time'], unit='ms', utc=True)
    )
)

# perf_counter_ns() returns nanoseconds; convert the elapsed time to ms.
time_ms = (time.perf_counter_ns() - t_start) / 1_000_000
print(f'Pandas time {time_ms:.2f} ms')

In [None]:
# Benchmark: the same conversion as the pandas cell, done with a polars
# LazyFrame so the whole pipeline runs on `collect()`.
# NOTE(review): `klines` is not assigned in any cell visible in this section.

# Field order of one raw kline row (12 fields).
columns = [
    'candle_begin_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume', 'trade_num',
    'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
]

# Fields removed from the frame, and the fields stored as integers; every
# other retained field is a 64-bit float.
unused_columns = ('close_time', 'ignore')
integer_columns = ('candle_begin_time', 'trade_num')

# dtype overrides for the retained fields, in row-field order.
schema = {
    name: (pl.Int64 if name in integer_columns else pl.Float64)
    for name in columns
    if name not in unused_columns
}

t_start = time.perf_counter_ns()

# Lazy plan: load rows -> drop unused fields -> reinterpret the integer
# timestamp as a millisecond datetime and tag it as UTC.
lf = (
    pl.LazyFrame(klines, schema=columns, orient='row', schema_overrides=schema)
    .drop(*unused_columns)
    .with_columns(
        pl.col('candle_begin_time').cast(pl.Datetime('ms')).dt.replace_time_zone('UTC')
    )
)
df = lf.collect()

# perf_counter_ns() returns nanoseconds; convert the elapsed time to ms.
elapsed_ns = time.perf_counter_ns() - t_start
time_ms = elapsed_ns / 1_000_000
print(f'Polars time {time_ms:.2f} ms')