In [1]:
import mnemosyne as ms
from mnemosyne.engines import ReturnsEngine 
from datetime import datetime as Datetime
import polars as pl
from pathlib import Path
from datetime import date as Date

symbol_enum = pl.Enum(['BTC', 'ETH', 'BTCDOWN', 'BNB', 'SHIB'])

query_lf = pl.DataFrame({
    'symbol': ['BTC', 'ETH', 'BTCDOWN', 'BNB', 'SHIB'], 
    'times': [Datetime(2025, 8, 1, 9, 0, 0)] * 5
}).with_columns(pl.col('symbol').cast(symbol_enum)).lazy()

backend_db = ms.binance.BinanceLastTradesGrid(
            peg_symbol='USDT', 
            grid_interval='10m', 
            dataset_type=ms.DatasetType.BinanceSpotTrades, 
        ).lazyframe()

re = ReturnsEngine(
    backend_db, 
    backend_fair_expr = pl.col('vwap_total_by_base'), 
    backend_time_expr = pl.col('last_trade_time')
)

value = re.query(query_lf, 
    start_time_expr = pl.col('times'), 
    mark_duration=pl.lit('10m'), 
    tick_lag_tolerance=pl.lit('30s'), 
    verbose_debug=False
)
value.collect()



symbol,times,max_tick_to_query_lag,start_fair,end_fair,return
enum,datetime[μs],duration[μs],f64,f64,f64
"""BTC""",2025-08-01 09:00:00,154216µs,114733.002664,114965.474546,0.002026
"""ETH""",2025-08-01 09:00:00,677388µs,3618.49958,3625.498517,0.001934
"""BTCDOWN""",2025-08-01 09:00:00,,,,
"""BNB""",2025-08-01 09:00:00,1s 191853µs,768.270433,769.60263,0.001734
"""SHIB""",2025-08-01 09:00:00,6s 436199µs,1.2e-05,1.2e-05,0.002999


# Collect 10m-gridded data

In [2]:
save_root = Path('~/Documents/binance_gridded_returns')

pivot_dates = [
    Date(2022, 9, 1),
    Date(2023, 6, 1),
    Date(2024, 3, 1),
    Date(2025, 1, 1), 
    Date(2026, 1, 1)
]

In [3]:
query_lf = ms.binance.BinanceLastTradesGrid(
    peg_symbol='USDT',
    grid_interval='10m',
    dataset_type=ms.DatasetType.BinanceSpotTrades, 
).lazyframe()

backend_db = ms.binance.BinanceLastTradesGrid(
            peg_symbol='USDT', 
            grid_interval='4s', 
            dataset_type=ms.DatasetType.BinanceSpotTrades, 
        ).lazyframe()

re = ReturnsEngine(
    backend_db, 
    backend_fair_expr = pl.col('vwap_total_by_base'), 
    backend_time_expr = pl.col('last_trade_time')
)

value = re.query(query_lf, 
    start_time_expr = pl.col('time'), 
    mark_duration=pl.lit('10m'), 
    tick_lag_tolerance=pl.lit('30s')
)
# value.sink_parquet(save_root / 'binance_spot_10m_grid_mark10m.parquet', compression='brotli')

In [None]:
lf = pl.scan_parquet(
    save_root / 'binance_spot_10m_grid_mark10m.parquet'
).drop('start_fair', 'end_fair', 'start_tick_time', 'end_tick_time', 'start_query_time', 'end_query_time')
for j in range(len(pivot_dates) - 1):
    (
        lf.filter(pl.col('date').is_between(pivot_dates[j], pivot_dates[j+1], closed='left'))
        .sink_parquet(save_root / f'binance_spot_10m_grid_mark10m_{j}.parquet', compression='brotli')
    )

In [12]:
qdb = ms.binance.BinanceLastTradesGrid(
    peg_symbol='USDT',
    grid_interval='10m',
    dataset_type=ms.DatasetType.BinanceUmPerpTrades, 
)

In [36]:
from datetime import datetime as Datetime, date as Date

In [None]:
lossless_path = Path(ms.DatasetType.BinanceUmPerpTrades.hive_path(peg_symbol='USDT'))
qdb = pl.scan_parquet(lossless_path / '**/data.parquet', hive_partitioning=True).filter(pl.col('date') >= Date(2025, 1, 1))
qdb.tail(5).collect()

In [None]:
qdb.head(5).collect()

id,price,qty,quote_qty,time,is_buyer_maker,peg_symbol,date,symbol
i64,f64,f64,f64,datetime[μs],bool,str,date,str
429417379,0.033373,1747.0,58.302631,2022-01-01 00:00:03.669,True,"""USDT""",2022-01-01,"""1000SHIB"""
429417380,0.033373,16075.0,536.470975,2022-01-01 00:00:03.669,True,"""USDT""",2022-01-01,"""1000SHIB"""
429417381,0.033373,2861.0,95.480153,2022-01-01 00:00:03.669,True,"""USDT""",2022-01-01,"""1000SHIB"""
429417382,0.03337,9088.0,303.26656,2022-01-01 00:00:03.669,True,"""USDT""",2022-01-01,"""1000SHIB"""
429417383,0.03337,3015.0,100.61055,2022-01-01 00:00:03.669,True,"""USDT""",2022-01-01,"""1000SHIB"""


In [19]:
!ls /bigdata/mnemosyne/binance/lossless/futures/um/last_trade/

'peg_symbol=USDC'  'peg_symbol=USDT'


In [16]:
ms.DatasetType.BinanceUmPerpTrades.hive_path(peg_symbol='USDT')

'/bigdata/mnemosyne/binance/lossless/futures/um/last_trade/peg_symbol=USDT'

In [13]:
dir(ms.DatasetType.BinanceUmPerpTrades)

['BinanceSpotTrades',
 'BinanceUmPerpTrades',
 'HyperliquidPerpL2',
 'HyperliquidPerpTrades',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'grid_hive_path',
 'hive_path',
 'raw_data_path']

In [4]:
query_lf = ms.binance.BinanceLastTradesGrid(
    peg_symbol='USDT',
    grid_interval='10m',
    dataset_type=ms.DatasetType.BinanceUmPerpTrades, 
).lazyframe()

backend_db = ms.binance.BinanceLastTradesGrid(
            peg_symbol='USDT', 
            grid_interval='4s', 
            dataset_type=ms.DatasetType.BinanceUmPerpTrades, 
        ).lazyframe()

re = ReturnsEngine(
    backend_db, 
    backend_fair_expr = pl.col('vwap_total_by_base'), 
    backend_time_expr = pl.col('last_trade_time')
)

value = re.query(query_lf, 
    start_time_expr = pl.col('time'), 
    mark_duration=pl.lit('10m'), 
    tick_lag_tolerance=pl.lit('30s')
)
# value.sink_parquet(save_root / 'binance_futures_10m_grid_mark10m.parquet', compression='brotli')

In [23]:
lf = pl.scan_parquet(
    save_root / 'binance_futures_10m_grid_mark10m.parquet'
).drop('start_fair', 'end_fair', 'start_tick_time', 'end_tick_time', 'start_query_time', 'end_query_time')
for j in range(len(pivot_dates) - 1):
    (
        lf.filter(pl.col('date').is_between(pivot_dates[j], pivot_dates[j+1], closed='left'))
        .sink_parquet(save_root / f'binance_futures_10m_grid_mark10m_{j}.parquet', compression='brotli')
    )

In [6]:
query_df = query_lf.collect()

In [8]:
query_df.filter(pl.col('date').is_between(Date(2023, 1, 1), Date(2024, 1, 1))).select(pl.len())

len
u32
10011433


In [9]:
query_df.filter(pl.col('date').is_between(Date(2024, 1, 1), Date(2025, 1, 1))).select(pl.len())

len
u32
14577751


In [11]:
query_df.filter(pl.col('date').is_between(Date(2025, 1, 1), Date(2025, 11, 1)))

symbol,date,time,peg_symbol,open,high,low,close,volume_base,volume_quote,trade_count,last_trade_time,taker_buy_volume_quote,taker_sell_volume_quote,vwap_taker_buy,vwap_taker_sell,vwap_total_by_base
enum,date,datetime[μs],str,f64,f64,f64,f64,f64,f64,u32,datetime[μs],f64,f64,f64,f64,f64
"""AGLD""",2025-01-01,1970-01-21 02:00:00,"""USDT""",2.4282,2.5564,2.1448,2.5455,2.26478952e8,5.1920e8,2436437,1970-01-21 02:09:35.999999,2.6210e8,2.5710e8,2.296756,2.296941,2.292473
"""ANKR""",2025-01-01,1970-01-21 02:00:00,"""USDT""",0.03359,0.03549,0.03294,0.03538,1.4729798e8,5.0357e6,65464,1970-01-21 02:09:35.975050,2.6130e6,2.4227e6,0.03421,0.034187,0.034187
"""DOT""",2025-01-01,1970-01-21 02:00:00,"""USDT""",6.637,7.14,6.515,7.052,2.5994e7,1.7749e8,518902,1970-01-21 02:09:35.999935,8.9169e7,8.8321e7,6.836149,6.829809,6.827975
"""LIT""",2025-01-01,1970-01-21 02:00:00,"""USDT""",0.912,0.939,0.882,0.93,1.3988e7,1.2835e7,78369,1970-01-21 02:09:35.981491,6.5114e6,6.3240e6,0.917488,0.918088,0.917625
"""OMG""",2025-01-01,1970-01-21 02:00:00,"""USDT""",0.3235,0.3398,0.3194,0.3356,6.8406e6,2.2511e6,33019,1970-01-21 02:09:35.997395,1.0997e6,1.1515e6,0.329015,0.329286,0.329084
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""SAHARA""",2025-10-22,1970-01-21 09:10:00,"""USDT""",0.07415,0.07651,0.073,0.07468,1.77342133e8,1.3266e7,179262,1970-01-21 09:12:57.599654,6.6582e6,6.6080e6,0.074831,0.074797,0.074806
"""MERL""",2025-10-22,1970-01-21 09:10:00,"""USDT""",0.36075,0.36452,0.32863,0.33784,6.0191366e7,2.0991e7,430509,1970-01-21 09:12:57.597712,1.0222e7,1.0769e7,0.349334,0.348429,0.348737
"""PLAY""",2025-10-22,1970-01-21 09:10:00,"""USDT""",0.02655,0.02716,0.02561,0.02643,4.6664741e7,1.2374e6,71374,1970-01-21 09:12:57.598127,571756.95799,665596.77082,0.026528,0.026512,0.026516
"""RONIN""",2025-10-22,1970-01-21 09:10:00,"""USDT""",0.3703,0.3735,0.356,0.3654,2.9915e6,1.0973e6,21284,1970-01-21 09:12:57.589218,543071.05686,554182.90128,0.367057,0.36661,0.366788
