In [1]:
import numpy as np 
import sklearn 
import xgboost as xgb 
import mnemosyne as ms
from datetime import datetime as Datetime, date as Date
from mnemosyne.engines import ReturnsEngine
import polars as pl

In [2]:
peg_symbol = 'USDT'
dstype = ms.DatasetType.BinanceSpotTrades
grid_interval = '10m'

dataset = ms.binance.BinanceLastTradesGrid(
        peg_symbol=peg_symbol, 
        grid_interval=grid_interval, 
        dataset_type=dstype, 
        parquet_names='*.parquet', 
        num_workers=4, 
    )

df = dataset.lazyframe().collect()
df

symbol,date,time,peg_symbol,open,high,low,close,volume_base,volume_quote,trade_count,last_trade_time,taker_buy_volume_quote,taker_sell_volume_quote,vwap_taker_buy,vwap_taker_sell,vwap_total_by_base
enum,date,datetime[μs],str,f64,f64,f64,f64,f64,f64,u32,datetime[μs],f64,f64,f64,f64,f64
"""XLM""",2022-01-01,2022-01-01 00:00:00,"""USDT""",0.2665,0.2686,0.266,0.2669,1.682608e6,449595.1697,1007,2022-01-01 00:09:56.967,195359.3536,254235.8161,0.267193,0.26721,0.267201
"""PERP""",2022-01-01,2022-01-01 00:00:00,"""USDT""",8.77,8.9,8.74,8.83,10498.0,92496.9352,392,2022-01-01 00:09:26.638,70731.0138,21765.9214,8.811472,8.809651,8.81091
"""AION""",2022-01-01,2022-01-01 00:00:00,"""USDT""",0.1391,0.1402,0.1391,0.1398,36875.0,5154.0818,79,2022-01-01 00:09:55.096,3155.8063,1998.2755,0.139787,0.13975,0.139772
"""ADX""",2022-01-01,2022-01-01 00:00:00,"""USDT""",0.5547,0.5578,0.5547,0.5568,112469.0,62606.9016,65,2022-01-01 00:08:48.913,31870.6309,30736.2707,0.55614,0.557201,0.556659
"""BNX""",2022-01-01,2022-01-01 00:00:00,"""USDT""",63.6,64.0,63.5,63.8,82.171,5241.5141,61,2022-01-01 00:09:31.040,4429.7186,811.7955,63.800477,63.720681,63.787883
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""COTI""",2025-10-18,2025-10-18 23:50:00,"""USDT""",0.03406,0.03416,0.03406,0.03414,29848.0,1018.3768,33,2025-10-18 23:59:17.681144,742.87559,275.50121,0.034122,0.034109,0.034119
"""VIC""",2025-10-18,2025-10-18 23:50:00,"""USDT""",0.1521,0.1524,0.1521,0.1524,5394.6,821.33166,9,2025-10-18 23:59:20.512505,821.33166,0.0,0.152251,,0.152251
"""CITY""",2025-10-18,2025-10-18 23:50:00,"""USDT""",0.787,0.79,0.787,0.79,920.25,724.59841,27,2025-10-18 23:59:50.775486,84.01451,640.5839,0.789388,0.787132,0.787393
"""AMP""",2025-10-18,2025-10-18 23:50:00,"""USDT""",0.002543,0.002551,0.002543,0.00255,530753.0,1351.788642,40,2025-10-18 23:59:51.217795,1289.409445,62.379197,0.002547,0.002546,0.002547


In [3]:
backend_ds = ms.binance.BinanceLastTradesGrid(
            peg_symbol='USDT', 
            grid_interval='4s', 
            dataset_type=ms.DatasetType.BinanceSpotTrades, 
        )

re = ReturnsEngine(
    backend_ds.lazyframe(),
    backend_fair_expr = pl.col('vwap_total_by_base'), 
    backend_time_expr = pl.col('last_trade_time')
)

result = re.query(dataset.lazyframe().select('date', 'symbol', 'last_trade_time'), 
    start_time_expr = pl.col('last_trade_time'), 
    mark_duration=pl.lit('10m'), 
    tick_lag_tolerance=pl.lit('30s'), 
    verbose_debug=False
)

In [5]:
returns = result.collect()



In [6]:
returns

date,symbol,last_trade_time,start_query_time,end_query_time,start_tick_time,end_tick_time,max_tick_to_query_lag,start_fair,end_fair,return
date,enum,datetime[μs],datetime[μs],datetime[μs],datetime[μs],datetime[μs],duration[μs],f64,f64,f64
2022-01-01,"""XLM""",2022-01-01 00:09:56.967,2022-01-01 00:09:56.967,2022-01-01 00:19:56.967,2022-01-01 00:09:56.967,2022-01-01 00:19:52.336,4s 631ms,0.2669,0.2667,-0.000749
2022-01-01,"""PERP""",2022-01-01 00:09:26.638,2022-01-01 00:09:26.638,2022-01-01 00:19:26.638,2022-01-01 00:09:26.638,2022-01-01 00:19:21.718,4s 920ms,8.83,8.77,-0.006795
2022-01-01,"""AION""",2022-01-01 00:09:55.096,2022-01-01 00:09:55.096,2022-01-01 00:19:55.096,2022-01-01 00:09:55.096,2022-01-01 00:19:51.469,3s 627ms,0.139729,0.1395,-0.001637
2022-01-01,"""ADX""",2022-01-01 00:08:48.913,2022-01-01 00:08:48.913,2022-01-01 00:18:48.913,2022-01-01 00:08:48.913,2022-01-01 00:12:13.091,6m 35s 822ms,0.556802,,
2022-01-01,"""BNX""",2022-01-01 00:09:31.040,2022-01-01 00:09:31.040,2022-01-01 00:19:31.040,2022-01-01 00:09:31.040,2022-01-01 00:16:27.981,3m 3s 59ms,63.8,,
…,…,…,…,…,…,…,…,…,…,…
2025-10-18,"""COTI""",2025-10-18 23:59:17.681144,2025-10-18 23:59:17.681144,2025-10-19 00:09:17.681144,2025-10-18 23:59:17.681144,2025-10-18 23:59:17.681144,10m,0.03414,,
2025-10-18,"""VIC""",2025-10-18 23:59:20.512505,2025-10-18 23:59:20.512505,2025-10-19 00:09:20.512505,2025-10-18 23:59:20.512505,2025-10-18 23:59:20.512505,10m,0.152309,,
2025-10-18,"""CITY""",2025-10-18 23:59:50.775486,2025-10-18 23:59:50.775486,2025-10-19 00:09:50.775486,2025-10-18 23:59:50.775486,2025-10-18 23:59:50.775486,10m,0.79,,
2025-10-18,"""AMP""",2025-10-18 23:59:51.217795,2025-10-18 23:59:51.217795,2025-10-19 00:09:51.217795,2025-10-18 23:59:51.217795,2025-10-18 23:59:51.217795,10m,0.00255,,


In [12]:
returns_date = returns.filter(pl.col('date') == Date(2025, 1, 1))
returns_date.filter(pl.col('symbol') == 'NEO')

date,symbol,last_trade_time,start_query_time,end_query_time,start_tick_time,end_tick_time,max_tick_to_query_lag,start_fair,end_fair,return
date,enum,datetime[μs],datetime[μs],datetime[μs],datetime[μs],datetime[μs],duration[μs],f64,f64,f64
2025-01-01,"""NEO""",2025-01-01 00:09:44.397890,2025-01-01 00:09:44.397890,2025-01-01 00:19:44.397890,2025-01-01 00:09:44.397890,2025-01-01 00:19:19.404732,24s 993158µs,13.64,13.64,-1.3023e-16
2025-01-01,"""NEO""",2025-01-01 00:19:19.404732,2025-01-01 00:19:19.404732,2025-01-01 00:29:19.404732,2025-01-01 00:19:19.404732,2025-01-01 00:29:19.403932,800µs,13.64,13.63,-0.000733
2025-01-01,"""NEO""",2025-01-01 00:29:40.341621,2025-01-01 00:29:40.341621,2025-01-01 00:39:40.341621,2025-01-01 00:29:40.341621,2025-01-01 00:39:26.362909,13s 978712µs,13.62,13.65,0.002203
2025-01-01,"""NEO""",2025-01-01 00:39:26.362909,2025-01-01 00:39:26.362909,2025-01-01 00:49:26.362909,2025-01-01 00:39:26.362909,2025-01-01 00:49:19.741927,6s 620982µs,13.65,13.66,0.000733
2025-01-01,"""NEO""",2025-01-01 00:49:59.790966,2025-01-01 00:49:59.790966,2025-01-01 00:59:59.790966,2025-01-01 00:49:59.790966,2025-01-01 00:59:50.590465,9s 200501µs,13.66,13.74,0.005857
…,…,…,…,…,…,…,…,…,…,…
2025-01-01,"""NEO""",2025-01-01 23:19:37.569940,2025-01-01 23:19:37.569940,2025-01-01 23:29:37.569940,2025-01-01 23:19:37.569940,2025-01-01 23:29:17.764672,19s 805268µs,14.31,14.38,0.004892
2025-01-01,"""NEO""",2025-01-01 23:29:50.327524,2025-01-01 23:29:50.327524,2025-01-01 23:39:50.327524,2025-01-01 23:29:50.327524,2025-01-01 23:39:46.186203,4s 141321µs,14.37,14.356293,-0.000954
2025-01-01,"""NEO""",2025-01-01 23:39:56.308119,2025-01-01 23:39:56.308119,2025-01-01 23:49:56.308119,2025-01-01 23:39:56.308119,2025-01-01 23:49:44.850522,11s 457597µs,14.36,14.34,-0.001393
2025-01-01,"""NEO""",2025-01-01 23:49:56.469774,2025-01-01 23:49:56.469774,2025-01-01 23:59:56.469774,2025-01-01 23:49:56.469774,2025-01-01 23:59:39.532910,16s 936864µs,14.34,14.36,0.001395


In [21]:
agg_returns = returns.group_by('symbol', 'date').agg(
    (pl.col('return').null_count() / pl.col('return').count()).alias('returns_null_frac'), 
    pl.col('return').sum(),
    pl.col('max_tick_to_query_lag').max()
)

In [22]:
agg_returns

symbol,date,returns_null_frac,return,max_tick_to_query_lag
enum,date,f64,f64,duration[μs]
"""GNO""",2025-04-30,0.756098,0.011761,2m 31s 716983µs
"""SAGA""",2024-10-30,0.006993,-0.012535,36s 2ms
"""ALICE""",2022-03-09,0.0,0.033884,21s 868ms
"""BANANA""",2024-10-19,0.099237,-0.020077,1m 47s 825ms
"""ZEC""",2024-02-18,0.090909,0.026273,1m 21s 75ms
…,…,…,…,…
"""ZK""",2024-06-18,0.0,0.066682,8s 787ms
"""IMX""",2024-09-16,0.180328,-0.035397,2m 4s 289ms
"""NXPC""",2025-09-30,0.035971,-0.085725,52s 27286µs
"""PROS""",2024-02-05,1.716981,0.024464,10m


In [45]:
null_cumfrac_by_date = (
    agg_returns.sort('date').with_columns(
        pl.col('returns_null_frac').cum_sum().alias('cumsum_returns_null_frac').over('symbol')
    ).pivot(
        on='symbol', 
        index='date', 
        values='cumsum_returns_null_frac'
    ).sort('date').with_columns(pl.col(pl.Float64).forward_fill())
)

In [52]:
returns.write_parquet('./plotting_dev_parquet.parquet', compression_level=3)