# Lossless -> gridded 

Grids `last_trade` data: 

- Each `BinanceLastTradesGrid` reads the underlying lossless dataset `symbol_date_df` to determine symbol-date pairs. 
- Each dataset maintains a list of "validated" dates. Second runs will be very quick

In [1]:
import mnemosyne as ms 
import polars as pl

args = [
    (ms.DatasetType.BinanceSpotTrades, '20s', 'USDC'),
    (ms.DatasetType.BinanceSpotTrades, '2m', 'USDC'),
    (ms.DatasetType.BinanceSpotTrades, '10m', 'USDC'),

    (ms.DatasetType.BinanceUmPerpTrades, '20s', 'USDC'),
    (ms.DatasetType.BinanceUmPerpTrades, '2m', 'USDC'),
    (ms.DatasetType.BinanceUmPerpTrades, '10m', 'USDC'),

    (ms.DatasetType.BinanceSpotTrades, '20s', 'USDT'), 
    (ms.DatasetType.BinanceSpotTrades, '2m', 'USDT'),
    (ms.DatasetType.BinanceSpotTrades, '4s', 'USDT'),
    (ms.DatasetType.BinanceSpotTrades, '10m', 'USDT'),

    (ms.DatasetType.BinanceUmPerpTrades, '20s', 'USDT'),
    (ms.DatasetType.BinanceUmPerpTrades, '2m', 'USDT'),
    (ms.DatasetType.BinanceUmPerpTrades, '10m', 'USDT'),
    (ms.DatasetType.BinanceUmPerpTrades, '4s', 'USDT'),
]

In [2]:
for dataset_type, grid_interval, peg_symbol in args:
    # Reads the underlying lossless dataset's `symbol_date_df` to determine symbol-date pairs
    dataset = ms.binance.BinanceLastTradesGrid(
        peg_symbol=peg_symbol, 
        grid_interval=grid_interval, 
        dataset_type=dataset_type, 
        parquet_names='*.parquet', # Write to a single parquet: polars defaults to "0.parquet, 1.parquet ..."
        num_workers=4, 
    )
    print(f'{dataset_type} {grid_interval} {peg_symbol}: {dataset.path}')
    dataset.compute(recompute=False, days_per_batch=30)

INFO:mnemosyne.dataset.interface:Computing 60 partitions in 2 batches (30 days/batch) with 4 workers


BinanceSpotTrades 20s USDC: /data/mnemosyne/binance/grids/spot/last_trade/20s/peg_symbol=USDC
BinanceSpotTrades 2m USDC: /data/mnemosyne/binance/grids/spot/last_trade/2m/peg_symbol=USDC
BinanceSpotTrades 10m USDC: /data/mnemosyne/binance/grids/spot/last_trade/10m/peg_symbol=USDC
BinanceUmPerpTrades 20s USDC: /data/mnemosyne/binance/grids/futures/um/last_trade/20s/peg_symbol=USDC
BinanceUmPerpTrades 2m USDC: /data/mnemosyne/binance/grids/futures/um/last_trade/2m/peg_symbol=USDC
BinanceUmPerpTrades 10m USDC: /data/mnemosyne/binance/grids/futures/um/last_trade/10m/peg_symbol=USDC
BinanceSpotTrades 20s USDT: /data/mnemosyne/binance/grids/spot/last_trade/20s/peg_symbol=USDT


  0%|          | 0/2 [00:00<?, ?it/s]

INFO:mnemosyne.dataset.interface:Computation complete: 60/60 successful


BinanceSpotTrades 2m USDT: /data/mnemosyne/binance/grids/spot/last_trade/2m/peg_symbol=USDT
BinanceSpotTrades 4s USDT: /data/mnemosyne/binance/grids/spot/last_trade/4s/peg_symbol=USDT
BinanceSpotTrades 10m USDT: /data/mnemosyne/binance/grids/spot/last_trade/10m/peg_symbol=USDT
BinanceUmPerpTrades 20s USDT: /data/mnemosyne/binance/grids/futures/um/last_trade/20s/peg_symbol=USDT
BinanceUmPerpTrades 2m USDT: /data/mnemosyne/binance/grids/futures/um/last_trade/2m/peg_symbol=USDT
BinanceUmPerpTrades 10m USDT: /data/mnemosyne/binance/grids/futures/um/last_trade/10m/peg_symbol=USDT
BinanceUmPerpTrades 4s USDT: /data/mnemosyne/binance/grids/futures/um/last_trade/4s/peg_symbol=USDT


# Usage example

In [6]:
from datetime import date as Date 

peg_symbol = 'USDC'
dstype = ms.DatasetType.BinanceSpotTrades
grid_interval = '10m'

dataset = ms.binance.BinanceLastTradesGrid(
        peg_symbol=peg_symbol, 
        grid_interval=grid_interval, 
        dataset_type=dataset_type, 
        parquet_names='*.parquet', 
        num_workers=4, 
    )

# Reading the full dataset
lf = pl.scan_parquet(dataset.path / f'**/{dataset.parquet_names}')
lf.head(1).collect()

symbol,date,time,peg_symbol,open,high,low,close,volume_base,volume_quote,trade_count,last_trade_time,taker_buy_volume_quote,taker_sell_volume_quote,vwap_taker_buy,vwap_taker_sell,vwap_total_by_base
enum,date,datetime[μs],str,f64,f64,f64,f64,f64,f64,u32,datetime[μs],f64,f64,f64,f64,f64
"""BTC""",2024-01-04,2024-01-04 12:30:00,"""USDC""",43272.9,43456.0,42980.1,43411.5,0.047,2036.8845,16,2024-01-04 12:39:28.393,1649.1237,387.7608,43398.04692,43084.698663,43337.968085


In [None]:
# Get universe: same as symbol-date-pair of the underlying universe
dataset.universe_df

symbol,date
str,date
"""BTC""",2025-10-05
"""ENA""",2024-08-05
"""LINK""",2024-12-08
"""BTC""",2025-09-14
"""SOL""",2025-04-25
…,…
"""CRV""",2025-10-17
"""TRUMP""",2025-10-17
"""BOME""",2025-10-17
"""UNI""",2025-10-17


In [None]:
# Getting specific dates
dataset[[Date(2024, 2, 2), Date(2025, 3, 4)]].collect()

symbol,date,time,peg_symbol,open,high,low,close,volume_base,volume_quote,trade_count,last_trade_time,taker_buy_volume_quote,taker_sell_volume_quote,vwap_taker_buy,vwap_taker_sell,vwap_total_by_base
str,date,datetime[μs],str,f64,f64,f64,f64,f64,f64,u32,datetime[μs],f64,f64,f64,f64,f64
"""BNB""",2024-02-02,2024-02-02 00:00:00,"""USDC""",300.06,300.06,299.71,299.98,23.39,7014.4566,177,2024-02-02 00:09:17.705,4036.1001,2978.3565,299.858869,299.935241,299.891261
"""BTC""",2024-02-02,2024-02-02 00:00:00,"""USDC""",43056.1,43082.3,43011.8,43068.7,23.108,994810.0591,434,2024-02-02 00:09:58.651,540385.8848,454424.1743,43051.784762,43048.904351,43050.461273
"""ETH""",2024-02-02,2024-02-02 00:00:00,"""USDC""",2302.9,2303.7,2299.39,2303.13,137.195,315792.4751,320,2024-02-02 00:09:50.536,109773.28767,206019.18743,2301.471908,2301.94249,2301.778309
"""XRP""",2024-02-02,2024-02-02 00:00:00,"""USDC""",0.5056,0.5057,0.5049,0.5053,60722.8,30678.17274,102,2024-02-02 00:09:50.628,12550.15994,18128.0128,0.50515,0.505263,0.505217
"""SOL""",2024-02-02,2024-02-02 00:00:00,"""USDC""",97.839,97.856,97.335,97.748,1734.47,169273.60727,732,2024-02-02 00:09:57.690,59633.10515,109640.50212,97.628022,97.575423,97.593851
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""BNB""",2025-03-04,1970-01-21 03:30:00,"""USDC""",575.4,588.4,545.82,583.69,61059.37,3.4661e7,113237,1970-01-21 03:38:52.794063,1.7052e7,1.7609e7,567.758032,567.851393,567.657145
"""CRV""",2025-03-04,1970-01-21 03:30:00,"""USDC""",0.413,0.4257,0.3674,0.4179,1.2394e7,4.9436e6,50638,1970-01-21 03:38:52.793583,2.3239e6,2.6197e6,0.39991,0.398696,0.398874
"""1000BONK""",2025-03-04,1970-01-21 03:30:00,"""USDC""",0.012438,0.012569,0.010901,0.011949,7.12926061e8,8.4018e6,107470,1970-01-21 03:38:52.799115,4.1345e6,4.2673e6,0.011795,0.011787,0.011785
"""NEO""",2025-03-04,1970-01-21 03:30:00,"""USDC""",8.711,8.844,8.114,8.736,320539.63,2.7209e6,39786,1970-01-21 03:38:52.794871,1.3551e6,1.3658e6,8.493885,8.487714,8.488408
