In [1]:
import polars as pl
import mnemosyne as ms
import atlas 
import metis

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ms.DatasetType.BinanceSpotTrades.hive_path('USDLN')

'/data/mnemosyne/binance/lossless/spot/last_trade/peg_symbol=USDLN'

In [3]:
root = '/data/mnemosyne/binance/grids/spot/last_trade/1m/peg_symbol=USDT/**/*.parquet'
pl.scan_parquet(root).collect_schema().names()

['symbol',
 'date',
 'peg_symbol',
 'open',
 'high',
 'low',
 'close',
 'volume_base',
 'volume_quote',
 'trade_count',
 'taker_buy_volume_quote',
 'taker_sell_volume_quote',
 'vwap_taker_buy',
 'vwap_taker_sell',
 'vwap_total_by_base',
 'actual_time_offset',
 'time']

In [7]:

from datetime import datetime, timedelta
from datetime import date as Date 
from pathlib import Path

# Create sample dates
start_date = Date(2024, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(1000)]

# Initialize dataset view
dataset = ms.dataset.ParquetDatasetView(
    partitions=dates, 
    parquet_names='*.parquet', 
    path=Path("/data/mnemosyne/binance/grids/spot/last_trade/1m/peg_symbol=USDT"), 
    num_workers=8,
    expected_columns = None
)

INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache


In [None]:
print(f"Dataset has {dataset.num_partitions()} partitions")
print(f"Cache has {dataset.num_validated()} validated partitions")

# Example 1: Validate all partitions (batch update)
try:
    dataset.validate()
    print("✓ All partitions are valid")
except RuntimeError as e:
    print(f"✗ Validation failed: {e}")

INFO:mnemosyne.dataset.interface:Validating 970 partitions with 8 workers


Dataset has 1000 partitions
Cache has 30 validated partitions


  0%|          | 0/970 [00:00<?, ?it/s]INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
  0%|          | 1/970 [00:00<02:35,  6.23it/s]INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 30 validated partitions from cache
INFO:mnemosyne

Error reading 2025-10-12 partition: expected at least 1 source
Error reading 2025-10-13 partition: expected at least 1 source
Error reading 2025-10-14 partition: expected at least 1 source
Error reading 2025-10-15 partition: expected at least 1 source
Error reading 2025-10-16 partition: expected at least 1 source
Error reading 2025-10-17 partition: expected at least 1 source
Error reading 2025-10-18 partition: expected at least 1 source
Error reading 2025-10-19 partition: expected at least 1 source
Error reading 2025-10-20 partition: expected at least 1 source
Error reading 2025-10-21 partition: expected at least 1 source
Error reading 2025-10-22 partition: expected at least 1 source
Error reading 2025-10-23 partition: expected at least 1 source
Error reading 2025-10-24 partition: expected at least 1 source
Error reading 2025-10-25 partition: expected at least 1 source
Error reading 2025-10-26 partition: expected at least 1 source
Error reading 2025-10-27 partition: expected at least 1

In [None]:
# Example 2: Check specific partition
if dataset.valid_partition(dates[0]):
    print(f"✓ {dates[0]} is valid")

# Example 3: Get data for specific dates
lf = dataset.get_partitions(dates=dates[:7])
print("Loaded LazyFrame for first 7 days")

# Example 4: Clear cache and revalidate
dataset.clear_cache(memory=True, file=False)
invalid = dataset.invalid_partitions(recompute=True)
print(f"Found {len(invalid)} invalid partitions after fresh validation")

INFO:mnemosyne.dataset.interface:Validating 1000 partitions with 8 workers


✓ 2024-01-01 is valid
Loaded LazyFrame for first 7 days


  0%|          | 0/1000 [00:00<?, ?it/s]INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
  0%|          | 1/1000 [00:00<02:11,  7.61it/s]INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache
INFO:mnemosyne.dataset.interface:Loaded 650 validated partitions from cache


Error reading 2025-10-12 partition: expected at least 1 source
Error reading 2025-10-13 partition: expected at least 1 source
Error reading 2025-10-14 partition: expected at least 1 source
Error reading 2025-10-15 partition: expected at least 1 source
Error reading 2025-10-16 partition: expected at least 1 source
Error reading 2025-10-17 partition: expected at least 1 source
Error reading 2025-10-18 partition: expected at least 1 source
Error reading 2025-10-19 partition: expected at least 1 source
Error reading 2025-10-21 partition: expected at least 1 source
Error reading 2025-10-20 partition: expected at least 1 source
Error reading 2025-10-22 partition: expected at least 1 source
Error reading 2025-10-23 partition: expected at least 1 source
Error reading 2025-10-24 partition: expected at least 1 source
Error reading 2025-10-26 partition: expected at least 1 source
Error reading 2025-10-25 partition: expected at least 1 source
Error reading 2025-10-27 partition: expected at least 1