In [1]:
from pathlib import Path
import xarray as xr

# Processed Binance data directory, given relative to the notebook's working dir.
root = Path("../../../work/processed/binance").expanduser()

# Open the intersection cube from its Zarr store.
# consolidated=True reads the consolidated metadata; switch it to False if the
# store is ever written without consolidation.
cube_store = root.joinpath("data", "zarr_cube_intersection")
cached_cube = xr.open_zarr(cube_store.as_posix(), consolidated=True)
print(cached_cube)

# `cube` is a second name bound to the same dataset object (no copy is made).
cube = cached_cube
print('\n==========\n', cube)


<xarray.Dataset> Size: 501MB
Dimensions:  (feature: 9, time: 2662909, symbol: 4)
Coordinates:
  * feature  (feature) <U13 468B 'Open' 'High' ... 'TakerBuyQuote'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, feature) bool 96MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
    values   (time, symbol, feature) float32 383MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
Attributes:
    calendar_source:       inferred:range_intersection
    feature_order_source:  inferred

 <xarray.Dataset> Size: 501MB
Dimensions:  (feature: 9, time: 2662909, symbol: 4)
Coordinates:
  * feature  (feature) <U13 468B 'Open' 'High' ... 'TakerBuyQuote'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, f

In [2]:
import xarray as xr
import numpy as np
import pandas as pd

# === CONFIG ===
LABEL_ZARR = "../../../work/processed/binance/data/zarr_cube_label"
GROUP      = "labels"
# Epsilon used to classify returns:
#   r > eps -> positive, |r| <= eps -> zero, r < -eps -> negative.
RET_EPS    = 5e-5

# === LOAD ===
# Open the labels group of the Zarr store and pull out the pieces used below.
ds_lbl = xr.open_zarr(LABEL_ZARR, group=GROUP)
values = ds_lbl["values"]            # dims: (time, symbol, label)
time = ds_lbl["time"].values
# Coordinate entries are normalised to plain Python strings.
symbols = [str(sym) for sym in ds_lbl["symbol"].values]
labels = [str(lbl) for lbl in ds_lbl["label"].values]

# One-line summary (at most the first 8 symbols), followed by the full repr.
print(f"Loaded labels cube: values{tuple(values.shape)} | symbols={symbols[:8]} ... | n_labels={len(labels)}")
print('\n==========\n', ds_lbl)


Loaded labels cube: values(2662909, 4, 8) | symbols=['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT'] ... | n_labels=8

 <xarray.Dataset> Size: 447MB
Dimensions:  (label: 8, time: 2662909, symbol: 4)
Coordinates:
  * label    (label) object 64B 'ret_log__H1' 'ret_log__H15' ... 'tb_t_hit__H60'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, label) bool 85MB dask.array<chunksize=(16384, 4, 8), meta=np.ndarray>
    values   (time, symbol, label) float32 341MB dask.array<chunksize=(16384, 4, 8), meta=np.ndarray>
Attributes:
    horizons:           [1, 5, 15, 60]
    label_builder:      returns + triple_barrier(strict_nan)
    strict_nan_window:  True
    tb_cfg_pct:         {"up_pct": 0.01, "dn_pct": 0.005, "H": 60, "neutral":...
    tb_cfg_vol:         {"enabled": true, "up": 2.0, "dn": 2.0, "H": 60, "win...


In [3]:
from pathlib import Path
import xarray as xr
from ace_rl.core.cube_feature import load_cube

root = Path("../../../work/processed/binance")

# Identifiers of the feature cubes to load; each is passed to load_cube as-is.
feature_names = [
    "liq_volratio_w60_0895",
    "mom_rsi_14_a289",
    "of_taker_buy_ratio_967f",
    "ret_lag5_bb88",
    "risk_atr_14_1981",
    "trend_emar_20_264a",
    "vol_win60_4b89",
    "z_Close_w240_1f15",
]

# Load one dataset per feature, keyed by its identifier (insertion order kept).
feature_cubes = {}
for name in feature_names:
    feature_cubes[name] = load_cube(root, "features", name)

# Debug output: a one-line summary followed by the full dataset repr.
for name, ds in feature_cubes.items():
    print(f"Feature: {name}, shape: {ds['values'].shape}, coords: {ds.coords}")
    print(ds)
    print()

# Optional: stitch the per-feature "values" arrays into a single dataset by
# concatenating along the "feature" dimension.
# NOTE(review): this relies on xarray's default alignment of the other indexes;
# confirm that every cube shares identical time/symbol coordinates.
value_arrays = [cube_ds["values"] for cube_ds in feature_cubes.values()]
combined = xr.concat(value_arrays, dim="feature").to_dataset(name="values")

print("Combined dataset:")
print('\n==========\n', combined)

Feature: liq_volratio_w60_0895, shape: (2662909, 4, 1), coords: Coordinates:
  * feature  (feature) <U12 48B 'vol_ratio_60'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
<xarray.Dataset> Size: 64MB
Dimensions:  (feature: 1, symbol: 4, time: 2662909)
Coordinates:
  * feature  (feature) <U12 48B 'vol_ratio_60'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    values   (time, symbol, feature) float32 43MB dask.array<chunksize=(16384, 4, 1), meta=np.ndarray>
Attributes:
    params:   {'method': 'vol/ma', 'window': 60}

Feature: mom_rsi_14_a289, shape: (2662909, 4, 1), coords: Coordinates:
  * feature  (feature) <U6 24B 'rsi_14'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-