In [1]:
from pathlib import Path
import xarray as xr

# Processed Binance data is located relative to the notebook's working directory.
root = Path("../../../work/processed/binance").expanduser()

# Open the cached intersection cube from its Zarr store. The store is read via
# consolidated metadata; pass consolidated=False if consolidation is ever disabled.
cached_cube = xr.open_zarr(
    root.joinpath("data", "zarr_cube_intersection").as_posix(),
    consolidated=True,
)
print(cached_cube)

# The following cells refer to the dataset simply as `cube`.
cube = cached_cube


<xarray.Dataset> Size: 501MB
Dimensions:  (feature: 9, time: 2662909, symbol: 4)
Coordinates:
  * feature  (feature) <U13 468B 'Open' 'High' ... 'TakerBuyQuote'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, feature) bool 96MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
    values   (time, symbol, feature) float32 383MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
Attributes:
    calendar_source:       inferred:range_intersection
    feature_order_source:  inferred


In [2]:
# Dump the dataset summary, its symbol and feature labels, and the `values`
# variable, each followed by one blank line.
print(cube, end="\n\n")
print(cube.coords.get("symbol").values, end="\n\n")
print(cube.coords.get("feature").values, end="\n\n")
print(cube.data_vars.get("values"), end="\n\n")

<xarray.Dataset> Size: 501MB
Dimensions:  (feature: 9, time: 2662909, symbol: 4)
Coordinates:
  * feature  (feature) <U13 468B 'Open' 'High' ... 'TakerBuyQuote'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, feature) bool 96MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
    values   (time, symbol, feature) float32 383MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
Attributes:
    calendar_source:       inferred:range_intersection
    feature_order_source:  inferred

['BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT']

['Open' 'High' 'Low' 'Close' 'Volume' 'QuoteVolume' 'Trades'
 'TakerBuyBase' 'TakerBuyQuote']

<xarray.DataArray 'values' (time: 2662909, symbol: 4, feature: 9)> Size: 383MB
dask.array<open_dataset-values, shape=(2662909, 4, 9), dtype=float32, chunksize=(16384, 4, 9), chunktype=numpy.ndarray>
Coordinates:
  * feature

In [3]:
from ace_rl.core.candle_feat_ops import (
    make_atr, make_rsi,
    make_ema_close, make_ema_ratio, 
    make_return,
    make_taker_buy_ratio,
    make_volatility,
    make_volume_ratio,
    make_zscore
)

In [4]:
from pathlib import Path
import xarray as xr
from ace_rl.core.cube_feature import load_cube

# Resolve the processed-data root relative to the working directory (the same
# relative location the raw cube is opened from) instead of a user-specific
# absolute home path, so the notebook also runs on other machines/checkouts.
# NOTE(review): assumes this resolves to the directory the previous absolute
# path pointed at — confirm against the local layout.
root = Path("../../../work/processed/binance").resolve()

# On-disk names of the precomputed feature cubes to load.
feature_names = [
    "liq_volratio_w60_0895",
    "mom_rsi_14_a289",
    "of_taker_buy_ratio_967f",
    "ret_lag5_bb88",
    "risk_atr_14_1981",
    "trend_emar_20_264a",
    "vol_win60_4b89",
    "z_Close_w240_1f15",
]

# One dataset per feature, keyed by its on-disk name (insertion order is kept,
# so the concatenated feature axis below follows `feature_names`).
feature_cubes = {name: load_cube(root, "features", name) for name in feature_names}

## print debug
for name, ds in feature_cubes.items():
    print(f"Feature: {name}, shape: {ds['values'].shape}, coords: {ds.coords}")
    print(ds)
    print()

# optional: stitch them into one dataset sharing the same time/symbol index.
# join="exact" makes xarray raise if any cube's time/symbol index differs,
# rather than silently outer-joining and padding the gaps with NaN.
combined = xr.concat(
    [ds["values"] for ds in feature_cubes.values()],
    dim="feature",
    join="exact",
).to_dataset(name="values")


Feature: liq_volratio_w60_0895, shape: (2662909, 4, 1), coords: Coordinates:
  * feature  (feature) <U12 48B 'vol_ratio_60'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
<xarray.Dataset> Size: 64MB
Dimensions:  (feature: 1, symbol: 4, time: 2662909)
Coordinates:
  * feature  (feature) <U12 48B 'vol_ratio_60'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    values   (time, symbol, feature) float32 43MB dask.array<chunksize=(16384, 4, 1), meta=np.ndarray>
Attributes:
    params:   {'method': 'vol/ma', 'window': 60}

Feature: mom_rsi_14_a289, shape: (2662909, 4, 1), coords: Coordinates:
  * feature  (feature) <U6 24B 'rsi_14'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-

In [5]:
# Show the stitched dataset, then its symbol labels and feature labels,
# one item per line.
print(
    combined,
    combined.coords.get("symbol").values,
    combined.coords.get("feature").values,
    sep="\n",
)

<xarray.Dataset> Size: 362MB
Dimensions:  (feature: 8, symbol: 4, time: 2662909)
Coordinates:
  * feature  (feature) <U15 480B 'vol_ratio_60' 'rsi_14' ... 'z_Close_240'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    values   (time, symbol, feature) float32 341MB dask.array<chunksize=(16384, 4, 1), meta=np.ndarray>
['BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT']
['vol_ratio_60' 'rsi_14' 'taker_buy_ratio' 'return_lag5' 'atr_14'
 'ema_ratio_20' 'volatility_60' 'z_Close_240']
