In [1]:
# ! pip install dask

In [2]:
from pathlib import Path
from ace_rl.core.cube_builder import CubeBuilderConfig, build_raw_cube

# root = Path("../../../work/processed/binance").expanduser()
# cfg = CubeBuilderConfig(
#     per_symbol_root=root / "data" / "zarr_per_symbol",
#     # chưa có meta -> None; builder sẽ suy luận
#     calendar_path=None,
#     feature_order_path=None,
#     symbols=["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT"],
#     use_dask=False,
#     # Quan trọng: cắt block theo giao thời gian
#     calendar_mode="range_intersection",  # mặc định đã là mode này
#     output_path=root / "data" / "zarr_cube_intersection",  # tuỳ chọn
#     debug_timing=True
# )

# cube = build_raw_cube(cfg)
# print(cube)


In [3]:
from pathlib import Path
import xarray as xr

root = Path("../../../work/processed/binance").expanduser()
cached_cube = xr.open_zarr(
    (root / "data" / "zarr_cube_intersection").as_posix(),
    consolidated=True,  # flip to False if you ever disable consolidation
)
print(cached_cube)

cube = cached_cube 


<xarray.Dataset> Size: 501MB
Dimensions:  (feature: 9, time: 2662909, symbol: 4)
Coordinates:
  * feature  (feature) <U13 468B 'Open' 'High' ... 'TakerBuyQuote'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, feature) bool 96MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
    values   (time, symbol, feature) float32 383MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
Attributes:
    calendar_source:       inferred:range_intersection
    feature_order_source:  inferred


In [4]:
print(cube)
print()
print(cube.coords.get("symbol").values)
print()
print(cube.coords.get("feature").values)
print()
print(cube.data_vars.get("values"))
print()

<xarray.Dataset> Size: 501MB
Dimensions:  (feature: 9, time: 2662909, symbol: 4)
Coordinates:
  * feature  (feature) <U13 468B 'Open' 'High' ... 'TakerBuyQuote'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    mask     (time, symbol, feature) bool 96MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
    values   (time, symbol, feature) float32 383MB dask.array<chunksize=(16384, 4, 9), meta=np.ndarray>
Attributes:
    calendar_source:       inferred:range_intersection
    feature_order_source:  inferred

['BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT']

['Open' 'High' 'Low' 'Close' 'Volume' 'QuoteVolume' 'Trades'
 'TakerBuyBase' 'TakerBuyQuote']

<xarray.DataArray 'values' (time: 2662909, symbol: 4, feature: 9)> Size: 383MB
dask.array<open_dataset-values, shape=(2662909, 4, 9), dtype=float32, chunksize=(16384, 4, 9), chunktype=numpy.ndarray>
Coordinates:
  * feature

In [17]:
# features_ops.py
from __future__ import annotations
from typing import Tuple, Dict, Any
import numpy as np
import xarray as xr

EPS = 1e-12

# -------------------------
# Core helpers (không đụng tới chunking logic của bạn)
# -------------------------
def _inherit_chunks_like(base_cube: xr.Dataset, da: xr.DataArray) -> xr.DataArray:
    """Set chunk spec của feature mới giống cube gốc (time,symbol) + feature:1."""
    base_values = base_cube["values"]
    base_chunks = getattr(base_values.data, "chunks", None)
    print("[inherit] incoming array dims:", da.dims, "shape:", da.shape, "dtype:", da.dtype)
    if base_chunks:
        chunk_map = dict(zip(base_values.dims, base_chunks))
        spec: Dict[str, int] = {}
        if "time" in chunk_map:
            spec["time"] = int(chunk_map["time"][0])
        if "symbol" in chunk_map:
            spec["symbol"] = int(chunk_map["symbol"][0])
        spec["feature"] = 1
        printable_chunk_map = {
            dim: tuple(int(c) for c in chunk_map[dim]) for dim in chunk_map
        }
        print("[inherit] base dims:", base_values.dims)
        print("[inherit] base chunk map:", printable_chunk_map)
        print("[inherit] requested chunk spec:", spec)
        da = da.chunk(spec)
        print("[inherit] resulting chunks:", da.chunks)
    else:
        print("[inherit] base cube not chunked; skip chunk inheritance")
    return da

def _wrap_single_feature(
    base_cube: xr.Dataset, array: xr.DataArray, feature_name: str, params: Dict[str, Any]
) -> xr.Dataset:
    print("[wrap] feature:", feature_name)
    print("[wrap] source dims:", array.dims, "shape:", array.shape, "dtype:", array.dtype)
    coord_summary = {
        dim: {
            "dtype": str(array.coords[dim].dtype),
            "type": type(array.coords[dim].data).__name__
        }
        for dim in array.dims if dim in array.coords
    }
    print("[wrap] source coord summary:", coord_summary)

    da = array.transpose("time", "symbol")
    print("[wrap] after transpose dims:", da.dims, "shape:", da.shape)
    da = da.astype("float32")
    print("[wrap] after astype dtype:", da.dtype)
    da = da.expand_dims(feature=[feature_name], axis=-1)
    print("[wrap] after expand dims:", da.dims, "shape:", da.shape)

    da = _inherit_chunks_like(base_cube, da)

    # Ép coord về chuỗi thuần, bỏ encoding object để tránh lỗi VLenUTF8
    if "symbol" in da.coords:
        da = da.assign_coords(symbol=da.coords["symbol"].astype(str))
        da.coords["symbol"].encoding = {}
    if "feature" in da.coords:
        da = da.assign_coords(feature=da.coords["feature"].astype(str))
        da.coords["feature"].encoding = {}

    ds = da.to_dataset(name="values")
    ds.attrs["params"] = params

    if "symbol" in ds.coords:
        symbol_coord = ds.coords["symbol"]
        symbol_preview = symbol_coord.values.tolist()[:5]
        print("[wrap] symbol coord dtype:", symbol_coord.dtype)
        print("[wrap] symbol coord python type:", type(symbol_coord.values).__name__)
        print("[wrap] symbol coord preview:", symbol_preview)
    if "feature" in ds.coords:
        feature_data = ds.coords["feature"].values.tolist()
        print("[wrap] feature coord:", feature_data)
    if "time" in ds.coords:
        time_coord = ds.coords["time"]
        print("[wrap] time coord dtype:", time_coord.dtype, "len:", time_coord.size)

    return ds

def _make_name(prefix: str, short: str, params: Dict[str, Any]) -> str:
    import json as _json
    import hashlib as _hashlib
    dumped = _json.dumps(params, sort_keys=True)
    suffix = _hashlib.sha1(dumped.encode()).hexdigest()[:4]
    name = f"{prefix}_{short}_{suffix}"
    print("[_make_name] params:", dumped)
    print("[_make_name] generated:", name)
    return name

# -------------------------
# Elementwise / simple rolling
# -------------------------
def make_return(cube: xr.Dataset, lookback: int = 1) -> Tuple[str, xr.Dataset]:
    """Log-return k-bar: log(C_t) - log(C_{t-k})."""
    close = cube["values"].sel(feature="Close")
    print(close)
    ret = np.log(close / close.shift(time=lookback))
    print(ret)
    params = {"lookback": lookback, "method": "log_return"}
    name = _make_name("ret", f"lag{lookback}", params)
    print(name)
    return name, _wrap_single_feature(cube, ret, f"return_lag{lookback}", params)



In [9]:
# ví dụ build và save một vài feature
root = Path("../../../work/processed/binance")
base = cube


In [None]:
from ace_rl.core.cube_feature import (
    save_cube, load_cube
)

In [16]:
print("base dims:", base.dims)
print("base data variables:", list(base.data_vars))

# xem toạ độ symbol
sym = base.coords["symbol"]
print("symbol dtype:", sym.dtype, "encoding:", sym.encoding)
print("symbol type:", type(sym.values).__name__)
print("symbol values (full):", sym.values.tolist())

# nếu muốn, soi cả feature/time
feat = base.coords["feature"]
print("feature dtype:", feat.dtype, "encoding:", feat.encoding)
print("feature values:", feat.values.tolist())
time = base.coords["time"]
print("time dtype:", time.dtype, "len:", time.size)


base data variables: ['mask', 'values']
symbol dtype: object encoding: {'chunks': (4,), 'preferred_chunks': {'symbol': 4}, 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0), 'filters': [VLenUTF8()], 'dtype': dtype('O')}
symbol type: ndarray
symbol values (full): ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT']
feature dtype: <U13 encoding: {'chunks': (9,), 'preferred_chunks': {'feature': 9}, 'compressor': Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0), 'filters': None, 'dtype': dtype('<U13')}
feature values: ['Open', 'High', 'Low', 'Close', 'Volume', 'QuoteVolume', 'Trades', 'TakerBuyBase', 'TakerBuyQuote']
time dtype: datetime64[ns] len: 2662909


In [None]:

name, ds_ret5   = make_return(base, lookback=5)
save_cube(ds_ret5, root, subdir="features", name=name)


<xarray.DataArray 'values' (time: 2662909, symbol: 4)> Size: 43MB
dask.array<getitem, shape=(2662909, 4), dtype=float32, chunksize=(16384, 4), chunktype=numpy.ndarray>
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
<xarray.DataArray 'values' (time: 2662909, symbol: 4)> Size: 43MB
dask.array<log, shape=(2662909, 4), dtype=float32, chunksize=(16384, 4), chunktype=numpy.ndarray>
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
[_make_name] params: {"lookback": 5, "method": "log_return"}
[_make_name] generated: ret_lag5_bb88
ret_lag5_bb88
[wrap] feature: return_lag5
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float32
[wrap] source coord summary: {'time': {'dtype': 'datetime64

  x = np.divide(x1, x2, out)
  x = np.divide(x1, x2, out)


[0]: x=394.899994, ema=394.899994
[1]: x=394.670013, ema=394.878091
[2]: x=394.950012, ema=394.884941
[3]: x=394.799988, ema=394.876850
[4]: x=394.519989, ema=394.842863
[0]: x=11850.099609, ema=11850.099609
[1]: x=11843.830078, ema=11849.502511
[2]: x=11850.139648, ema=11849.563191
[3]: x=11849.030273, ema=11849.512437
[4]: x=11841.759766, ema=11848.774087
[0]: x=22.379900, ema=22.379900
[1]: x=22.363501, ema=22.378338
[2]: x=22.395800, ema=22.380001
[3]: x=22.387100, ema=22.380677
[4]: x=22.376400, ema=22.380270
[0]: x=2.850000, ema=2.850000
[1]: x=2.850000, ema=2.850000
[2]: x=2.850000, ema=2.850000
[3]: x=3.000300, ema=2.864314
[4]: x=3.185200, ema=2.894875
[2662908]: x=207.500000, ema=207.805426
[2662908]: x=110928.281250, ema=110947.693837
[2662908]: x=848.109985, ema=848.670701
[2662908]: x=4409.959961, ema=4416.794949
[1]: x=0.000000, ema=0.000000
[2]: x=0.000000, ema=0.000000
[3]: x=0.620000, ema=0.082667
[4]: x=0.184900, ema=0.096298
[1]: x=0.369995, ema=0.369995
[2]: x=0.389

  x = np.divide(x1, x2, out)


In [19]:
loaded_ds_ret5 = load_cube(root, subdir="features", name=name)
print(loaded_ds_ret5)

<xarray.Dataset> Size: 64MB
Dimensions:  (feature: 1, symbol: 4, time: 2662909)
Coordinates:
  * feature  (feature) <U11 44B 'return_lag5'
  * symbol   (symbol) <U7 112B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
Data variables:
    values   (time, symbol, feature) float32 43MB dask.array<chunksize=(16384, 4, 1), meta=np.ndarray>
Attributes:
    params:   {'lookback': 5, 'method': 'log_return'}


In [23]:
import numpy as np

# >>> chọn symbol và chỉ số thời gian bạn muốn đối chiếu
symbols = ["BTCUSDT", "ETHUSDT"]
time_idx = [1000, 1001, 1002]  # đổi theo vị trí bạn quan tâm (nhớ >= lookback)

close_full = cube["values"].sel(feature="Close", symbol=symbols)

close_t = close_full.isel(time=time_idx)
close_t_minus5 = close_full.shift(time=5).isel(time=time_idx)

ret_manual = np.log(close_t / close_t_minus5)

ret_saved = loaded_ds_ret5["values"].sel(feature="return_lag5", symbol=symbols).isel(time=time_idx)

print("Close(t):")
print(close_t.compute())

print("\nClose(t-5):")
print(close_t_minus5.compute())

print("\nManual returns:")
print(ret_manual.compute())

print("\nSaved return_lag5:")
print(ret_saved.compute())

print("\nDifference (saved - manual):")
print((ret_saved - ret_manual).compute())


Close(t):
<xarray.DataArray 'values' (time: 3, symbol: 2)> Size: 24B
array([[11310.08,   374.7 ],
       [11309.79,   374.88],
       [11335.  ,   375.19]], dtype=float32)
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 16B 'BTCUSDT' 'ETHUSDT'
  * time     (time) datetime64[ns] 24B 2020-08-11T22:40:00 ... 2020-08-11T22:...

Close(t-5):
<xarray.DataArray 'values' (time: 3, symbol: 2)> Size: 24B
array([[11307.39,   372.84],
       [11300.13,   372.91],
       [11303.65,   373.24]], dtype=float32)
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 16B 'BTCUSDT' 'ETHUSDT'
  * time     (time) datetime64[ns] 24B 2020-08-11T22:40:00 ... 2020-08-11T22:...

Manual returns:
<xarray.DataArray 'values' (time: 3, symbol: 2)> Size: 24B
array([[0.00023791, 0.00497639],
       [0.00085448, 0.00526886],
       [0.00276957, 0.00521098]], dtype=float32)
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 16B 'BTCUSDT' 'ETHUSDT'
  * time    

In [22]:
symbols = ["BTCUSDT", "ETHUSDT"]
indices = np.arange(5, 10)  # thay cho slice

times = cube.coords["time"].isel(time=indices)
close_t = cube["values"].sel(feature="Close", symbol=symbols, time=times)

close_t_lag = cube["values"].sel(
    feature="Close", symbol=symbols,
    time=cube.coords["time"].isel(time=indices - 5)
)

ret_manual = np.log(close_t / close_t_lag)
ret_saved = loaded_ds_ret5["values"].sel(
    symbol=symbols, time=times, feature="return_lag5"
)

print(ret_manual.compute())
print(ret_saved.compute())


<xarray.DataArray 'values' (time: 0, symbol: 2)> Size: 0B
array([], shape=(0, 2), dtype=float32)
Coordinates:
  * symbol   (symbol) object 16B 'BTCUSDT' 'ETHUSDT'
  * time     (time) datetime64[ns] 0B 
    feature  <U13 52B 'Close'
<xarray.DataArray 'values' (time: 5, symbol: 2)> Size: 40B
array([[-1.8896458e-04, -3.5453163e-04],
       [ 4.2119689e-04,  4.3061044e-04],
       [-1.2064707e-04, -2.2795414e-04],
       [-5.9904462e-05, -1.5200338e-04],
       [ 6.2898878e-04,  5.3224707e-04]], dtype=float32)
Coordinates:
    feature  <U11 44B 'return_lag5'
  * symbol   (symbol) <U7 56B 'BTCUSDT' 'ETHUSDT'
  * time     (time) datetime64[ns] 40B 2020-08-11T06:05:00 ... 2020-08-11T06:...


In [33]:

def make_volatility(cube: xr.Dataset, window: int = 30, strict_tminus1: bool = True) -> Tuple[str, xr.Dataset]:
    """Rolling std of log-return; optional strict t-1 stats to tránh leak."""
    close = cube["values"].sel(feature="Close")
    r1 = np.log(close / close.shift(time=1))
    x = r1.shift(time=1) if strict_tminus1 else r1
    vol = x.rolling(time=window, min_periods=window).std()
    params = {"window": window, "method": "rolling_std", "strict_tminus1": strict_tminus1}
    name = _make_name("vol", f"win{window}", params)
    return name, _wrap_single_feature(cube, vol, f"volatility_{window}", params)

def make_zscore(cube: xr.Dataset, source_feature: str, window: int, strict_tminus1: bool = True):
    """Z-score rolling: (x - mean)/(std+eps), stats theo t-1 nếu strict."""
    x = cube["values"].sel(feature=source_feature)
    x_stat = x.shift(time=1) if strict_tminus1 else x
    mu = x_stat.rolling(time=window, min_periods=window).mean()
    sd = x_stat.rolling(time=window, min_periods=window).std()
    z = (x - mu) / (sd + EPS)
    params = {"src": source_feature, "window": window, "strict_tminus1": strict_tminus1}
    name = _make_name("z", f"{source_feature}_w{window}", params)
    return name, _wrap_single_feature(cube, z, f"z_{source_feature}_{window}", params)

def make_volume_ratio(cube: xr.Dataset, window: int = 60):
    """Volume / EMA/MA(window) — dùng MA mặc định cho đơn giản & ổn định."""
    v = cube["values"].sel(feature="Volume")
    ma = v.rolling(time=window, min_periods=window).mean()
    ratio = v / (ma + EPS)
    params = {"window": window, "method": "vol/ma"}
    name = _make_name("liq", f"volratio_w{window}", params)
    return name, _wrap_single_feature(cube, ratio, f"vol_ratio_{window}", params)

def make_taker_buy_ratio(cube: xr.Dataset):
    """TB_base / Volume, bound [0,1] (khi dữ liệu hợp lệ)."""
    tb = cube["values"].sel(feature="TakerBuyBase")
    v  = cube["values"].sel(feature="Volume")
    r = tb / (v + EPS)
    params = {"method": "taker_buy_ratio"}
    name = _make_name("of", "taker_buy_ratio", params)
    return name, _wrap_single_feature(cube, r, "taker_buy_ratio", params)

# -------------------------
# Stateful ops via apply_ufunc (EMA, ATR, RSI)
# -------------------------
def _ema_1d(x: np.ndarray, alpha: float, debug=True, label="") -> np.ndarray:
    y = np.empty_like(x, dtype=np.float64)
    s = np.nan
    for i, v in enumerate(x):
        if np.isnan(v):
            y[i] = s
            continue
        s = v if np.isnan(s) else alpha * v + (1.0 - alpha) * s
        y[i] = s
        if debug and (i < 5 or i == len(x) - 1):
            print(f"{label}[{i}]: x={v:.6f}, ema={s:.6f}")
   
    return y

def _ema_da(x: xr.DataArray, span: int) -> xr.DataArray:
    alpha = 2.0 / (span + 1.0)
    # return xr.apply_ufunc(
    #     _ema_1d, x, alpha,
    #     input_core_dims=[["time"], []],
    #     output_core_dims=[["time"]],
    #     vectorize=True,
    #     dask="parallelized",
    #     output_dtypes=[np.float64],
    # )
    return xr.apply_ufunc(
        _ema_1d, x, alpha,
        input_core_dims=[["time"], []],
        output_core_dims=[["time"]],
        vectorize=True,
        dask="parallelized",
        dask_gufunc_kwargs={"allow_rechunk": True},
        output_dtypes=[np.float64],
    )


def make_ema_close(cube: xr.Dataset, span: int = 20):
    """EMA(Close, span) với apply_ufunc (chạy được cả dask)."""
    c = cube["values"].sel(feature="Close")
    ema = _ema_da(c, span)
    params = {"span": span, "method": "ema"}
    name = _make_name("ema", f"C_{span}", params)
    return name, _wrap_single_feature(cube, ema, f"ema_close_{span}", params)

def make_ema_ratio(cube: xr.Dataset, span: int = 20):
    """C/EMA(C)-1."""
    c = cube["values"].sel(feature="Close")
    ema = _ema_da(c, span)
    ratio = c / (ema + EPS) - 1.0
    params = {"span": span, "method": "ema_ratio"}
    name = _make_name("trend", f"emar_{span}", params)
    return name, _wrap_single_feature(cube, ratio, f"ema_ratio_{span}", params)

def make_atr(cube: xr.Dataset, span: int = 14):
    """ATR = EMA(TR, span)."""
    H = cube["values"].sel(feature="High")
    L = cube["values"].sel(feature="Low")
    C = cube["values"].sel(feature="Close")
    Cprev = C.shift(time=1)
    tr = xr.ufuncs.maximum(H - L, xr.ufuncs.maximum(abs(H - Cprev), abs(L - Cprev)))
    atr = _ema_da(tr, span)
    params = {"span": span, "method": "atr=ema(TR)"}
    name = _make_name("risk", f"atr_{span}", params)
    return name, _wrap_single_feature(cube, atr, f"atr_{span}", params)

def make_rsi(cube: xr.Dataset, span: int = 14):
    """RSI = 100 * EMA(gain) / EMA(|delta|)."""
    C = cube["values"].sel(feature="Close")
    d = C - C.shift(time=1)
    gain = xr.where(d > 0, d, 0.0)
    mag  = abs(d)
    up = _ema_da(gain, span)
    dm = _ema_da(mag,  span)
    rsi = 100.0 * (up / (dm + EPS))
    params = {"span": span, "method": "rsi=ema(gain)/ema(|delta|)"}
    name = _make_name("mom", f"rsi_{span}", params)
    return name, _wrap_single_feature(cube, rsi, f"rsi_{span}", params)


In [27]:
name, ds_ret5   = make_return(base, lookback=5)
save_cube(ds_ret5, root, subdir="features", name=name)

<xarray.DataArray 'values' (time: 2662909, symbol: 4)> Size: 43MB
dask.array<getitem, shape=(2662909, 4), dtype=float32, chunksize=(16384, 4), chunktype=numpy.ndarray>
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
<xarray.DataArray 'values' (time: 2662909, symbol: 4)> Size: 43MB
dask.array<log, shape=(2662909, 4), dtype=float32, chunksize=(16384, 4), chunktype=numpy.ndarray>
Coordinates:
    feature  <U13 52B 'Close'
  * symbol   (symbol) object 32B 'BTCUSDT' 'ETHUSDT' 'BNBUSDT' 'SOLUSDT'
  * time     (time) datetime64[ns] 21MB 2020-08-11T06:00:00 ... 2025-09-04T11...
[_make_name] params: {"lookback": 5, "method": "log_return"}
[_make_name] generated: ret_lag5_bb88
ret_lag5_bb88
[wrap] feature: return_lag5
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float32
[wrap] source coord summary: {'time': {'dtype': 'datetime64

In [28]:
name, ds_vol60  = make_volatility(base, window=60)
save_cube(ds_vol60, root, subdir="features", name=name)

[_make_name] params: {"method": "rolling_std", "strict_tminus1": true, "window": 60}
[_make_name] generated: vol_win60_4b89
[wrap] feature: volatility_60
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float32
[wrap] source coord summary: {'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}, 'symbol': {'dtype': 'object', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16

In [34]:
name, ds_emar20 = make_ema_ratio(base, span=20)
save_cube(ds_emar20, root, subdir="features", name=name)

[_make_name] params: {"method": "ema_ratio", "span": 20}
[_make_name] generated: trend_emar_20_264a
[wrap] feature: ema_ratio_20
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float64
[wrap] source coord summary: {'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}, 'symbol': {'dtype': 'object', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384,

In [35]:
name, ds_atr14  = make_atr(base, span=14)
save_cube(ds_atr14, root, subdir="features", name=name)

[_make_name] params: {"method": "atr=ema(TR)", "span": 14}
[_make_name] generated: risk_atr_14_1981
[wrap] feature: atr_14
[wrap] source dims: ('symbol', 'time') shape: (4, 2662909) dtype: float64
[wrap] source coord summary: {'symbol': {'dtype': 'object', 'type': 'ndarray'}, 'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384

In [36]:
name, ds_rsi14  = make_rsi(base, span=14)
save_cube(ds_rsi14, root, subdir="features", name=name)

[_make_name] params: {"method": "rsi=ema(gain)/ema(|delta|)", "span": 14}
[_make_name] generated: mom_rsi_14_a289
[wrap] feature: rsi_14
[wrap] source dims: ('symbol', 'time') shape: (4, 2662909) dtype: float64
[wrap] source coord summary: {'symbol': {'dtype': 'object', 'type': 'ndarray'}, 'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384

In [37]:
name, ds_zC240  = make_zscore(base, source_feature="Close", window=240, strict_tminus1=True)
save_cube(ds_zC240, root, subdir="features", name=name)


[_make_name] params: {"src": "Close", "strict_tminus1": true, "window": 240}
[_make_name] generated: z_Close_w240_1f15
[wrap] feature: z_Close_240
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float32
[wrap] source coord summary: {'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}, 'symbol': {'dtype': 'object', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16

In [38]:
name, ds_vr60   = make_volume_ratio(base, window=60)
save_cube(ds_vr60, root, subdir="features", name=name)

[_make_name] params: {"method": "vol/ma", "window": 60}
[_make_name] generated: liq_volratio_w60_0895
[wrap] feature: vol_ratio_60
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float32
[wrap] source coord summary: {'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}, 'symbol': {'dtype': 'object', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 1638

In [39]:
name, ds_tbr    = make_taker_buy_ratio(base)
save_cube(ds_tbr, root, subdir="features", name=name)

[_make_name] params: {"method": "taker_buy_ratio"}
[_make_name] generated: of_taker_buy_ratio_967f
[wrap] feature: taker_buy_ratio
[wrap] source dims: ('time', 'symbol') shape: (2662909, 4) dtype: float32
[wrap] source coord summary: {'time': {'dtype': 'datetime64[ns]', 'type': 'ndarray'}, 'symbol': {'dtype': 'object', 'type': 'ndarray'}}
[wrap] after transpose dims: ('time', 'symbol') shape: (2662909, 4)
[wrap] after astype dtype: float32
[wrap] after expand dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1)
[inherit] incoming array dims: ('time', 'symbol', 'feature') shape: (2662909, 4, 1) dtype: float32
[inherit] base dims: ('time', 'symbol', 'feature')
[inherit] base chunk map: {'time': (16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 1638