## config

### ~~Minio config~~
1. ~~修改docker-compose.yaml中的用户名和密码，控制台输入 `docker-compose up -d` 运行后台minio.~~
2. ~~登录`http://localhost:9001/`~~
3. ~~创建bucket `finance-tiledb`~~
4. ~~创建access keys，并复制粘贴在以下代码~~

In [157]:
import tiledb

import os

# TileDB configuration for an S3-compatible (MinIO) object store.
config = tiledb.Config()

# Storage-manager settings
config["sm.check_coord_dups"] = "false"
# Turn off the coordinate out-of-bounds check (written as a string, like the
# other boolean options in this cell)
config["sm.check_coord_oob"] = "false"
config["sm.tile_cache_size"] = 100000000
# config["sm.compute_concurrency_level"] = 10
# config["sm.io_concurrency_level"] = 30
# config["vfs.s3.max_parallel_ops"] = 30
# config["vfs.min_batch_size"] = 20971520
# config["vfs.min_batch_gap"] = 512000

# S3 endpoint settings
config["vfs.s3.scheme"] = "http"
config["vfs.s3.region"] = ""  # S3 region name; left empty here
config["vfs.s3.endpoint_override"] = "localhost:9000"
config["vfs.s3.use_virtual_addressing"] = "false"

# Credentials come from the environment so that secrets do not have to be
# saved in the notebook. The fallback is the original empty string.
# TODO: export AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY, or replace the
# fallbacks below with your own ACCESS_KEY / SECRET.
config["vfs.s3.aws_access_key_id"] = os.environ.get("AWS_ACCESS_KEY_ID", "")
config["vfs.s3.aws_secret_access_key"] = os.environ.get("AWS_SECRET_ACCESS_KEY", "")

# Create the context object used by the cells below
ctx = tiledb.Ctx(config)

# Root URI under which all groups and arrays are created
tiledb_bucket = "s3://finance-tiledb"

### Local/Remote disk directory

In [1]:
import tiledb

# Local/remote-directory setup: collect the storage-manager options in one
# mapping and hand them to tiledb.Config in a single call.
config = tiledb.Config(
    {
        "sm.check_coord_dups": "false",
        # turn off the coordinate out-of-bounds check
        "sm.check_coord_oob": False,
        "sm.tile_cache_size": 100000000,
    }
)

# Context object used by the cells below
ctx = tiledb.Ctx(config)

# TODO: replace with your own shared directory or local directory
# tiledb_bucket = "//DESKTOP-6NI30V4/bt_data/finance-tiledb" # Remote directory
tiledb_bucket = "datas/finance-tiledb"  # Local directory

### Create cn_stock group

In [2]:
# URI of the cn_stock group, located directly under the storage root.
cn_stock = 'cn_stock'
cn_stock_group_name = "/".join((tiledb_bucket, cn_stock))

# How to delete a group (kept for reference):
# my_group = f"{tiledb_bucket}/my_group"
# mygrp = tiledb.Group(my_group, "m", ctx=ctx)
# mygrp.delete()

# Create the group; if creation fails (for example because the group already
# exists) print the message and continue.
try:
    tiledb.group_create(cn_stock_group_name, ctx=ctx)
except Exception as err:
    print(err)

Group: Cannot create group; Group 'file:///d:/lzq/quantdata/datas/finance-tiledb/cn_stock' already exists


## cn_stock/bars/daily

### Create array schema

In [3]:
import numpy as np
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Make sure the parent "bars" group exists; if creation fails (for example
# because the group is already there) print the message and continue.
try:
    tiledb.group_create(f"{cn_stock_group_name}/bars", ctx=ctx)
except Exception as e:
    print(e)

# Drop any previous copy of the array so the schema below is created from
# scratch. This is deliberately best-effort (the array may not exist yet),
# but KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    tiledb.Array.delete_array(array_bars_stock_daily, ctx=ctx)
except Exception:
    pass

lz4_filter = tiledb.LZ4Filter()
filters = tiledb.FilterList([lz4_filter])
# zstd_filter = tiledb.ZstdFilter()
# filters2 = tiledb.FilterList([tiledb.ByteShuffleFilter(), zstd_filter])

# (attribute name, dtype) pairs, in the exact order they are stored in the schema.
attr_specs = [
    ("name", 'U4'),
    ("open", np.float32),
    ("high", np.float32),
    ("low", np.float32),
    ("close", np.float32),
    ("_open", np.float32),
    ("_high", np.float32),
    ("_low", np.float32),
    ("_close", np.float32),
    ("volume", np.uint32),
    ("amount", np.uint64),
    ("preclose", np.float32),

    ("net_profit_ttm", np.float64),
    ("cashflow_ttm", np.float64),
    ("equity", np.float64),
    ("asset", np.float64),
    ("debt", np.float64),
    ("debttoasset", np.float32),
    ("net_profit_q", np.float64),
    ("pe_ttm", np.float32),
    ("pb", np.float32),
    ("mkt_cap", np.float64),
    ("mkt_cap_ashare", np.float64),

    ("vip_buy_amt", np.float32),
    ("vip_sell_amt", np.float32),
    ("inst_buy_amt", np.float32),
    ("inst_sell_amt", np.float32),
    ("mid_buy_amt", np.float32),
    ("mid_sell_amt", np.float32),
    ("indi_buy_amt", np.float32),
    ("indi_sell_amt", np.float32),
    ("master_net_flow_in", np.float32),
    ("master2_net_flow_in", np.float32),
    ("vip_net_flow_in", np.float32),
    ("mid_net_flow_in", np.float32),
    ("inst_net_flow_in", np.float32),
    ("indi_net_flow_in", np.float32),
    ("total_sell_amt", np.float32),
    ("total_buy_amt", np.float32),
    ("net_flow_in", np.float32),

    ("turnover", np.float32),
    ("free_shares", np.uint64),
    ("total_shares", np.uint64),
    ("maxupordown", np.uint8),
    ("maxupordown_at_open", np.uint8),
    ("lb_up_count", np.uint8),
    ("lb_down_count", np.uint8),
]
# Every attribute uses the same LZ4 filter list.
attrs = [
    tiledb.Attr(name=attr_name, dtype=attr_dtype, filters=filters)
    for attr_name, attr_dtype in attr_specs
]

# Axis 0: integer symbol index. TODO: sized for at most about 7000 symbols;
# enlarge the domain if that is exceeded.
d1 = tiledb.Dim(name='_symbol', domain=(0,7000), tile=1, dtype=np.uint32)
# Axis 1: integer date index.
d2 = tiledb.Dim(name="_dt", domain=(0, 10000), tile=5000, dtype=np.uint32)
dom1 = tiledb.Domain(d1, d2)

# Dimension labels, keyed by axis number. Each label schema is created from
# the dimension it is attached to, because create_label_schema takes the index
# dtype and tile extent from that dimension: "symbol" belongs to axis 0
# (_symbol, d1) and "dt" to axis 1 (_dt, d2).
dim_labels = {
    0: {
        "symbol": d1.create_label_schema("increasing", 'S9')
    },
    1: {
        "dt": d2.create_label_schema("increasing", "datetime64[s]")
    }
}
schema = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=attrs, cell_order='row-major', tile_order='row-major', dim_labels=dim_labels)
schema.check()
tiledb.Array.create(array_bars_stock_daily, schema, ctx=ctx)
schema

Group: Cannot create group; Group 'file:///d:/lzq/quantdata/datas/finance-tiledb/cn_stock/bars' already exists


Domain
"NameDomainTileData TypeIs Var-lengthFilters_symbol(0, 7000)1uint32False-_dt(0, 10000)5000uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersnameTrueFalse Name Option Level LZ4Filter level-1 openfloat32FalseFalse Name Option Level LZ4Filter level-1 highfloat32FalseFalse Name Option Level LZ4Filter level-1 lowfloat32FalseFalse Name Option Level LZ4Filter level-1 closefloat32FalseFalse Name Option Level LZ4Filter level-1 _openfloat32FalseFalse Name Option Level LZ4Filter level-1 _highfloat32FalseFalse Name Option Level LZ4Filter level-1 _lowfloat32FalseFalse Name Option Level LZ4Filter level-1 _closefloat32FalseFalse Name Option Level LZ4Filter level-1 volumeuint32FalseFalse Name Option Level LZ4Filter level-1 amountuint64FalseFalse Name Option Level LZ4Filter level-1 preclosefloat32FalseFalse Name Option Level LZ4Filter level-1 net_profit_ttmfloat64FalseFalse Name Option Level LZ4Filter level-1 cashflow_ttmfloat64FalseFalse Name Option Level LZ4Filter level-1 equityfloat64FalseFalse Name Option Level LZ4Filter level-1 assetfloat64FalseFalse Name Option Level LZ4Filter level-1 debtfloat64FalseFalse Name Option Level LZ4Filter level-1 debttoassetfloat32FalseFalse Name Option Level LZ4Filter level-1 net_profit_qfloat64FalseFalse Name Option Level LZ4Filter level-1 pe_ttmfloat32FalseFalse Name Option Level LZ4Filter level-1 pbfloat32FalseFalse Name Option Level LZ4Filter level-1 mkt_capfloat64FalseFalse Name Option Level LZ4Filter level-1 mkt_cap_asharefloat64FalseFalse Name Option Level LZ4Filter level-1 vip_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 vip_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 inst_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 inst_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 mid_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 mid_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 indi_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 indi_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 master_net_flow_infloat32FalseFalse Name 
Option Level LZ4Filter level-1 master2_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 vip_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 mid_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 inst_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 indi_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 total_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 total_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 turnoverfloat32FalseFalse Name Option Level LZ4Filter level-1 free_sharesuint64FalseFalse Name Option Level LZ4Filter level-1 total_sharesuint64FalseFalse Name Option Level LZ4Filter level-1 maxupordownuint8FalseFalse Name Option Level LZ4Filter level-1 maxupordown_at_openuint8FalseFalse Name Option Level LZ4Filter level-1 lb_up_countuint8FalseFalse Name Option Level LZ4Filter level-1 lb_down_countuint8FalseFalse Name Option Level LZ4Filter level-1
Cell Order
row-major
Tile Order
row-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_symbol,"(0, 7000)",1,uint32,False,-
_dt,"(0, 10000)",5000,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
name,TrueFalse Name Option Level LZ4Filter level-1,,,
open,float32,False,False,Name Option Level LZ4Filter level-1
high,float32,False,False,Name Option Level LZ4Filter level-1
low,float32,False,False,Name Option Level LZ4Filter level-1
close,float32,False,False,Name Option Level LZ4Filter level-1
_open,float32,False,False,Name Option Level LZ4Filter level-1
_high,float32,False,False,Name Option Level LZ4Filter level-1
_low,float32,False,False,Name Option Level LZ4Filter level-1
_close,float32,False,False,Name Option Level LZ4Filter level-1
volume,uint32,False,False,Name Option Level LZ4Filter level-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1


### Write

- 写入同一个cell, label index如果不同，前一个会被覆盖掉
- Label index要连续，中间不能有空值
- Label index要按递增的顺序写入
- Label index如果覆盖之后，破坏了顺序，写入时不会报错，读取时会报错
- DenseArray 不支持删除指定行列

In [4]:
# 写入一行
import pandas as pd
import numpy as np

# Load one symbol's history; the "name" column is forced to str.
df = pd.read_csv("datasources/000001.SZ.csv", parse_dates=['dt']).astype({'name': str})
# print(df)
# print(df.info())

# One NumPy array per CSV column, keyed by column name, plus the value for the
# "symbol" label of the row being written.
data = {attrname: col.to_numpy() for attrname, col in df.items()}
data['symbol'] = np.array(['000001.SZ'])
# print(data)

# Write row 0 across column indices 0..7979.
with tiledb.open(array_bars_stock_daily, mode='w', ctx=ctx) as A:
    A[0:1,0:7980] = data

- [bug] [MacOS 10.15.17 异常] [Windows 10 正常] 当写入这一列之后，`python -m pytest tests/test_tiledb.py` 测试用例随机报错: 
  ```sh
  tiledb.cc.TileDBError: DimensionLabelQuery: Failed to process and update index ranges for label 'dt'. ReaderBase: Attribute out of order
  ```
- [bug]当写入这一列之后，再执行`Consolidation`不生效

In [11]:
# 写入一列
import numpy as np
import pandas as pd
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Number of rows (symbol indices) filled in this single-column write.
rows=3000
df = (
    pd.read_csv("datasources/000001.SZ.csv", parse_dates=['dt'])
    .astype({'name': str})
    .iloc[:rows]
)

# One NumPy array per CSV column, keyed by column name.
data = {attrname: col.to_numpy() for attrname, col in df.items()}
# Generated symbol labels 000001.SZ .. 003000.SZ, one per row written.
symbol_labels = [f'{i+1:0>6}.SZ' for i in range(rows)]
data['symbol'] = np.array(symbol_labels)
# A single "dt" label value for the one column being written.
data['dt'] = np.array([ '2024-08-17T08:00:00.000000000'], dtype='datetime64[ns]')
# print(data)

# Write column index 7980 for rows 0..rows-1.
with tiledb.open(array_bars_stock_daily, mode='w', ctx=ctx) as A:
    A[0:rows, 7980] = data

In [16]:
# 写入单元格
import pandas as pd
import numpy as np
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Keep only the last CSV record; the "name" column is forced to str.
df = (
    pd.read_csv("datasources/000001.SZ.csv", parse_dates=['dt'])
    .astype({'name': str})
    .iloc[-1:]
)
# print(df)
# print(df.info())

# One single-element NumPy array per CSV column, then the two label values.
data = {attrname: col.to_numpy() for attrname, col in df.items()}
data['symbol'] = np.array(['000001.SZ'])
data['dt'] = np.array([ '2024-08-17T08:00:00.000000000'], dtype='datetime64[ns]')
print(data)

# Write the single cell at row 0, column index 7980.
with tiledb.open(array_bars_stock_daily, mode='w', ctx=ctx) as A:
    A[0,7980] = data

{'dt': array(['2024-08-17T08:00:00.000000000'], dtype='datetime64[ns]'), 'name': array(['平安银行'], dtype=object), '_open': array([10.08]), '_high': array([10.21]), '_low': array([10.05]), '_close': array([10.13]), 'volume': array([126036761], dtype=int64), 'amount': array([1277042001], dtype=int64), 'preclose': array([10.03]), 'net_profit_ttm': array([4.69470003e+10]), 'cashflow_ttm': array([1.61941996e+11]), 'equity': array([4.81869005e+11]), 'asset': array([5.75403301e+12]), 'debt': array([5.27216411e+12]), 'debttoasset': array([0.91626]), 'net_profit_q': array([1.09470003e+10]), 'pe_ttm': array([4.1873]), 'pb': array([0.4772]), 'mkt_cap': array([1.96581951e+11]), 'mkt_cap_ashare': array([1.96578906e+11]), 'vip_buy_amt': array([36871.17188]), 'vip_sell_amt': array([47651.91016]), 'inst_buy_amt': array([41308.48828]), 'inst_sell_amt': array([27590.83984]), 'mid_buy_amt': array([29032.]), 'mid_sell_amt': array([29776.36914]), 'indi_buy_amt': array([20492.53906]), 'indi_sell_amt': array([

### Consolidation

In [18]:
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Consolidate the array at array_bars_stock_daily. The notebook's ctx is passed
# (as in every other TileDB call here) so that the configured storage backend
# settings are used instead of a default context.
tiledb.consolidate(array_bars_stock_daily, ctx=ctx)
# Alternative: consolidate only the fragments inside a timestamp window.
# tiledb.consolidate(array_bars_stock_daily, ctx=tiledb.Ctx(tiledb.Config(
#     {
#         "sm.consolidation.timestamp_start": 1724329475913,
#         "sm.consolidation.timestamp_end": 1724332212656,
#         "sm.consolidation.mode": "fragments",
#     }
# )))

# Vacuum the array after consolidation, again with the notebook's ctx.
tiledb.vacuum(array_bars_stock_daily, ctx=ctx)
# tiledb.vacuum(array_bars_stock_daily, config=tiledb.Config({"sm.vacuum.mode": "fragments"}))

### Read

In [5]:
import pandas as pd
import quantdata as qd
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Indexer handed to qd.tiledb_get_array: presumably "all dates for the symbol
# 000001.SZ, addressed through the 'symbol' label" - confirm against quantdata.
symbol_indexer = (('symbol',), ("000001.SZ", slice(None, None)))

with qd.tiledb_connect(array_bars_stock_daily) as A:
    # %timeit qd.tiledb_get_array(A, indexer=(('symbol',), ("000001.SZ", slice(None, None))))
    a = qd.tiledb_get_array(A, indexer=symbol_indexer)
    # Drop the 'symbol' entry so only the remaining entries become columns.
    a.pop('symbol')

# Flatten each remaining array into one DataFrame column and display the frame.
flat_columns = {col: arr.flatten() for col, arr in a.items()}
pd.DataFrame(flat_columns)

Unnamed: 0,name,open,high,low,close,_open,_high,_low,_close,volume,...,total_sell_amt,total_buy_amt,net_flow_in,turnover,free_shares,total_shares,maxupordown,maxupordown_at_open,lb_up_count,lb_down_count
0,深发展Ａ,7.07077,7.07077,7.07077,7.07077,66.400002,66.400002,66.400002,66.400002,21200,...,0.000000,0.000000,0.00,0.00057,37099996,67900000,0,0,0,0
1,深发展Ａ,7.03562,7.03562,7.03562,7.03562,66.070000,66.070000,66.070000,66.070000,16800,...,0.000000,0.000000,0.00,0.00045,37100000,67900000,0,0,0,0
2,深发展Ａ,7.07290,7.07290,7.07290,7.07290,66.419998,66.419998,66.419998,66.419998,40200,...,0.000000,0.000000,0.00,0.00108,37100000,67900008,0,0,0,0
3,深发展Ａ,7.00261,7.00261,7.00261,7.00261,65.760002,65.760002,65.760002,65.760002,6100,...,0.000000,0.000000,0.00,0.00016,37099996,67899992,0,0,0,0
4,深发展Ａ,6.96747,6.96747,6.96747,6.96747,65.430000,65.430000,65.430000,65.430000,4900,...,0.000000,0.000000,0.00,0.00013,37100000,67900000,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7975,平安银行,10.02000,10.05000,9.98000,10.01000,10.020000,10.050000,9.980000,10.010000,35301346,...,35325.140625,35325.128906,-0.01,0.00182,19405546950,19405918198,0,0,0,0
7976,平安银行,10.02000,10.03000,9.92000,9.95000,10.020000,10.030000,9.920000,9.950000,62207719,...,62006.140625,62006.140625,-0.00,0.00321,19405546950,19405918198,0,0,0,0
7977,平安银行,9.94000,9.98000,9.91000,9.92000,9.940000,9.980000,9.910000,9.920000,43638038,...,43338.050781,43338.039062,-0.01,0.00225,19405546950,19405918198,0,0,0,0
7978,平安银行,9.92000,10.06000,9.87000,10.03000,9.920000,10.060000,9.870000,10.030000,77624747,...,77585.148438,77585.148438,0.00,0.00400,19405546950,19405918198,0,0,0,0


### Undo: delete fragments

- fragments删除了但是Label index没有删除; 将对应Label index写为None，但是长度又没变。修改LabelIndex一定要谨慎。
- 删除不存在的fragment不会有任何报错

In [46]:
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Print the timestamp range of every fragment of the array. The notebook's ctx
# is passed (as in the other TileDB calls) so the configured storage backend
# settings are used.
fragments_info = tiledb.array_fragments(array_bars_stock_daily, ctx=ctx)
for frag in fragments_info:
    print(frag.timestamp_range)
# To undo a write, delete the fragments inside a timestamp range:
# tiledb.Array.delete_fragments(array_bars_stock_daily, 1724323738946, 1724323738946)

# Modify the label index (be careful, see the notes above this cell):
# schema = tiledb.ArraySchema.load(array_bars_stock_daily)
# dim_dt = schema.dim_label("dt")
# with tiledb.open(dim_dt.uri, "w") as dt_array:
#     dt_array[7980] = None

(1724329475913, 1724329475913)
(1724329616798, 1724329616798)
(1724329659872, 1724329659872)
(1724331938150, 1724331938150)
(1724332041766, 1724332041766)
(1724332067348, 1724332067348)
(1724332072284, 1724332072284)
(1724332212656, 1724332212656)


## cn_stock/bars/1min (5min, 10min, 15min, 30min, 1h, 2h, weekly, monthly)

### Create array schema

In [32]:
import numpy as np
# All domains passed to setup_bars are estimated for 100 years of bars.
def setup_bars(_name, _d1_domain, _d1_tile):
    """(Re)create the dense bars array ``{cn_stock_group_name}/bars/{_name}``.

    Any array already stored at that URI is deleted first (best-effort), then
    a new dense, col-major array is created with the attributes open, high,
    low, close, volume and amount. It has two uint32 dimensions, ``_dt`` and
    ``_symbol``, each carrying an increasing dimension label ("dt" and
    "symbol").

    Parameters
    ----------
    _name : str
        Last path component of the array URI (e.g. "1min").
    _d1_domain : tuple
        Domain passed to the ``_dt`` dimension.
    _d1_tile : int or None
        Tile extent passed to the ``_dt`` dimension.

    Returns
    -------
    tiledb.ArraySchema
        The schema the array was created with.
    """
    array_bars = f"{cn_stock_group_name}/bars/{_name}"

    # Best-effort removal of a previous array (it may not exist yet).
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        tiledb.Array.delete_array(array_bars, ctx=ctx)
    except Exception:
        pass

    # filter_list = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.ZstdFilter()])
    attrs = [
        tiledb.Attr(name="open", dtype=np.float32),
        tiledb.Attr(name="high", dtype=np.float32),
        tiledb.Attr(name="low", dtype=np.float32),
        tiledb.Attr(name="close", dtype=np.float32),
        tiledb.Attr(name="volume", dtype=np.uint64),
        tiledb.Attr(name="amount", dtype=np.float64),
    ]
    d1 = tiledb.Dim(name="_dt", domain=_d1_domain, tile=_d1_tile, dtype=np.uint32)
    d2 = tiledb.Dim(name='_symbol', domain=(0,20000), tile=1, dtype=np.uint32)
    dom1 = tiledb.Domain(d1, d2)
    # Dimension labels keyed by axis number: "dt" on axis 0 (_dt) and
    # "symbol" on axis 1 (_symbol).
    dim_labels = {
        0: {
            "dt": d1.create_label_schema("increasing", "datetime64[s]")
        },
        1: {
            "symbol": d2.create_label_schema("increasing", np.dtype('U'))
        }
    }
    schema = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=attrs, cell_order='col-major', tile_order='col-major', dim_labels=dim_labels)
    schema.check()
    tiledb.Array.create(array_bars, schema, ctx=ctx)
    return schema

In [34]:
# 1-minute bars; upper bound presumably 240 bars/day * 250 trading days * 100 years.
setup_bars(_name="1min", _d1_domain=(0, 240 * 250 * 100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 6000000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 6000000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [35]:
# 5-minute bars; upper bound presumably 48 bars/day * 250 trading days * 100 years.
setup_bars(_name="5min", _d1_domain=(0, 48 * 250 * 100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 1200000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 1200000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [36]:
# 15-minute bars; upper bound presumably 16 bars/day * 250 trading days * 100 years.
setup_bars(_name="15min", _d1_domain=(0, 16 * 250 * 100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 400000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 400000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [37]:
# 30-minute bars; upper bound presumably 8 bars/day * 250 trading days * 100 years.
setup_bars(_name="30min", _d1_domain=(0, 8 * 250 * 100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 200000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 200000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [38]:
# 1-hour bars; upper bound presumably 4 bars/day * 250 trading days * 100 years.
setup_bars(_name="1h", _d1_domain=(0, 4 * 250 * 100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 100000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 100000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [39]:
# 2-hour bars; upper bound presumably 2 bars/day * 250 trading days * 100 years.
setup_bars(_name="2h", _d1_domain=(0, 2 * 250 * 100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 50000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 50000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [40]:
# Weekly bars; no explicit tile extent is given for the _dt dimension.
setup_bars(_name="weekly", _d1_domain=(0, 6000), _d1_tile=None)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 6000)6001uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 6000)",6001,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [41]:
# Monthly bars; no explicit tile extent is given for the _dt dimension.
setup_bars(_name="monthly", _d1_domain=(0, 1200), _d1_tile=None)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 1200)1201uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 1200)",1201,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


## cn_stock/market_stats

### Create array schema

In [42]:

import numpy as np
array_market_stats = f"{cn_stock_group_name}/market_stats"

# Drop any previous copy of the array so the schema below is created from
# scratch. This is deliberately best-effort (the array may not exist yet),
# but KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    tiledb.Array.delete_array(array_market_stats, ctx=ctx)
except Exception:
    pass

# Four uint16 counts, four float32 ratios, and one variable-length uint8
# attribute ("lb").
attrs = [
    tiledb.Attr(name="count_of_uplimit", dtype=np.uint16),
    tiledb.Attr(name="count_of_downlimit", dtype=np.uint16),
    tiledb.Attr(name="count_of_yiziup", dtype=np.uint16),
    tiledb.Attr(name="count_of_yizidown", dtype=np.uint16),
    tiledb.Attr(name="ratio_of_uplimit", dtype=np.float32),
    tiledb.Attr(name="ratio_of_downlimit", dtype=np.float32),
    tiledb.Attr(name="ratio_of_yiziup", dtype=np.float32),
    tiledb.Attr(name="ratio_of_yizidown", dtype=np.float32),
    tiledb.Attr(name="lb", var=True, dtype=np.uint8),
]
# Single datetime dimension with second resolution.
# NOTE(review): because the dimension dtype is datetime64[s], tile=250 is an
# extent of 250 seconds (the schema repr shows "250 seconds") - confirm this
# is intended rather than 250 days.
d1 = tiledb.Dim(name="dt", domain=(np.datetime64('1990-01-01'), np.datetime64('2100-01-01')), tile=250, dtype="datetime64[s]")
dom1 = tiledb.Domain(d1)
schema = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=attrs, cell_order='col-major', tile_order='col-major')
schema.check()
tiledb.Array.create(array_market_stats, schema, ctx=ctx)
schema


Domain
"NameDomainTileData TypeIs Var-lengthFiltersdt(numpy.datetime64('1990-01-01T00:00:00'), numpy.datetime64('2100-01-01T00:00:00'))250 secondsdatetime64[s]False-"
Attributes
NameData TypeIs Var-LenIs NullableFilterscount_of_uplimituint16FalseFalse-count_of_downlimituint16FalseFalse-count_of_yiziupuint16FalseFalse-count_of_yizidownuint16FalseFalse-ratio_of_uplimitfloat32FalseFalse-ratio_of_downlimitfloat32FalseFalse-ratio_of_yiziupfloat32FalseFalse-ratio_of_yizidownfloat32FalseFalse-lbuint8TrueFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
dt,"(numpy.datetime64('1990-01-01T00:00:00'), numpy.datetime64('2100-01-01T00:00:00'))",250 seconds,datetime64[s],False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
count_of_uplimit,uint16,False,False,-
count_of_downlimit,uint16,False,False,-
count_of_yiziup,uint16,False,False,-
count_of_yizidown,uint16,False,False,-
ratio_of_uplimit,float32,False,False,-
ratio_of_downlimit,float32,False,False,-
ratio_of_yiziup,float32,False,False,-
ratio_of_yizidown,float32,False,False,-
lb,uint8,True,False,-
