# 配置

## MongoDB config
1. 修改docker-compose.yaml中的用户名和密码，控制台输入 `docker-compose up -d` 运行后台mongodb.
2. 登录`http://localhost:8081/`

In [2]:
from mongodb_helper import *

# MongoDB connection settings.
# Every value can be overridden through an environment variable, so the
# credentials chosen in docker-compose.yaml do not have to be committed here.
# The fallbacks are the values this notebook has always used.
import os

mg_host = os.environ.get("MONGO_HOST", "localhost")
mg_port = int(os.environ.get("MONGO_PORT", "27017"))  # port must be an int
mg_user = os.environ.get("MONGO_USER", "root")
mg_password = os.environ.get("MONGO_PASSWORD", "admin")

## TileDB config

### ~~Minio config~~
1. ~~修改docker-compose.yaml中的用户名和密码，控制台输入 `docker-compose up -d` 运行后台minio.~~
2. ~~登录`http://localhost:9001/`~~
3. ~~创建bucket `finance-tiledb`~~
4. ~~创建access keys，并复制粘贴在以下代码~~

In [157]:
import tiledb

# TileDB settings for an S3-compatible endpoint, declared in one literal and
# handed to tiledb.Config in a single call.
s3_settings = {
    "sm.check_coord_dups": "false",
    # Turn off the coordinate out-of-bounds check.
    "sm.check_coord_oob": False,
    "sm.tile_cache_size": 100000000,
    # Optional tuning knobs, left disabled:
    # "sm.compute_concurrency_level": 10,
    # "sm.io_concurrency_level": 30,
    # "vfs.s3.max_parallel_ops": 30,
    # "vfs.min_batch_size": 20971520,
    # "vfs.min_batch_gap": 512000,
    "vfs.s3.scheme": "http",
    "vfs.s3.region": "",  # S3 region, intentionally left empty
    "vfs.s3.endpoint_override": "localhost:9000",
    "vfs.s3.use_virtual_addressing": "false",
    # TODO: replace with your own ACCESS_KEY
    "vfs.s3.aws_access_key_id": "",
    # TODO: replace with your own SECRET
    "vfs.s3.aws_secret_access_key": "",
}
config = tiledb.Config(s3_settings)

# Create the context object shared by all TileDB calls below.
ctx = tiledb.Ctx(config)

tiledb_bucket = "s3://finance-tiledb"

### Local/Remote disk directory

In [3]:
import tiledb

# TileDB settings for plain (local or shared) directories, declared in one
# literal and handed to tiledb.Config in a single call.
local_settings = {
    "sm.check_coord_dups": "false",
    # Turn off the coordinate out-of-bounds check.
    "sm.check_coord_oob": False,
    "sm.tile_cache_size": 100000000,
}
config = tiledb.Config(local_settings)

# Create the context object shared by all TileDB calls below.
ctx = tiledb.Ctx(config)

# TODO: replace with your own shared directory or local directory
# tiledb_bucket = "//DESKTOP-6NI30V4/bt_data/finance-tiledb" # Remote directory
tiledb_bucket = "~/WorkingDoc/tiledb/finance-tiledb"  # Local directory

# 中国A股市场

In [4]:
# Name of the A-share market; used both as the MongoDB database name and as
# the TileDB group directory under tiledb_bucket.
cn_stock = 'cn_stock'
cn_stock_group_name = f"{tiledb_bucket}/{cn_stock}"

# How to delete a group (kept for reference):
# my_group = f"{tiledb_bucket}/my_group"
# mygrp = tiledb.Group(my_group, "m", ctx=ctx)
# mygrp.delete()

# Create the group; any failure (such as the group already existing, see the
# recorded output) is printed instead of aborting the cell.
try:
    tiledb.group_create(cn_stock_group_name, ctx=ctx)
except Exception as err:
    print(err)

Group: Cannot create group; Group 'file:///Users/qiong/WorkingDoc/tiledb/finance-tiledb/cn_stock' already exists


## 交易日历 trade_cal

|字段名|说明|类型|
|--|--|--|
|_id [index] |日期|datetime64[s]|
|status|是否交易：0-不交易 1-交易|int8|

### Overwrite

In [16]:
import pandas as pd
import numpy as np

# Load the trading calendar: 'dt' is parsed as a date in YYYYMMDD format and
# 'is_open' is read as uint8. The columns are then renamed to the stored
# field names (MongoDB requires an _id field).
df = pd.read_csv(
    "datasources/trade_cal.csv",
    parse_dates=['dt'],
    date_format='%Y%m%d',
    dtype={'is_open': np.uint8},
).rename(columns={'dt': '_id', 'is_open': 'status'})
# print(df.info())

with mongo_connect(mg_host, mg_port, mg_user, mg_password) as mg:
    db = mg[cn_stock]
    # Overwrite semantics: drop the old collection, then insert one document
    # per DataFrame row.
    db.drop_collection('trade_cal')
    records = df.to_dict(orient='records')
    db['trade_cal'].insert_many(records)

### Read

In [17]:
import pandas as pd
import quantdata as qd

# Fetch the trading calendar through the quantdata helper and materialise it
# as a DataFrame while the connection is still open.
with qd.mongo_connect(mg_host, mg_port, mg_user, mg_password) as mg:
    db = mg['cn_stock']
    df = pd.DataFrame(qd.mongo_get_trade_cal(db))
df

Unnamed: 0,_id,status
0,1990-12-19,1
1,1990-12-20,1
2,1990-12-21,1
3,1990-12-24,1
4,1990-12-25,1
...,...,...
8307,2024-12-25,1
8308,2024-12-26,1
8309,2024-12-27,1
8310,2024-12-30,1


## 证券基本信息 stocks_basic_info

|字段名|说明|类型|
|--|--|--|
|symbol|股票代码|string|
|name|股票名称|string|
|area|地域|string|
|industry|所属行业|string|
|fullname|股票全称|string|
|enname|英文全称|string|
|cnspell|拼音缩写|string|
|market|市场类型（主板/创业板/科创板/CDR）|string|
|exchange|交易所代码（SSE/SZSE/BSE）|string|
|curr_type|交易货币：CNY|string|
|list_date|上市日期|datetime64[D]|
|delist_date|退市日期|datetime64[D]|
|status|上市状态，其中L上市 D退市 P暂停上市|string|
|is_hs|是否沪深港通标的，N否 H沪股通 S深股通|string|

### Overwrite

In [20]:
import pandas as pd

# Load the basic-info CSV (first column is the index, both date columns are
# parsed as YYYYMMDD), then:
#   1. drop the original 'symbol' column,
#   2. rename 'ts_code' -> 'symbol' and 'list_status' -> 'status',
#   3. replace missing values with empty strings.
df = (
    pd.read_csv(
        "datasources/stocks_basic_info.csv",
        parse_dates=['list_date', 'delist_date'],
        date_format='%Y%m%d',
        index_col=0,
    )
    .drop(columns=['symbol'])
    .rename(columns={'ts_code':'symbol', 'list_status':'status'})
    .fillna('')
)
# print(df.info())

with mongo_connect(mg_host, mg_port, mg_user, mg_password) as mg:
    db = mg[cn_stock]
    # Overwrite semantics: drop the old collection before inserting.
    db.drop_collection('stocks_basic_info')
    target_collection = db['stocks_basic_info']
    insert_many_ignore_nan(target_collection, df)

### Read

In [6]:
import pandas as pd
import quantdata as qd

# Fetch the 'stocks_basic_info' collection through the quantdata helper and
# materialise it as a DataFrame while the connection is still open.
with qd.mongo_connect(mg_host, mg_port, mg_user, mg_password) as mg:
    db = mg['cn_stock']
    df = pd.DataFrame(qd.mongo_get_data(db, 'stocks_basic_info'))
df

Unnamed: 0,_id,symbol,name,area,industry,fullname,enname,cnspell,market,exchange,curr_type,status,list_date,is_hs,delist_date
0,66c2e1d79973412d57c72f53,000001.SZ,平安银行,深圳,银行,平安银行股份有限公司,"Ping An Bank Co., Ltd.",payh,主板,SZSE,CNY,L,1991-04-03,S,NaT
1,66c2e1d79973412d57c72f54,000002.SZ,万科A,深圳,全国地产,万科企业股份有限公司,"China Vanke Co.,Ltd.",wka,主板,SZSE,CNY,L,1991-01-29,S,NaT
2,66c2e1d79973412d57c72f55,000004.SZ,国华网安,深圳,软件服务,深圳国华网安科技股份有限公司,Shenzhen Guohua Network Security Technology Co...,ghwa,主板,SZSE,CNY,L,1991-01-14,N,NaT
3,66c2e1d79973412d57c72f56,000006.SZ,深振业A,深圳,区域地产,深圳市振业(集团)股份有限公司,"Shenzhen Zhenye(Group) Co., Ltd.",szya,主板,SZSE,CNY,L,1992-04-27,S,NaT
4,66c2e1d79973412d57c72f57,000007.SZ,全新好,深圳,其他商业,深圳市全新好股份有限公司,"Shenzhen Quanxinhao Co,.Ltd.",qxh,主板,SZSE,CNY,L,1992-04-13,N,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5621,66c2e1d79973412d57c74548,688555.SH,退泽达(退),,,泽达易盛(天津)科技股份有限公司,"Essence Information Technology Co., Ltd.",tszd,科创板,SSE,CNY,D,2020-06-23,N,2023-07-07
5622,66c2e1d79973412d57c74549,832317.BJ,观典防务(退),,,观典防务技术股份有限公司,"Guandian Defense Technology Co.,Ltd.",gdfw,北交所,BSE,CNY,D,2020-07-27,N,2022-04-26
5623,66c2e1d79973412d57c7454a,833874.BJ,泰祥股份(退),,,十堰市泰祥实业股份有限公司,"Shiyan Taixiang Industry Co., Ltd.",txgf,北交所,BSE,CNY,D,2020-07-27,N,2022-07-18
5624,66c2e1d79973412d57c7454b,833994.BJ,翰博高新(退),,,翰博高新材料(合肥)股份有限公司,"Highbroad Advanced Material (Hefei) Co., Ltd.",hbgx,北交所,BSE,CNY,D,2020-07-27,N,2022-07-25



## 日线详情数据 bars/daily

|字段名|说明|类型|
|--|--|--|
|dt [index]|交易日期|datetime64[s]|
|name|股票名称|string|
|open|开盘价(前复权)|float32|
|high|最高价(前复权)|float32|
|low|最低价(前复权)|float32|
|close|收盘价(前复权)|float32|
|_open|开盘价|float32|
|_high|最高价|float32|
|_low|最低价|float32|
|_close|收盘价|float32|
|preclose|前收盘价，若当天发生除权，前收盘价为上个交易日复权之后的收盘价|float32|
|volume|成交量（股）|uint32|
|amount|成交额（元）|uint64|
|net_profit_ttm|净利润TTM(不含少数股东损益)|float64|
|cashflow_ttm|现金流TTM, 经营活动产生的现金流量净额|float64|
|equity|净资产|float64|
|asset|总资产|float64|
|debt|总负债|float64|
|debttoasset|资产负债率|float32|
|net_profit_q|净利润(当季)|float64|
|pe_ttm|滚动市盈率|float32|
|pb|市净率|float32|
|mkt_cap|总市值(元)|float64|
|mkt_cap_ashare|流通市值(元)|float64|
|vip_buy_amt|大户资金买入额(万元)|float32|
|vip_sell_amt|大户资金卖出额(万元)|float32|
|inst_buy_amt|机构资金买入额(万元)|float32|
|inst_sell_amt|机构资金卖出额(万元)|float32|
|mid_buy_amt|中户资金买入额(万元)|float32|
|mid_sell_amt|中户资金卖出额(万元)|float32|
|indi_buy_amt|散户资金买入额(万元)|float32|
|indi_sell_amt|散户资金卖出额(万元)|float32|
|master_net_flow_in|主力(机构和大户)净买入(万元)|float32|
|master2_net_flow_in|主力2(机构、大户和中户)净买入(万元)|float32|
|vip_net_flow_in|大户净流入(万元)|float32|
|mid_net_flow_in|中户净流入(万元)|float32|
|inst_net_flow_in|机构净流入(万元)|float32|
|indi_net_flow_in|散户净流入(万元)|float32|
|total_sell_amt|流出资金总额(万元)|float32|
|total_buy_amt|流入资金总额(万元)|float32|
|net_flow_in|资金净流入(万元)|float32|
|turnover|换手率|float32|
|free_shares|流通股本|uint64|
|total_shares|总股本|uint64|
|maxupordown|标记收盘涨停或跌停状态,1-涨停,2-一字板涨停；-1-跌停，-2-一字板跌停；0-未涨跌停|uint8|
|maxupordown_at_open|标记开盘涨停或跌停状态，状态码同上|uint8|
|lb_up_count|连板涨停次数|uint8|
|lb_down_count|连板跌停次数|uint8|


In [30]:
import numpy as np
# (Re)create the dense TileDB array that stores the daily bars.
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Make sure the parent "bars" group exists. A failure (such as the group
# already existing, see the recorded output) is printed and tolerated so the
# cell can be re-run.
try:
    tiledb.group_create(f"{cn_stock_group_name}/bars", ctx=ctx)
except Exception as e:
    print(e)

# Best-effort removal of a previous version of the array so that it can be
# recreated with the schema below.
try:
    tiledb.Array.delete_array(array_bars_stock_daily, ctx=ctx)
except Exception:
    # Deletion failed (presumably because the array does not exist yet);
    # carry on and create it. KeyboardInterrupt/SystemExit still propagate.
    pass

lz4_filter = tiledb.LZ4Filter()
filters = tiledb.FilterList([lz4_filter])
# zstd_filter = tiledb.ZstdFilter()
# filters2 = tiledb.FilterList([tiledb.ByteShuffleFilter(), zstd_filter])

# (attribute name, dtype) pairs in storage order; every attribute uses the
# same LZ4 filter list.
attr_specs = [
    ("name", 'U4'),
    # Forward-adjusted prices.
    ("open", np.float32),
    ("high", np.float32),
    ("low", np.float32),
    ("close", np.float32),
    # Unadjusted prices.
    ("_open", np.float32),
    ("_high", np.float32),
    ("_low", np.float32),
    ("_close", np.float32),
    ("volume", np.uint32),
    ("amount", np.uint64),
    ("preclose", np.float32),

    # Fundamentals.
    ("net_profit_ttm", np.float64),
    ("cashflow_ttm", np.float64),
    ("equity", np.float64),
    ("asset", np.float64),
    ("debt", np.float64),
    ("debttoasset", np.float32),
    ("net_profit_q", np.float64),
    ("pe_ttm", np.float32),
    ("pb", np.float32),
    ("mkt_cap", np.float64),
    ("mkt_cap_ashare", np.float64),

    # Money-flow figures.
    ("vip_buy_amt", np.float32),
    ("vip_sell_amt", np.float32),
    ("inst_buy_amt", np.float32),
    ("inst_sell_amt", np.float32),
    ("mid_buy_amt", np.float32),
    ("mid_sell_amt", np.float32),
    ("indi_buy_amt", np.float32),
    ("indi_sell_amt", np.float32),
    ("master_net_flow_in", np.float32),
    ("master2_net_flow_in", np.float32),
    ("vip_net_flow_in", np.float32),
    ("mid_net_flow_in", np.float32),
    ("inst_net_flow_in", np.float32),
    ("indi_net_flow_in", np.float32),
    ("total_sell_amt", np.float32),
    ("total_buy_amt", np.float32),
    ("net_flow_in", np.float32),

    ("turnover", np.float32),
    ("free_shares", np.uint64),
    ("total_shares", np.uint64),
    # NOTE(review): the field table documents negative codes (-1, -2) for the
    # limit-up/limit-down flags, but the dtype is unsigned -- confirm how
    # negative states are encoded before relying on these two attributes.
    ("maxupordown", np.uint8),
    ("maxupordown_at_open", np.uint8),
    ("lb_up_count", np.uint8),
    ("lb_down_count", np.uint8),
]
attrs = [
    tiledb.Attr(name=attr_name, dtype=attr_dtype, filters=filters)
    for attr_name, attr_dtype in attr_specs
]

# TODO: capped at 7000 symbols; the domain must be enlarged if this is exceeded.
d1 = tiledb.Dim(name='_symbol', domain=(0,7000), tile=1, dtype=np.uint32)
d2 = tiledb.Dim(name="_dt", domain=(0, 10000), tile=5000, dtype=np.uint32)
dom1 = tiledb.Domain(d1, d2)

# Dimension labels, keyed by dimension index. Each label schema is derived
# from the dimension it is attached to (index 0 is d1/_symbol, index 1 is
# d2/_dt), the same pairing setup_bars uses. The two were previously swapped,
# so each label took its tile extent from the other dimension.
dim_labels = {
    0: {
        "symbol": d1.create_label_schema("increasing", 'S9')
    },
    1: {
        "dt": d2.create_label_schema("increasing", "datetime64[s]")
    }
}
schema = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=attrs, cell_order='row-major', tile_order='row-major', dim_labels=dim_labels)
schema.check()
tiledb.Array.create(array_bars_stock_daily, schema, ctx=ctx)
schema

Group: Cannot create group; Group 'file:///Users/qiong/Desktop/quantdata/datas/finance-tiledb/cn_stock/bars' already exists


Domain
"NameDomainTileData TypeIs Var-lengthFilters_symbol(0, 7000)1uint32False-_dt(0, 10000)5000uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersnameTrueFalse Name Option Level LZ4Filter level-1 openfloat32FalseFalse Name Option Level LZ4Filter level-1 highfloat32FalseFalse Name Option Level LZ4Filter level-1 lowfloat32FalseFalse Name Option Level LZ4Filter level-1 closefloat32FalseFalse Name Option Level LZ4Filter level-1 _openfloat32FalseFalse Name Option Level LZ4Filter level-1 _highfloat32FalseFalse Name Option Level LZ4Filter level-1 _lowfloat32FalseFalse Name Option Level LZ4Filter level-1 _closefloat32FalseFalse Name Option Level LZ4Filter level-1 volumeuint32FalseFalse Name Option Level LZ4Filter level-1 amountuint64FalseFalse Name Option Level LZ4Filter level-1 preclosefloat32FalseFalse Name Option Level LZ4Filter level-1 net_profit_ttmfloat64FalseFalse Name Option Level LZ4Filter level-1 cashflow_ttmfloat64FalseFalse Name Option Level LZ4Filter level-1 equityfloat64FalseFalse Name Option Level LZ4Filter level-1 assetfloat64FalseFalse Name Option Level LZ4Filter level-1 debtfloat64FalseFalse Name Option Level LZ4Filter level-1 debttoassetfloat32FalseFalse Name Option Level LZ4Filter level-1 net_profit_qfloat64FalseFalse Name Option Level LZ4Filter level-1 pe_ttmfloat32FalseFalse Name Option Level LZ4Filter level-1 pbfloat32FalseFalse Name Option Level LZ4Filter level-1 mkt_capfloat64FalseFalse Name Option Level LZ4Filter level-1 mkt_cap_asharefloat64FalseFalse Name Option Level LZ4Filter level-1 vip_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 vip_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 inst_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 inst_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 mid_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 mid_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 indi_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 indi_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 master_net_flow_infloat32FalseFalse Name 
Option Level LZ4Filter level-1 master2_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 vip_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 mid_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 inst_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 indi_net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 total_sell_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 total_buy_amtfloat32FalseFalse Name Option Level LZ4Filter level-1 net_flow_infloat32FalseFalse Name Option Level LZ4Filter level-1 turnoverfloat32FalseFalse Name Option Level LZ4Filter level-1 free_sharesuint64FalseFalse Name Option Level LZ4Filter level-1 total_sharesuint64FalseFalse Name Option Level LZ4Filter level-1 maxupordownuint8FalseFalse Name Option Level LZ4Filter level-1 maxupordown_at_openuint8FalseFalse Name Option Level LZ4Filter level-1 lb_up_countuint8FalseFalse Name Option Level LZ4Filter level-1 lb_down_countuint8FalseFalse Name Option Level LZ4Filter level-1
Cell Order
row-major
Tile Order
row-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_symbol,"(0, 7000)",1,uint32,False,-
_dt,"(0, 10000)",5000,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
name,TrueFalse Name Option Level LZ4Filter level-1,,,
open,float32,False,False,Name Option Level LZ4Filter level-1
high,float32,False,False,Name Option Level LZ4Filter level-1
low,float32,False,False,Name Option Level LZ4Filter level-1
close,float32,False,False,Name Option Level LZ4Filter level-1
_open,float32,False,False,Name Option Level LZ4Filter level-1
_high,float32,False,False,Name Option Level LZ4Filter level-1
_low,float32,False,False,Name Option Level LZ4Filter level-1
_close,float32,False,False,Name Option Level LZ4Filter level-1
volume,uint32,False,False,Name Option Level LZ4Filter level-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1

Name,Option,Level
LZ4Filter,level,-1


### Overwrite
- 写入同一个 cell 时，如果 label index 不同，先写入的数据会被覆盖掉

In [32]:
import pandas as pd
import numpy as np

# Load the daily bars for one symbol; 'dt' is parsed as datetime and 'name'
# is forced to str.
df = pd.read_csv("datasources/data.csv", parse_dates=['dt'])
df = df.astype({'name': str})
# print(df)
# print(df.info())

# One NumPy array per CSV column, keyed by column name.
data = {column: series.to_numpy() for column, series in df.items()}
# Value for the 'symbol' dimension label of the single row being written.
data['symbol'] = np.array(['000001.SZ'])
print(data)

# Width of the written region along the second dimension. It is taken from
# the data instead of a hard-coded 7980, so the cell keeps working when the
# CSV grows.
n_rows = len(df)

with tiledb.open(array_bars_stock_daily, mode='w', ctx=ctx) as A:
    # Record the last date contained in the data as array metadata.
    A.meta['last_day'] = np.datetime_as_string(data['dt'][-1], unit='D')
    A[0:1, 0:n_rows] = data

{'dt': array(['1991-01-03T08:00:00.000000000', '1991-01-04T08:00:00.000000000',
       '1991-01-07T08:00:00.000000000', ...,
       '2024-08-14T08:00:00.000000000', '2024-08-15T08:00:00.000000000',
       '2024-08-16T08:00:00.000000000'], dtype='datetime64[ns]'), 'name': array(['深发展Ａ', '深发展Ａ', '深发展Ａ', ..., '平安银行', '平安银行', '平安银行'], dtype=object), '_open': array([66.4 , 66.07, 66.42, ...,  9.94,  9.92, 10.08]), '_high': array([66.4 , 66.07, 66.42, ...,  9.98, 10.06, 10.21]), '_low': array([66.4 , 66.07, 66.42, ...,  9.91,  9.87, 10.05]), '_close': array([66.4 , 66.07, 66.42, ...,  9.92, 10.03, 10.13]), 'volume': array([    21200,     16800,     40200, ...,  43638038,  77624747,
       126036761]), 'amount': array([   1410000,    1109000,    2672000, ...,  433380447,  775851461,
       1277042001]), 'preclose': array([67.41, 66.4 , 66.07, ...,  9.95,  9.92, 10.03]), 'net_profit_ttm': array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
       4.67849994e+10, 4.67849994e+10, 4.69470

In [111]:
import pandas as pd
import numpy as np
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Read the CSV, force 'name' to str and keep only the last row.
df = (
    pd.read_csv("datasources/data.csv", parse_dates=['dt'])
    .astype({'name': str})
    .iloc[-1:]
)
# print(df.info())
print(df)

# One NumPy array per column, plus the value for the 'symbol' label.
data = {column: series.to_numpy() for column, series in df.items()}
data['symbol'] = np.array(['000002.SZ'])
print(data)

# The actual write is left disabled:
# with tiledb.open(array_bars_stock_daily, mode='w', ctx=ctx) as A:
#     A[0,7980] = data

                      dt  name  _open  _high   _low  _close     volume  \
7979 2024-08-16 08:00:00  平安银行  10.08  10.21  10.05   10.13  126036761   

          amount  preclose  net_profit_ttm  ...  free_shares  total_shares  \
7979  1277042001     10.03    4.694700e+10  ...  19405617528   19405918198   

      maxupordown  maxupordown_at_open  lb_up_count  lb_down_count  close  \
7979            0                    0            0              0  10.13   

       open   high    low  
7979  10.08  10.21  10.05  

[1 rows x 48 columns]
{'dt': array(['2024-08-16T08:00:00.000000000'], dtype='datetime64[ns]'), 'name': array(['平安银行'], dtype=object), '_open': array([10.08]), '_high': array([10.21]), '_low': array([10.05]), '_close': array([10.13]), 'volume': array([126036761]), 'amount': array([1277042001]), 'preclose': array([10.03]), 'net_profit_ttm': array([4.69470003e+10]), 'cashflow_ttm': array([1.61941996e+11]), 'equity': array([4.81869005e+11]), 'asset': array([5.75403301e+12]), 'debt'

### Read

In [5]:
import pandas as pd
import quantdata as qd
array_bars_stock_daily = f"{cn_stock_group_name}/bars/daily"

# Indexer handed to the quantdata helper; presumably it selects every date of
# one symbol through the 'symbol' dimension label -- confirm against quantdata.
symbol_indexer = (('symbol',), ("000001.SZ", slice(None, None)))

with qd.tiledb_open_array(array_bars_stock_daily) as A:
    # %timeit qd.tiledb_get_array(A, indexer=symbol_indexer)
    a = qd.tiledb_get_array(A, indexer=symbol_indexer)
    # The label column is not wanted in the resulting frame.
    a.pop('symbol')

# Flatten every returned array into a 1-D column.
flat_columns = {col: arr.flatten() for col, arr in a.items()}
pd.DataFrame(flat_columns)

Unnamed: 0,name,open,high,low,close,_open,_high,_low,_close,volume,...,total_sell_amt,total_buy_amt,net_flow_in,turnover,free_shares,total_shares,maxupordown,maxupordown_at_open,lb_up_count,lb_down_count
0,深发展Ａ,7.07077,7.07077,7.07077,7.07077,66.400002,66.400002,66.400002,66.400002,21200,...,0.000000,0.000000,0.00,0.00057,37099996,67900000,0,0,0,0
1,深发展Ａ,7.03562,7.03562,7.03562,7.03562,66.070000,66.070000,66.070000,66.070000,16800,...,0.000000,0.000000,0.00,0.00045,37100000,67900000,0,0,0,0
2,深发展Ａ,7.07290,7.07290,7.07290,7.07290,66.419998,66.419998,66.419998,66.419998,40200,...,0.000000,0.000000,0.00,0.00108,37100000,67900008,0,0,0,0
3,深发展Ａ,7.00261,7.00261,7.00261,7.00261,65.760002,65.760002,65.760002,65.760002,6100,...,0.000000,0.000000,0.00,0.00016,37099996,67899992,0,0,0,0
4,深发展Ａ,6.96747,6.96747,6.96747,6.96747,65.430000,65.430000,65.430000,65.430000,4900,...,0.000000,0.000000,0.00,0.00013,37100000,67900000,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7975,平安银行,10.02000,10.05000,9.98000,10.01000,10.020000,10.050000,9.980000,10.010000,35301346,...,35325.140625,35325.128906,-0.01,0.00182,19405546950,19405918198,0,0,0,0
7976,平安银行,10.02000,10.03000,9.92000,9.95000,10.020000,10.030000,9.920000,9.950000,62207719,...,62006.140625,62006.140625,-0.00,0.00321,19405546950,19405918198,0,0,0,0
7977,平安银行,9.94000,9.98000,9.91000,9.92000,9.940000,9.980000,9.910000,9.920000,43638038,...,43338.050781,43338.039062,-0.01,0.00225,19405546950,19405918198,0,0,0,0
7978,平安银行,9.92000,10.06000,9.87000,10.03000,9.920000,10.060000,9.870000,10.030000,77624747,...,77585.148438,77585.148438,0.00,0.00400,19405546950,19405918198,0,0,0,0


## K线数据（分钟线、小时线、周线、月线）bars

已经是前复权的数据

- 分钟线 bars/1min(5min/10min/...)
- 小时线 bars/1h(2h)
- 周线 bars/weekly
- 月线 bars/monthly

|字段名|说明|类型|
|--|--|--|
|dt|交易日期|datetime64[s]|
|open|开盘价|float32|
|high|最高价|float32|
|low|最低价|float32|
|close|收盘价|float32|
|volume|成交量|uint64|
|amount|成交额|float64|

In [32]:
import numpy as np
# All capacities are estimated for roughly 100 years of data.
def setup_bars(_name, _d1_domain, _d1_tile):
    """Recreate the dense TileDB array ``bars/<_name>`` and return its schema.

    Any existing array at that location is deleted first (best effort), then
    a new dense array with the attributes open/high/low/close/volume/amount
    is created. The array has two uint32 dimensions, ``_dt`` and ``_symbol``,
    each carrying a dimension label (``dt`` as datetime64[s], ``symbol`` as a
    unicode string).

    Relies on the module-level ``cn_stock_group_name`` and ``ctx``.

    Args:
        _name: Array name below ``{cn_stock_group_name}/bars``, e.g. "1min".
        _d1_domain: ``(min, max)`` domain of the ``_dt`` dimension.
        _d1_tile: Tile extent of the ``_dt`` dimension; ``None`` is passed
            through to ``tiledb.Dim`` unchanged.

    Returns:
        The ``tiledb.ArraySchema`` the array was created with.
    """
    array_bars = f"{cn_stock_group_name}/bars/{_name}"

    # Best-effort removal of a previous version of the array.
    try:
        tiledb.Array.delete_array(array_bars, ctx=ctx)
    except Exception:
        # Deletion failed (presumably because the array does not exist yet);
        # carry on and create it. KeyboardInterrupt/SystemExit still propagate.
        pass

    # filter_list = tiledb.FilterList([tiledb.ByteShuffleFilter(), tiledb.ZstdFilter()])
    attrs = [
        tiledb.Attr(name="open", dtype=np.float32),
        tiledb.Attr(name="high", dtype=np.float32),
        tiledb.Attr(name="low", dtype=np.float32),
        tiledb.Attr(name="close", dtype=np.float32),
        tiledb.Attr(name="volume", dtype=np.uint64),
        tiledb.Attr(name="amount", dtype=np.float64),
    ]
    d1 = tiledb.Dim(name="_dt", domain=_d1_domain, tile=_d1_tile, dtype=np.uint32)
    d2 = tiledb.Dim(name='_symbol', domain=(0,20000), tile=1, dtype=np.uint32)
    dom1 = tiledb.Domain(d1, d2)
    # Dimension labels keyed by dimension index; each label schema is derived
    # from the dimension it is attached to.
    dim_labels = {
        0: {
            "dt": d1.create_label_schema("increasing", "datetime64[s]")
        },
        1: {
            "symbol": d2.create_label_schema("increasing", np.dtype('U'))
        }
    }
    schema = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=attrs, cell_order='col-major', tile_order='col-major', dim_labels=dim_labels)
    schema.check()
    tiledb.Array.create(array_bars, schema, ctx=ctx)
    return schema

In [34]:
# 1-minute bars: presumably 240 bars/day x 250 trading days/year x 100 years.
setup_bars("1min", _d1_domain=(0, 240*250*100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 6000000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 6000000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [35]:
# 5-minute bars: presumably 48 bars/day x 250 trading days/year x 100 years.
setup_bars("5min", _d1_domain=(0, 48*250*100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 1200000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 1200000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [36]:
# 15-minute bars: presumably 16 bars/day x 250 trading days/year x 100 years.
setup_bars("15min", _d1_domain=(0, 16*250*100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 400000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 400000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [37]:
# 30-minute bars: presumably 8 bars/day x 250 trading days/year x 100 years.
setup_bars("30min", _d1_domain=(0, 8*250*100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 200000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 200000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [38]:
# 1-hour bars: presumably 4 bars/day x 250 trading days/year x 100 years.
setup_bars("1h", _d1_domain=(0, 4*250*100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 100000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 100000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [39]:
# 2-hour bars: presumably 2 bars/day x 250 trading days/year x 100 years.
setup_bars("2h", _d1_domain=(0, 2*250*100), _d1_tile=1000)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 50000)1000uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 50000)",1000,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [40]:
# Weekly bars: no explicit tile extent; the recorded schema output shows
# TileDB then uses 6001, i.e. the whole _dt domain.
setup_bars("weekly", _d1_domain=(0, 6000), _d1_tile=None)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 6000)6001uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 6000)",6001,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


In [41]:
# Monthly bars: no explicit tile extent; the recorded schema output shows
# TileDB then uses 1201, i.e. the whole _dt domain.
setup_bars("monthly", _d1_domain=(0, 1200), _d1_tile=None)

Domain
"NameDomainTileData TypeIs Var-lengthFilters_dt(0, 1200)1201uint32False-_symbol(0, 20000)1uint32False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersopenfloat32FalseFalse-highfloat32FalseFalse-lowfloat32FalseFalse-closefloat32FalseFalse-volumeuint64FalseFalse-amountfloat64FalseFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
_dt,"(0, 1200)",1201,uint32,False,-
_symbol,"(0, 20000)",1,uint32,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
open,float32,False,False,-
high,float32,False,False,-
low,float32,False,False,-
close,float32,False,False,-
volume,uint64,False,False,-
amount,float64,False,False,-


## 每日统计数据 market_stats

排除了ST股

|字段名|说明|类型|
|--|--|--|
|dt [index]|交易日期|datetime64[s]|
|count_of_uplimit|涨停数|uint16|
|count_of_downlimit|跌停数|uint16|
|count_of_yiziup|一字涨停数|uint16|
|count_of_yizidown|一字跌停数|uint16|
|ratio_of_uplimit|涨停数占所有股数比例|float32|
|ratio_of_downlimit|跌停数占所有股数比例|float32|
|ratio_of_yiziup|一字涨停数占所有股数比例|float32|
|ratio_of_yizidown|一字跌停数占所有股数比例|float32|
|lb|连板数量[首板数, 二板数, ...]|[uint8,...]|


In [42]:

import numpy as np
# (Re)create the dense TileDB array that stores the per-day market statistics.
array_market_stats = f"{cn_stock_group_name}/market_stats"

# Best-effort removal of a previous version of the array.
try:
    tiledb.Array.delete_array(array_market_stats, ctx=ctx)
except Exception:
    # Deletion failed (presumably because the array does not exist yet);
    # carry on and create it. KeyboardInterrupt/SystemExit still propagate.
    pass

attrs = [
    tiledb.Attr(name="count_of_uplimit", dtype=np.uint16),
    tiledb.Attr(name="count_of_downlimit", dtype=np.uint16),
    tiledb.Attr(name="count_of_yiziup", dtype=np.uint16),
    tiledb.Attr(name="count_of_yizidown", dtype=np.uint16),
    tiledb.Attr(name="ratio_of_uplimit", dtype=np.float32),
    tiledb.Attr(name="ratio_of_downlimit", dtype=np.float32),
    tiledb.Attr(name="ratio_of_yiziup", dtype=np.float32),
    tiledb.Attr(name="ratio_of_yizidown", dtype=np.float32),
    # Variable-length attribute: one list of uint8 values per cell.
    tiledb.Attr(name="lb", var=True, dtype=np.uint8),
]
# NOTE(review): with a datetime64[s] dimension the recorded schema output
# shows the tile extent as "250 seconds". If 250 was meant as a number of
# trading days, the unit needs to be reconsidered -- confirm.
d1 = tiledb.Dim(name="dt", domain=(np.datetime64('1990-01-01'), np.datetime64('2100-01-01')), tile=250, dtype="datetime64[s]")
dom1 = tiledb.Domain(d1)
schema = tiledb.ArraySchema(domain=dom1, sparse=False, attrs=attrs, cell_order='col-major', tile_order='col-major')
schema.check()
tiledb.Array.create(array_market_stats, schema, ctx=ctx)
schema


Domain
"NameDomainTileData TypeIs Var-lengthFiltersdt(numpy.datetime64('1990-01-01T00:00:00'), numpy.datetime64('2100-01-01T00:00:00'))250 secondsdatetime64[s]False-"
Attributes
NameData TypeIs Var-LenIs NullableFilterscount_of_uplimituint16FalseFalse-count_of_downlimituint16FalseFalse-count_of_yiziupuint16FalseFalse-count_of_yizidownuint16FalseFalse-ratio_of_uplimitfloat32FalseFalse-ratio_of_downlimitfloat32FalseFalse-ratio_of_yiziupfloat32FalseFalse-ratio_of_yizidownfloat32FalseFalse-lbuint8TrueFalse-
Cell Order
col-major
Tile Order
col-major
Sparse
False

Name,Domain,Tile,Data Type,Is Var-length,Filters
dt,"(numpy.datetime64('1990-01-01T00:00:00'), numpy.datetime64('2100-01-01T00:00:00'))",250 seconds,datetime64[s],False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
count_of_uplimit,uint16,False,False,-
count_of_downlimit,uint16,False,False,-
count_of_yiziup,uint16,False,False,-
count_of_yizidown,uint16,False,False,-
ratio_of_uplimit,float32,False,False,-
ratio_of_downlimit,float32,False,False,-
ratio_of_yiziup,float32,False,False,-
ratio_of_yizidown,float32,False,False,-
lb,uint8,True,False,-


# 中国期货市场

In [43]:
# Name of the futures market; used as the TileDB group directory under
# tiledb_bucket.
cn_future = 'cn_future'
cn_future_group_name = f"{tiledb_bucket}/{cn_future}"

# Create the group the same way the cn_stock group is created: a failure
# (such as the group already existing on a re-run) is printed instead of
# aborting the cell.
try:
    tiledb.group_create(cn_future_group_name, ctx=ctx)
except Exception as e:
    print(e)