In [1]:
import pandas as pd
import numpy as np

In [2]:
from pprint import pprint

In [3]:
fname = 'wy_cjmx.h5'
store = pd.HDFStore(fname, mode='r')

In [4]:
pprint(store.info())

("<class 'pandas.io.pytables.HDFStore'>\n"
 'File path: wy_cjmx.h5\n'
 '/df            frame_table  '
 '(typ->appendable_multi,nrows->421395453,ncols->7,indexers->[index],dc->[股票代码,成交时间,成交价,价格变动,成交量,成交额,性质])')


## 索引

In [6]:
i = store.root.df.table.cols.index.index

In [7]:
i.optlevel, i.kind

(9, 'full')

In [8]:
store.get_storer('df').table

/df/table (Table(421395453,), shuffle, blosc:blosclz(9)) ''
  description := {
  "index": Int64Col(shape=(), dflt=0, pos=0),
  "股票代码": StringCol(itemsize=6, shape=(), dflt=b'', pos=1),
  "成交时间": Int64Col(shape=(), dflt=0, pos=2),
  "成交价": Float64Col(shape=(), dflt=0.0, pos=3),
  "价格变动": Float64Col(shape=(), dflt=0.0, pos=4),
  "成交量": Int64Col(shape=(), dflt=0, pos=5),
  "成交额": Float64Col(shape=(), dflt=0.0, pos=6),
  "性质": StringCol(itemsize=9, shape=(), dflt=b'', pos=7)}
  byteorder := 'little'
  chunkshape := (2080,)
  autoindex := True
  colindexes := {
    "index": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "股票代码": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "成交时间": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "成交价": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "价格变动": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "成交量": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "成交额": Index(9, full, shuffle, zlib(1)).is_csi=True,
    "性质": Index(9, full, shuffle,

## 查询

### 主索引`dt`

In [9]:
where = ["成交时间 >= pd.Timestamp('2020-03-24 09:25') & 成交时间 <= pd.Timestamp('2020-03-26 15:01')"]

In [10]:
# 1m22s
df = store.select('df', where=where, iterator=False)

In [11]:
df.shape

(20464301, 5)

### 次索引查询

In [12]:
# 4s
where = [
    "股票代码 in ['000001','000333','300001']"
]
df = store.select('df', where=where, iterator=False)

In [13]:
df.shape

(701504, 5)

### 索引组合

In [14]:
# 1.4s
where = [
    "股票代码 in ['000001','000333','300001']",
    "成交时间 >= pd.Timestamp('2020-03-24 09:25') & 成交时间 <= pd.Timestamp('2020-03-26 15:01')"
]
df = store.select('df', where=where, iterator=False)

In [15]:
df.shape

(36126, 5)

### 列查询

In [16]:
# 0.79s
where = [
    "成交价 > 1000"
]
df = store.select('df', where=where, iterator=False)

In [17]:
df.shape

(155209, 5)

### 组合

In [18]:
# 4.2s
where = [
    "股票代码 in ['000001','000333','300001']",
    "成交时间 >= pd.Timestamp('2020-03-24 09:25') & 成交时间 <= pd.Timestamp('2020-03-26 15:01')",
    "成交价 > 30"
]
df = store.select('df', where=where, iterator=False)

In [19]:
df.shape

(12639, 5)

### `或`与`非`表达式

In [20]:
# 2.5s
where = [
    "(股票代码 = '000001') | (股票代码 = ['600000','600645'])",
]
df = store.select('df', where=where, iterator=False)
df.reset_index(inplace=True)
df['股票代码'].unique()

array(['000001', '600000', '600645'], dtype=object)

In [21]:
# 0.7s
where = [
    "性质 != '买盘' and (股票代码 = ['600000','600645'])",
]
df = store.select('df', where=where, iterator=False)
df['性质'].unique()

array(['卖盘', '中性盘'], dtype=object)

**注**：`~(expr)`用法受限

## 属性

In [29]:
last_date = pd.Timestamp('1990')
for df in store.select('df', iterator=True):
    last_date = max(last_date, df.index.get_level_values(0).max())

In [31]:
last_date

Timestamp('2020-03-26 15:00:27')

In [32]:
store.get_storer('df').attrs

/df._v_attrs (AttributeSet), 15 attributes:
   [CLASS := 'GROUP',
    TITLE := '',
    VERSION := '1.0',
    data_columns := ['股票代码', '成交时间', '成交价', '价格变动', '成交量', '成交额', '性质'],
    encoding := 'UTF-8',
    errors := 'strict',
    index_cols := [(0, 'index')],
    info := {1: {'names': [None], 'type': 'Index'}, 'index': {}, '股票代码': {}, '成交时间': {}, '成交价': {}, '价格变动': {}, '成交量': {}, '成交额': {}, '性质': {}},
    levels := ['成交时间', '股票代码'],
    nan_rep := 'nan',
    non_index_axes := [(1, ['成交时间', '股票代码', '成交价', '价格变动', '成交量', '成交额', '性质'])],
    pandas_type := 'frame_table',
    pandas_version := '0.15.2',
    table_type := 'appendable_multiframe',
    values_cols := ['股票代码', '成交时间', '成交价', '价格变动', '成交量', '成交额', '性质']]

In [30]:
# 设置属性
store.get_storer('df').attrs.last_date = last_date

FileModeError: the file is not writable

In [None]:
store.close()

In [None]:
store.open()
store.get_storer('df').attrs.last_date

In [33]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,成交价,价格变动,成交量,成交额,性质
成交时间,股票代码,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-20 10:36:19,688396,41.13,0.00,53,219303.0,卖盘
2020-03-20 10:36:25,688396,41.20,0.07,22,92639.0,买盘
2020-03-20 10:36:28,688396,41.14,-0.06,11,47967.0,中性盘
2020-03-20 10:36:34,688396,41.20,0.00,8,35351.0,卖盘
2020-03-20 10:36:41,688396,41.17,-0.03,4,16468.0,卖盘
...,...,...,...,...,...,...
2020-03-26 14:56:46,688399,83.09,0.00,12,100206.0,买盘
2020-03-26 14:56:51,688399,83.09,0.00,23,196093.0,卖盘
2020-03-26 14:56:53,688399,83.00,-0.09,64,539347.0,卖盘
2020-03-26 14:56:57,688399,83.09,0.09,4,39917.0,中性盘


In [34]:
store.get_storer('df').nrows

421395453