In [1]:
from zipline.pipeline.loaders.blaze import from_blaze, BlazeLoader
from zipline.pipeline.fundamentals.reader import _gen_expr, fillvalue_for_expr

In [2]:
import pandas as pd
from zipline.research import create_domain
from zipline.pipeline import Pipeline
from zipline.pipeline.loaders.blaze import (
    from_blaze,
    BlazeLoader,
    NoMetaDataWarning,
)
from zipline.data import bundles
from zipline.pipeline.engine import SimplePipelineEngine

# 检查数据

In [3]:
from zipline.pipeline.fundamentals.preprocess import get_static_info_table

In [4]:
# Load the static-info table. The helper's result is unpacked into two names:
# `df` (indexed by column label in the cells below) and `maps`.
# NOTE(review): presumably `df` is a pandas DataFrame and `maps` holds
# code/label mappings -- confirm against get_static_info_table.
df, maps = get_static_info_table()

In [5]:
# Inspect the dtype of column 'A254' (recorded output: bool).
df['A254'].dtype

dtype('bool')

In [6]:
# Inspect the dtype of the '申万一级行业' (Shenwan level-1 industry) column
# (recorded output: object).
df['申万一级行业'].dtype

dtype('O')

# 表达式

In [7]:
# Name of the table the expression is generated for; passed to _gen_expr below.
table_name = 'infoes'

In [8]:
# Generate the expression for `table_name`; it is later handed to from_blaze
# and to fillvalue_for_expr.
# NOTE(review): presumably a blaze expression over the fundamentals table --
# confirm in zipline.pipeline.fundamentals.reader.
expr = _gen_expr(table_name)

# loader

In [9]:
# One BlazeLoader instance, shared by the from_blaze calls and by the
# SimplePipelineEngine constructed further down.
loader = BlazeLoader()

# finder

In [10]:
# Load the 'cndaily' data bundle and take its asset finder; `finder` is passed
# to SimplePipelineEngine when the pipelines are run.
bundle = 'cndaily'
bundle_data = bundles.load(bundle)
finder = bundle_data.asset_finder

# domain

In [11]:
from zipline.research import create_domain

In [12]:
# Daily calendar range with both endpoints included (pandas' default
# frequency), so weekend days are part of it as well.
# NOTE(review): the recorded result index contains 2020-06-06/07 (a weekend) --
# confirm that non-trading days are intended here.
dates = pd.date_range(start='2020-06-01', end='2020-07-05')

In [13]:
# Build the pipeline domain from `dates`; it is passed to from_blaze and,
# later, to Pipeline(domain=...).
domain = create_domain(dates)

# dataset

In [14]:
# Wrap the expression as a pipeline dataset bound to `loader` and `domain`.
# Fill values for missing data are derived from the expression itself.
# NOTE(review): the two 'ignore' rules presumably silence the absence of
# deltas / checkpoints tables -- confirm against from_blaze.
fill_values = fillvalue_for_expr(expr)
ds = from_blaze(
    expr,
    loader=loader,
    no_deltas_rule='ignore',
    no_checkpoints_rule='ignore',
    missing_values=fill_values,
    domain=domain,
)

In [15]:
# Pipeline with one output column per static-info field, each holding the
# field's latest value. No domain is passed to Pipeline() here.
p = Pipeline()
for a in ('申万一级行业', '申万一级行业编码', '注册资本', 'A100'):
    bound_column = getattr(ds, a)
    p.add(bound_column.latest, a)

In [16]:
# Build an engine from the shared loader and the bundle's asset finder, then
# run the pipeline from the first to the last entry of `dates`.
# NOTE(review): the recorded tail ends at 2020-07-04 although dates[-1] is
# 2020-07-05 -- confirm how the end date is handled.
engine = SimplePipelineEngine(loader, finder)
result = engine.run_pipeline(p, dates[0], dates[-1])

In [17]:
# Show the last rows of the pipeline output (five rows in the recorded output).
result.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,申万一级行业,申万一级行业编码,注册资本,A100
datetime,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-04 00:00:00+00:00,吉贝尔(688566),医药生物,11,18694.16,False
2020-07-04 00:00:00+00:00,凌志软件(688588),计算机,25,40001.0003,False
2020-07-04 00:00:00+00:00,金博股份(688598),有色金属,4,8000.0,False
2020-07-04 00:00:00+00:00,天合光能(688599),电气设备,22,206802.6375,False
2020-07-04 00:00:00+00:00,N皖仪(688600),机械设备,23,13334.0,False


In [18]:
# First level of the result's index: the distinct datetimes covered by the run
# (a UTC DatetimeIndex in the recorded output).
result.index.levels[0]

DatetimeIndex(['2020-06-01 00:00:00+00:00', '2020-06-02 00:00:00+00:00',
               '2020-06-03 00:00:00+00:00', '2020-06-04 00:00:00+00:00',
               '2020-06-05 00:00:00+00:00', '2020-06-06 00:00:00+00:00',
               '2020-06-07 00:00:00+00:00', '2020-06-08 00:00:00+00:00',
               '2020-06-09 00:00:00+00:00', '2020-06-10 00:00:00+00:00',
               '2020-06-11 00:00:00+00:00', '2020-06-12 00:00:00+00:00',
               '2020-06-13 00:00:00+00:00', '2020-06-14 00:00:00+00:00',
               '2020-06-15 00:00:00+00:00', '2020-06-16 00:00:00+00:00',
               '2020-06-17 00:00:00+00:00', '2020-06-18 00:00:00+00:00',
               '2020-06-19 00:00:00+00:00', '2020-06-20 00:00:00+00:00',
               '2020-06-21 00:00:00+00:00', '2020-06-22 00:00:00+00:00',
               '2020-06-23 00:00:00+00:00', '2020-06-24 00:00:00+00:00',
               '2020-06-25 00:00:00+00:00', '2020-06-26 00:00:00+00:00',
               '2020-06-27 00:00:00+00:00', '2020-0

In [19]:
# Show the columns registered on the pipeline as a name -> term mapping.
p.columns

{'申万一级行业': Latest([BlazeDataSet_0<CN>.申万一级行业], 1),
 '申万一级行业编码': Latest([BlazeDataSet_0<CN>.申万一级行业编码], 1),
 '注册资本': Latest([BlazeDataSet_0<CN>.注册资本], 1),
 'A100': Latest([BlazeDataSet_0<CN>.A100], 1)}

In [20]:
# Fresh, empty pipeline built without a domain argument.
# NOTE: this rebinds `p`, so the pipeline that produced `result` is no longer
# reachable under that name.
p = Pipeline()

In [21]:
# Peek at the pipeline's private `_domain` attribute; the recorded output is
# GENERIC for a pipeline constructed without a domain argument.
p._domain

GENERIC

In [22]:
# Second dataset built from the same expression with the same options as `ds`;
# it feeds the pipeline that is given an explicit domain.
blaze_options = {
    'loader': loader,
    'no_deltas_rule': 'ignore',
    'no_checkpoints_rule': 'ignore',
    'missing_values': fillvalue_for_expr(expr),
    'domain': domain,
}
ds2 = from_blaze(expr, **blaze_options)

In [23]:
# Same four latest-value columns as the first pipeline, but read from `ds2`
# and with the domain passed explicitly to Pipeline.
p = Pipeline(domain=domain)
fields = ['申万一级行业', '申万一级行业编码', '注册资本', 'A100']
for a in fields:
    latest_term = getattr(ds2, a).latest
    p.add(latest_term, a)

In [24]:
# Run the explicit-domain pipeline over the same date span as the first run.
start, end = dates[0], dates[-1]
result_2 = SimplePipelineEngine(loader, finder).run_pipeline(p, start, end)

In [25]:
# Display the second result; the recorded output is shown truncated, with a
# '...' row between the first and last rows.
# NOTE(review): consider result_2.head() to keep the saved output small.
result_2

Unnamed: 0_level_0,Unnamed: 1_level_0,申万一级行业,申万一级行业编码,注册资本,A100
datetime,asset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-06-01 00:00:00+00:00,平安银行(000001),银行,17,1.940592e+06,False
2020-06-01 00:00:00+00:00,万 科Ａ(000002),房地产,14,1.161773e+06,False
2020-06-01 00:00:00+00:00,国农科技(000004),医药生物,11,1.650526e+04,False
2020-06-01 00:00:00+00:00,世纪星源(000005),公用事业,12,1.058537e+05,False
2020-06-01 00:00:00+00:00,深振业Ａ(000006),房地产,14,1.349995e+05,False
...,...,...,...,...,...
2020-07-04 00:00:00+00:00,吉贝尔(688566),医药生物,11,1.869416e+04,False
2020-07-04 00:00:00+00:00,凌志软件(688588),计算机,25,4.000100e+04,False
2020-07-04 00:00:00+00:00,金博股份(688598),有色金属,4,8.000000e+03,False
2020-07-04 00:00:00+00:00,天合光能(688599),电气设备,22,2.068026e+05,False
