### 所有股票信息
* 因子是可以进行全行业对比的
* 价格是没法对比的。所以我让所有的价格都除以第一天的价格，得到与第一天上升或下降的比例，然后减去1.
* 先合并，去除nan，再分开，计算完再合并，并且要添加到最后。

In [None]:
# 0. 基础参数配置
class conf:
    """Shared settings for data loading, sample construction, training and backtesting."""

    # Overall date range, plus the date that separates training data from
    # test/backtest data.
    start_date, split_date, end_date = '2012-01-01', '2018-01-01', '2018-07-20'

    # Price/volume columns requested from the history data.
    fields = [
        'close',
        'open',
        'high',
        'low',
        'amount',
        'volume',
    ]
    # Factor columns requested from the feature data.
    features = ['pe_ttm_0']

    # Length of every model input: the past 30 days of data.
    feature_back_days = 30
    # Samples per batch during gradient descent; each batch triggers one
    # gradient step on the objective.
    batch_size = 100
    # Horizon, in days, of the expected return.
    return_days = 5
    
# Resolve the instrument universe for the configured date range through the
# platform's instruments module. Only the two listed A-share codes are requested.
# NOTE(review): the meaning of max_count=0 is not visible in this file
# (presumably "no limit") — confirm against the module documentation.
instrument = M.instruments.v2(
    start_date=conf.start_date,
    end_date=conf.end_date,
    market='CN_STOCK_A',
    instrument_list=['000005.SZA','000006.SZA'],
    max_count=0
)

## 加载数据

In [None]:
# Unpack the inputs for load_data: the instrument codes stored under the
# 'instruments' key of the module output, plus the date range and column lists
# from conf.
instruments=instrument.data.read()['instruments']
start_date=conf.start_date
end_date=conf.end_date
fields=conf.fields
features=conf.features

def load_data(instruments, start_date, end_date, fields, features):
    """Fetch price history and factor values and join them into one frame.

    Args:
        instruments: instrument codes to load.
        start_date: first date of the requested range.
        end_date: last date of the requested range.
        fields: column names passed to ``D.history_data``.
        features: factor names passed to ``D.features``.

    Returns:
        The inner join of both tables on ``date`` and ``instrument``, indexed
        by ``date``, with every row containing a missing value removed.
    """
    price_history = D.history_data(instruments, start_date, end_date, fields)
    factor_values = D.features(instruments, start_date, end_date, features)
    # Inner join: keep only the (date, instrument) pairs present in both tables.
    merged = price_history.merge(
        factor_values,
        on=['date', 'instrument'],
        how='inner',
    )
    return merged.set_index('date').dropna()

# Merged price + factor table for all requested instruments, indexed by date.
data=load_data(instruments, start_date, end_date, fields,features)

In [None]:
# Notebook display: (row count, column count) of the merged table.
data.shape

### 单只股票

In [None]:
def construct_data(conf,data):
    """Build rolling-window samples for the LSTM, one window per trading day.

    For each instrument (only the first six groups are processed, as in the
    original notebook) every run of ``conf.feature_back_days`` consecutive rows
    becomes one sample:

    * the ``conf.fields`` columns are expressed as change relative to the first
      row of the window (``value / first_value - 1``), which makes price-like
      series comparable across instruments;
    * the ``conf.features`` columns are appended unchanged;
    * the label is the percentage return from the next row's ``open`` to the
      ``close`` ``return_days`` rows ahead, both measured from the last row of
      the window.

    Args:
        conf: settings object exposing ``fields``, ``features`` and
            ``feature_back_days``; ``return_days`` is optional and defaults
            to 5.
        data: frame indexed by date that contains an ``instrument`` column,
            ``close`` and ``open``, and every column named in ``conf.fields``
            and ``conf.features``. Rows of each instrument are used in the
            order they appear.

    Returns:
        DataFrame with columns ``X`` (2-D array, one row per day of the
        window), ``y`` (label), ``instrument`` and ``date`` (last day of the
        window). Samples whose label is NaN, i.e. windows too close to the end
        of the series, are dropped.
    """
    import numpy as np  # local import keeps the function self-contained

    window = conf.feature_back_days
    horizon = getattr(conf, 'return_days', 5)
    max_instruments = 6  # groups beyond the sixth are skipped

    samples = []
    for group_no, (ins, df) in enumerate(data.groupby(by='instrument')):
        if group_no >= max_instruments:
            break

        # Forward return in percent: buy at the next open, sell at the close
        # `horizon` rows ahead.
        label = (100 * (df['close'].shift(-horizon) / df['open'].shift(-1) - 1)).values
        dates = df.index.values
        # Pull the column blocks out once; windows are then plain array slices
        # addressed by position, so nothing depends on index labels.
        price_block = df[conf.fields].values
        factor_block = df[conf.features].values

        for start in range(df.shape[0] - window + 1):
            stop = start + window
            # A zero in the first row of the window yields inf/NaN entries,
            # exactly as the division would in pandas; silence the warning.
            with np.errstate(divide='ignore', invalid='ignore'):
                relative = price_block[start:stop] / price_block[start] - 1
            X = np.hstack([relative, factor_block[start:stop]])
            # The sample is dated, and labelled, by the last day of its window.
            samples.append([X, label[stop - 1], ins, dates[stop - 1]])

    result = pd.DataFrame(samples, columns=['X', 'y', 'instrument', 'date'])
    result.dropna(inplace=True)
    return result
# Build the full sample table (X, y, instrument, date) that is split into
# training and test sets further down.
construt=construct_data(conf,data)

### 拆分数据集

In [None]:
# Chronological split. Samples dated exactly on split_date go to the test set
# only, so the two sets never share a sample; the test range starts on
# conf.split_date, the same date the backtest starts on.
train_data=construt.query('date < "%s"' % conf.split_date)
test_data=construt.query('date >= "%s"' % conf.split_date)

In [None]:
# 4. LSTM模型训练
from keras.layers import Input, Dense, LSTM, concatenate
from keras.models import Model
from keras import optimizers

# Width of one time step of X: the price/volume columns plus the factor
# columns. Derived from conf so it stays in sync with the sample builder
# (6 fields + 1 feature = 7 with the current settings).
input_dim = len(conf.fields) + len(conf.features)

def activation_atan(x):
    """Arctangent activation: return ``tf.atan(x)`` for the given tensor."""
    # TensorFlow is imported when the function is called, not at module import.
    import tensorflow as tf

    squashed = tf.atan(x)
    return squashed


def lstm_train(df, batch_size, activation):
    """Build, fit and persist the LSTM regression network.

    The network is one 128-unit LSTM layer followed by Dense(64, relu),
    Dense(16, relu) and a single-unit output layer using ``activation``. It is
    compiled with RMSprop and mean-squared-error loss and trained for 10 epochs.

    Args:
        df: frame with an ``X`` column (one 2-D array per row) and a ``y``
            column of targets.
        batch_size: number of samples per gradient step.
        activation: activation used by the LSTM layer and the output layer.

    Returns:
        Outputs whose ``data`` is the DataSource the fitted model was saved to.
    """
    # Network definition: sequence input -> LSTM -> two hidden dense layers
    # -> scalar output.
    sequence_in = Input(shape=(conf.feature_back_days, input_dim), name='lstm_input')
    recurrent = LSTM(128, activation=activation, dropout_W=0.2, dropout_U=0.1)(sequence_in)
    hidden_wide = Dense(64, activation='relu')(recurrent)
    hidden_narrow = Dense(16, activation='relu')(hidden_wide)
    score_out = Dense(1, activation=activation)(hidden_narrow)

    network = Model(input=sequence_in, output=score_out)
    optimizer = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
    network.compile(optimizer=optimizer, loss='mse', metrics=['mse'])

    # Stack the per-row arrays into the dense tensors Keras expects.
    inputs = np.array(df['X'].values.tolist())
    targets = np.array(df['y'].values.tolist())
    network.fit(
        inputs,
        targets,
        batch_size=batch_size,
        nb_epoch=10,
        verbose=2,
    )

    # Persist the fitted model through a DataSource temp path.
    # NOTE(review): close_temp_path() is not reached if save() raises — confirm
    # whether the temp path needs releasing in that case.
    model_ds = DataSource()
    save_path = model_ds.open_temp_path()
    network.save(save_path)
    model_ds.close_temp_path()

    return Outputs(data=model_ds)

# Train on the pre-split samples; lstm.data is the DataSource holding the saved model.
lstm=lstm_train(train_data,conf.batch_size,activation_atan)


In [None]:
# Notebook display: the DataSource returned by lstm_train.
lstm.data

In [None]:
# 5. LSTM 预测
# NOTE(review): `dd` is not referenced anywhere else in the visible source —
# possibly leftover scratch state; confirm before removing.
dd = [None, None]
def lstm_predict(model_ds, df, activation):
    """Score ``df`` with the saved model and return the date/score pairs.

    Args:
        model_ds: DataSource the trained model was saved to.
        df: frame with ``X`` (one 2-D array per row), ``y`` and ``date``
            columns. It is not modified.
        activation: the custom activation the model was trained with.

    Returns:
        Outputs whose ``data`` is a DataSource holding the ``date`` and
        ``score`` columns.

    Raises:
        Whatever ``load_model`` raises; the temp path is closed before the
        exception propagates.
    """
    import keras
    from keras.models import load_model

    # Expose the custom activation on keras.activations under the function's
    # name — presumably so load_model can resolve it when rebuilding the
    # saved network.
    keras.activations.activation_atan = activation
    try:
        model = load_model(model_ds.open_temp_path())
    except BaseException:
        # Same scope as a bare `except:`; clean up, then re-raise unchanged.
        model_ds.close_temp_path()
        raise

    predictions = model.predict(
        np.array(df['X'].values.tolist()))
    # assign() returns a new frame, so the caller's DataFrame (a query() result
    # at the call site) is not mutated.
    scored = df.assign(score=predictions.flatten())

    # Scatter plot of predicted score against the realised label.
    T.plot(
        scored,
        x='y', y=['score'], chart_type='scatter',
        title='LSTM预测结果：实际值 vs. 预测值'
    )

    return Outputs(data=DataSource.write_df(scored[['date', 'score']]))

# Run lstm_predict on the test set through the platform's M.cached.v2 wrapper.
# NOTE(review): presumably the wrapper caches the result keyed on the function
# and its kwargs — confirm against the platform documentation.
lstm_m5 = M.cached.v2(run=lstm_predict, kwargs=dict(
    model_ds=lstm.data,
    df=test_data,
    activation=activation_atan
))

In [None]:
# Notebook display: the instrument codes that will be passed to the backtest.
instrument.data.read()['instruments']

In [263]:
# 6. Backtest: trade the configured instruments, benchmarked against the CSI 300 (000300.SHA)
def initialize(context):
    """One-time backtest setup: commission schedule and prediction table."""
    # The engine ships with default commission and slippage settings; the
    # commission is overridden here with per-order buy/sell rates and a
    # minimum charge.
    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

    # Load the predictions handed over through the 'prediction_ds' option.
    prediction_source = context.options['prediction_ds']
    context.predictions = prediction_source.read_df()

# 回测引擎：每日数据处理函数，每天执行一次
def handle_data(context, data):
    """Daily trading rule driven by the day's predicted score.

    With no position: buy up to 90% of the portfolio when the score is
    positive and remember the purchase date. With an open position: sell
    everything once the score is negative and at least five sessions have
    passed since the purchase.
    """
    today = data.current_dt.strftime('%Y-%m-%d')
    predictions = context.predictions
    todays_rows = predictions[predictions.date == today]
    if len(todays_rows) == 0:
        # No prediction for this date: nothing to do.
        return

    score = todays_rows.score.iloc[0]
    sid = context.symbol(context.options['instrument'])
    held_amount = context.portfolio.positions[sid].amount

    if held_amount == 0:
        # Flat: open a position only when a rise is predicted.
        if score > 0:
            context.order_target_percent(sid, 0.9)
            context.extension['last_buy_date'] = today
        return

    # Holding: act only on a predicted fall.
    if not (score < 0):
        return

    bought_on = pd.Timestamp(context.extension['last_buy_date'])
    hold_days = context.trading_calendar.session_distance(
        bought_on,
        pd.Timestamp(today),
    )
    # Close the position only after the minimum five-session holding period.
    if hold_days >= 5:
        context.order_target(sid, 0)

# 调用回测引擎
# Run the backtest over the test period (split_date .. end_date) with the
# callbacks defined above and 000300.SHA as the benchmark.
# NOTE(review): options['instrument'] receives the full instruments list here,
# while handle_data passes options['instrument'] straight to context.symbol()
# as if it were a single code — confirm this is intended. The captured output
# below this cell ends in a ValueError; whether the two are related is not
# established here.
lstm_m6 = M.trade.v2(
    instruments=instrument.data.read()['instruments'],
    start_date=conf.split_date,
    end_date=conf.end_date,
    initialize=initialize,
    handle_data=handle_data,
    order_price_field_buy='open',       # buy at the open
    order_price_field_sell='close',     # sell at the close
    capital_base=10000,
    benchmark='000300.SHA',
    options={'instrument': instrument.data.read()['instruments'], 'prediction_ds': lstm_m5.data}
)

[2018-07-25 20:05:40.816364] INFO: bigquant: backtest.v7 开始运行..
[2018-07-25 20:05:40.819331] INFO: bigquant: biglearning backtest:V7.1.2


ValueError: Can't convert non-uniquely indexed DataFrame to Panel