In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import data_handle
import tensorflow as tf

# 载入并处理数据
1.读取数据

In [2]:
# Read the training CSV through the project-local `data_handle` helper and
# unpack its four return values.
# NOTE(review): judging by the names these are presumably the feature matrix,
# the dummy (one-hot) columns, per-sample weights and the labels -- confirm
# against data_handle.read_data.
# NOTE(review): `weight` is not referenced again in the visible part of this
# notebook.
X, dummies, weight, label = data_handle.read_data('data/stock_train_data_20170910.csv')

2.对特征进行缩放，并去除极端值（quantile_percent=0.995）

In [3]:
# Pass the features and dummy columns through the project-local scaling helper
# with quantile_percent=0.995.
# NOTE(review): presumably values beyond the 99.5% quantile are treated as
# outliers -- confirm against data_handle.scale_feature.
# `X` is rebound to the helper's first return value, so re-running this cell
# without re-running the loading cell feeds already-processed data back in.
X, scaled_features = data_handle.scale_feature(X,dummies,quantile_percent=0.995)

3.将数据进行随机分组，分成测试集与训练集

In [4]:
# Split features and labels into train and test parts via the project-local
# helper, holding out test_size=0.1 for testing.
# NOTE(review): no seed is passed here; whether the split is reproducible
# depends on data_handle.data_split -- confirm.
X_train, Y_train, X_test, Y_test = data_handle.data_split(X, label, test_size=0.1)
# Sanity check: print the shape of each of the four arrays.
print('X_train shape:',X_train.shape,'\n',
     'Y_train shape:', Y_train.shape,'\n',
     'X_test shape:', X_test.shape,'\n',
     'Y_test shape:', Y_test.shape)

X_train shape: (289506, 116) 
 Y_train shape: (289506,) 
 X_test shape: (32168, 116) 
 Y_test shape: (32168,)


4.定义分批获取数据函数

In [5]:
def get_batches(X, Y, batch_size):
    """Yield consecutive mini-batches covering every sample of X and Y.

    Args:
        X: 2-D array-like supporting slicing, ``.shape`` and ``.reshape``
            (e.g. a NumPy array) with one row per sample.
        Y: 1-D array-like of targets, aligned with the rows of ``X``.
        batch_size: maximum number of samples per batch; must be positive.

    Yields:
        Tuples ``(x, y)`` where ``x`` has shape ``(n, X.shape[1])`` and ``y``
        has shape ``(n, 1)``. ``n`` equals ``batch_size`` for every batch
        except possibly the last one, which holds the remaining samples.

    Raises:
        ValueError: if ``batch_size`` is not positive (raised when iteration
            starts, because this is a generator).
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    num_features = X.shape[1]
    for start in range(0, len(X), batch_size):
        stop = start + batch_size
        # Slicing clamps at the end of the array, so the final short batch
        # needs no special-casing (an explicit end index of -1 would drop the
        # last sample, or produce an empty batch when one sample remains).
        x = X[start:stop].reshape(-1, num_features)
        y = Y[start:stop].reshape(-1, 1)
        yield x, y

# 模型构建

In [6]:
def build_inputs(num_features):
    """Create the placeholders that are fed on every session run.

    Args:
        num_features: width of the feature dimension of the input placeholder.

    Returns:
        A tuple ``(inputs, targets, keep_prob)`` of float32 placeholders:
        ``inputs`` with shape ``[None, num_features]``, ``targets`` with shape
        ``[None, 1]``, and ``keep_prob`` declared without a shape.
    """
    feature_ph = tf.placeholder(tf.float32, shape=[None, num_features], name='inputs')
    label_ph = tf.placeholder(tf.float32, shape=[None, 1], name='targets')
    keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob')
    return feature_ph, label_ph, keep_prob_ph

In [7]:
def fc_model(inputs,keep_prob):
    """Build a fully connected network and return its un-activated output.

    The network stacks three ReLU dense layers of 58, 29 and 14 units, each
    followed by dropout, and ends with a single linear unit.

    Args:
        inputs: tensor fed to the first dense layer.
        keep_prob: keep probability handed to every ``tf.nn.dropout`` call.

    Returns:
        The output tensor of the final 1-unit dense layer (logits).
    """
    hidden_units = (58, 29, 14)

    activations = inputs
    for units in hidden_units:
        # A fresh initializer with default arguments is created per layer.
        # NOTE(review): the first losses recorded in this notebook's training
        # log are far above ln(2); the default initializer scale may be too
        # large for this network -- confirm before changing.
        dense_out = tf.layers.dense(
            activations,
            units,
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(),
        )
        activations = tf.nn.dropout(dense_out, keep_prob)

    return tf.layers.dense(
        activations,
        1,
        activation=None,
        kernel_initializer=tf.truncated_normal_initializer(),
    )

# 训练模型

In [8]:
def train(X_train,Y_train,X_test,Y_test,keep_prob,epoch_count, batch_size, learning_rate=0.001, num_features=116, print_every=1000):
    """Build the binary classifier in the current default graph and train it.

    The loss is the mean sigmoid cross-entropy between the model logits and
    the targets, minimized with Adam. Every ``print_every`` training steps the
    loss and accuracy on the full test set are evaluated and printed together
    with the metrics of the most recent training batch.

    Args:
        X_train: training features; rows are batched via ``get_batches``.
        Y_train: training targets aligned with ``X_train``.
        X_test: test features; reshaped to ``(-1, num_features)`` for feeding.
        Y_test: test targets; reshaped to ``(-1, 1)`` for feeding.
        keep_prob: dropout keep probability fed during training steps.
        epoch_count: number of passes over the training data.
        batch_size: number of samples per training batch.
        learning_rate: learning rate passed to the Adam optimizer.
        num_features: number of input features per sample.
        print_every: evaluate and print metrics every this many steps.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    inputs, targets, k_p = build_inputs(num_features)
    # Wire dropout to the placeholder rather than the Python float, so that
    # the value fed for `k_p` actually controls dropout: `keep_prob` while
    # training, 1.0 (dropout disabled) while evaluating on the test set.
    logits = fc_model(inputs, k_p)
    out = tf.sigmoid(logits)
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,labels=targets))
    train_opt = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
    # A prediction is correct when the rounded sigmoid output equals the label.
    correct_pred = tf.equal(tf.cast(tf.round(out), tf.int32), tf.cast(targets, tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # The test feed never changes, so build it once outside the training loop.
    test_feed = {inputs: X_test.reshape(-1, num_features),
                 targets: Y_test.reshape(-1, 1),
                 k_p: 1.0}

    steps = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(epoch_count):
            for x,y in get_batches(X_train,Y_train,batch_size):
                steps += 1
                _, train_loss, train_accuracy = sess.run([train_opt, loss, accuracy], feed_dict={inputs:x, targets:y, k_p:keep_prob})

                if steps % print_every == 0:
                    test_loss, test_accuracy = sess.run([loss, accuracy], feed_dict=test_feed)
                    print("Epoch {}/{}.".format(epoch_i+1, epoch_count),
                          "train_loss: {:.4f}..".format(train_loss),
                          "train_acc: {:.4f}..".format(train_accuracy),
                          "test_loss:{:.4f}..".format(test_loss),
                          "test_acc:{:.4f}..".format(test_accuracy))

In [None]:
# Hyperparameters handed to train() below.
batch_size = 100          # samples per training batch
learning_rate = 0.0001    # learning rate for the Adam optimizer used in train()
keep_prob = 0.9           # dropout keep probability
epochs = 200              # number of passes over the training set

# Build and train inside a fresh graph so that re-running this cell does not
# add a second copy of the model to a previously used default graph.
with tf.Graph().as_default():
    train(X_train,Y_train,X_test,Y_test,keep_prob,epochs,batch_size,learning_rate)

Epoch 1/200. train_loss: 14.5359.. train_acc: 0.5900.. test_loss:21.1278.. test_acc:0.5102..
Epoch 1/200. train_loss: 8.4047.. train_acc: 0.5000.. test_loss:11.3755.. test_acc:0.5023..
Epoch 2/200. train_loss: 6.0696.. train_acc: 0.4300.. test_loss:6.4924.. test_acc:0.4978..
Epoch 2/200. train_loss: 2.6469.. train_acc: 0.5200.. test_loss:3.2698.. test_acc:0.4995..
Epoch 2/200. train_loss: 1.5534.. train_acc: 0.5300.. test_loss:1.9545.. test_acc:0.4922..
Epoch 3/200. train_loss: 2.0327.. train_acc: 0.5100.. test_loss:1.3635.. test_acc:0.5192..
Epoch 3/200. train_loss: 0.7803.. train_acc: 0.5200.. test_loss:1.1073.. test_acc:0.5228..
Epoch 3/200. train_loss: 1.1405.. train_acc: 0.5300.. test_loss:0.9394.. test_acc:0.5239..
Epoch 4/200. train_loss: 0.7434.. train_acc: 0.5400.. test_loss:0.8421.. test_acc:0.5266..
Epoch 4/200. train_loss: 0.6799.. train_acc: 0.5600.. test_loss:0.7967.. test_acc:0.5265..
Epoch 4/200. train_loss: 0.6791.. train_acc: 0.5800.. test_loss:0.7405.. test_acc:0.526

Epoch 32/200. train_loss: 0.6543.. train_acc: 0.6100.. test_loss:0.6829.. test_acc:0.5565..
Epoch 32/200. train_loss: 0.6641.. train_acc: 0.6600.. test_loss:0.6829.. test_acc:0.5549..
Epoch 33/200. train_loss: 0.6947.. train_acc: 0.5000.. test_loss:0.6820.. test_acc:0.5566..
Epoch 33/200. train_loss: 0.6704.. train_acc: 0.5600.. test_loss:0.6827.. test_acc:0.5569..
Epoch 33/200. train_loss: 0.7174.. train_acc: 0.5300.. test_loss:0.6826.. test_acc:0.5568..
Epoch 34/200. train_loss: 0.6720.. train_acc: 0.5900.. test_loss:0.6822.. test_acc:0.5551..
Epoch 34/200. train_loss: 0.6658.. train_acc: 0.5800.. test_loss:0.6809.. test_acc:0.5573..
Epoch 34/200. train_loss: 0.6566.. train_acc: 0.6400.. test_loss:0.6815.. test_acc:0.5589..
Epoch 35/200. train_loss: 0.6680.. train_acc: 0.5500.. test_loss:0.6813.. test_acc:0.5566..
Epoch 35/200. train_loss: 0.6652.. train_acc: 0.6000.. test_loss:0.6813.. test_acc:0.5561..
Epoch 35/200. train_loss: 0.6892.. train_acc: 0.5000.. test_loss:0.6819.. test_a