In [3]:
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error

# Hyper-parameters shared by the data helpers, the graph builder and the training loop.
num_of_input_nodes = 4  # features per time step (last dimension of the input placeholder)
num_of_hidden_nodes = 256  # LSTM cell size; also the width of the input projection
num_of_output_nodes = 3  # width of the softmax output and of the supervisor placeholder
length_of_sequences = 24  # time steps per input window
num_of_training_epochs = 1000  # iterations of the training loop (one mini-batch each)
size_of_mini_batch = 20  # windows per mini-batch
num_of_prediction_epochs = 20  # passed to make_prediction() as nb_of_samples by calc_accuracy()
learning_rate = 0.01  # step size given to GradientDescentOptimizer
forget_bias = 0.5  # forget_bias argument of BasicLSTMCell
num_of_sample = 1000  # passed to create_data() as nb_of_samples (create_data does not read it)

In [4]:
def get_batch(batch_size, X, t, epoch, sequence_len=None):
    """Build one mini-batch of sliding windows and their targets.

    Window ``j`` of the batch holds rows ``X[start + j]`` through
    ``X[start + j + sequence_len - 1]`` and is paired with the target row
    ``t[start + j + sequence_len]``, i.e. the row right after the window.

    Args:
        batch_size: Number of windows in the batch.
        X: Row-indexable feature sequence (NumPy array or list of rows).
        t: Row-indexable target sequence aligned with ``X``.
        epoch: Requested start offset. It is wrapped with a modulo so that any
            integer maps onto a valid offset for the data actually supplied.
        sequence_len: Rows per window. Defaults to the module-level
            ``length_of_sequences`` when omitted.

    Returns:
        Tuple ``(xs, ts)`` where ``xs`` stacks ``batch_size`` windows of
        ``sequence_len`` rows each and ``ts`` stacks the matching target rows.

    Raises:
        ValueError: If the data is too short to hold a single full batch.
    """
    if sequence_len is None:
        sequence_len = length_of_sequences

    # The highest row touched is t[start + sequence_len + batch_size - 1], so the
    # number of valid start offsets follows from the real data length instead of
    # a hard-coded dataset size.
    usable_rows = min(len(X), len(t))
    num_offsets = usable_rows - batch_size - sequence_len + 1
    if num_offsets <= 0:
        raise ValueError(
            "need at least %d rows for batch_size=%d and sequence_len=%d, got %d"
            % (batch_size + sequence_len, batch_size, sequence_len, usable_rows)
        )

    # Wrap around to the beginning once the offset runs past the last valid window.
    start = epoch % num_offsets

    xs = np.array([X[start + j:start + j + sequence_len] for j in range(batch_size)])
    ts = np.array(t[start + sequence_len:start + sequence_len + batch_size])

    return xs, ts

In [5]:
def create_data(nb_of_samples, sequence_len):
    """Load the USD/JPY rate CSV and return scaled features plus direction labels.

    Every row is compared with the HIGH value found 24 rows later. The result is
    encoded in three label columns: EQ (equal), UP (later HIGH is larger) and
    DN (later HIGH is smaller).

    Args:
        nb_of_samples: Accepted for interface compatibility; not read here.
        sequence_len: Accepted for interface compatibility; not read here.

    Returns:
        Tuple ``(X_train, y_train)``: the first four scaled columns of the
        feature frame (scaled to [-1, 1]) and the scaled label columns
        (scaled to [0, 1]).
    """
    # A negative shift moves the value from 24 rows ahead onto the current row.
    horizon = -24

    # Read the historical rates from the CSV file.
    rates = pd.read_csv("USDJPY_hourly2010_train.txt")

    # Append the future HIGH as the last column, then drop the trailing rows
    # that have no future value to compare against.
    labelled = rates.copy()
    labelled['ANS'] = rates['HIGH'].shift(horizon)
    labelled = labelled[:horizon]

    # For each label: rows matching the first mask get 1., rows matching the
    # second mask get 0. Rows matching neither mask are left untouched.
    high_now = labelled['HIGH']
    high_later = labelled['ANS']
    label_rules = (
        ('EQ', high_now == high_later, high_now != high_later),
        ('UP', high_now < high_later, high_now >= high_later),
        ('DN', high_now > high_later, high_now <= high_later),
    )
    for column, is_one, is_zero in label_rules:
        labelled.loc[is_one, column] = 1.
        labelled.loc[is_zero, column] = 0.

    # Feature frame: everything except the ticker, date/time and helper column.
    feature_frame = labelled.drop(columns=['DTYYYYMMDD', 'TIME', 'TICKER', 'ANS'])
    # Label frame: the feature frame without the price columns.
    label_frame = feature_frame.drop(columns=['OPEN', 'HIGH', 'LOW', 'CLOSE'])

    # Normalise the data.
    feature_scaler = MinMaxScaler(feature_range=(-1, 1))
    feature_scaler.fit(feature_frame)
    features_norm = feature_scaler.transform(feature_frame)

    label_scaler = MinMaxScaler(feature_range=(0, 1))
    label_scaler.fit(label_frame)
    labels_norm = label_scaler.transform(label_frame)

    # Split into features (first four columns) and targets (all label columns).
    X_train = features_norm[:, :4]
    y_train = labels_norm[:]

    return X_train, y_train


In [6]:
def make_prediction(nb_of_samples, epoch):
    """Build one evaluation mini-batch from freshly loaded data.

    Loads the data through ``create_data`` and cuts ``size_of_mini_batch``
    sliding windows of ``length_of_sequences`` rows, each paired with the
    target row that follows the window.

    Args:
        nb_of_samples: Forwarded to ``create_data``.
        epoch: Requested start offset. It is wrapped with a modulo so that any
            integer maps onto a valid offset for the loaded data.

    Returns:
        Tuple ``(rxs, rts)``: the stacked input windows and their target rows.

    Raises:
        ValueError: If the loaded data is too short to hold one full batch.
    """
    txs, tts = create_data(nb_of_samples, length_of_sequences)

    # The highest row touched is tts[start + length_of_sequences + size_of_mini_batch - 1];
    # derive the number of valid offsets from the real data length rather than
    # from a hard-coded dataset size.
    usable_rows = min(len(txs), len(tts))
    num_offsets = usable_rows - size_of_mini_batch - length_of_sequences + 1
    if num_offsets <= 0:
        raise ValueError(
            "need at least %d rows to build a prediction batch, got %d"
            % (size_of_mini_batch + length_of_sequences, usable_rows)
        )

    # Wrap around to the beginning once the offset runs past the last valid window.
    start = epoch % num_offsets

    rxs = np.array([txs[start + j:start + j + length_of_sequences]
                    for j in range(size_of_mini_batch)])
    rts = np.array(tts[start + length_of_sequences:
                       start + length_of_sequences + size_of_mini_batch])
    return rxs, rts

In [7]:
def inference(input_ph, istate_ph):
    """Build the forward graph: input projection -> LSTM -> softmax output.

    Args:
        input_ph: Input placeholder; the training cell declares it as
            [batch, length_of_sequences, num_of_input_nodes].
        istate_ph: Initial LSTM state placeholder; the training cell declares it
            as [batch, num_of_hidden_nodes * 2].

    Returns:
        Tuple ``(output_op, states_op, results)``: the softmax output computed
        from the last time step, the final state returned by ``static_rnn``, and
        the list ``[weight1, weight2, bias1, bias2]`` of trainable variables.
    """
    with tf.name_scope("inference") as scope:
        # Projection applied to every time step before the LSTM (weight1/bias1)
        # and projection applied to the last LSTM output (weight2/bias2).
        weight1_var = tf.Variable(tf.truncated_normal(
            [num_of_input_nodes, num_of_hidden_nodes], stddev=0.1), name="weight1")
        weight2_var = tf.Variable(tf.truncated_normal(
            [num_of_hidden_nodes, num_of_output_nodes], stddev=0.1), name="weight2")
        bias1_var = tf.Variable(tf.truncated_normal([num_of_hidden_nodes], stddev=0.1), name="bias1")
        bias2_var = tf.Variable(tf.truncated_normal([num_of_output_nodes], stddev=0.1), name="bias2")

        # Move the time axis to the front, flatten time and batch into one axis,
        # project every row, then split back into one tensor per time step as
        # static_rnn expects a Python list of per-step inputs.
        in1 = tf.transpose(input_ph, [1, 0, 2])
        in2 = tf.reshape(in1, [-1, num_of_input_nodes])
        in3 = tf.matmul(in2, weight1_var) + bias1_var
        in4 = tf.split(in3, length_of_sequences, 0)

        # state_is_tuple=False makes the cell use a single state tensor, which is
        # why istate_ph is one placeholder of width num_of_hidden_nodes * 2.
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_of_hidden_nodes, forget_bias=forget_bias, state_is_tuple=False)
        rnn_output, states_op = tf.contrib.rnn.static_rnn(cell, in4, initial_state=istate_ph)
        # Earlier variant without softmax, kept for reference:
        #output_op = tf.matmul(rnn_output[-1], weight2_var) + bias2_var
        # Only the output of the last time step feeds the softmax layer.
        output_op = tf.nn.softmax(tf.matmul(rnn_output[-1], weight2_var) + bias2_var)

        # Add summary ops to collect data
        # (the returned handles are not used; the ops are registered in the graph
        # and gathered later by tf.summary.merge_all()).
        w1_hist = tf.summary.histogram("weights1", weight1_var)
        w2_hist = tf.summary.histogram("weights2", weight2_var)
        b1_hist = tf.summary.histogram("biases1", bias1_var)
        b2_hist = tf.summary.histogram("biases2", bias2_var)
        output_hist = tf.summary.histogram("output",  output_op)
        results = [weight1_var, weight2_var, bias1_var,  bias2_var]
        return output_op, states_op, results

In [8]:
def loss(output_op, supervisor_ph):
    """Create the mean-squared-error loss op and register it as a scalar summary.

    Args:
        output_op: Network output tensor.
        supervisor_ph: Placeholder holding the target values.

    Returns:
        Scalar tensor: the mean of the element-wise squared difference.
    """
    with tf.name_scope("loss"):
        residual = output_op - supervisor_ph
        loss_op = tf.reduce_mean(tf.square(residual))
        tf.summary.scalar("loss", loss_op)
    return loss_op

In [9]:
def training(loss_op):
    """Create the op that performs one optimisation step on ``loss_op``.

    Uses the module-level ``optimizer`` object.

    Args:
        loss_op: Scalar loss tensor to minimise.

    Returns:
        The op returned by ``optimizer.minimize``.
    """
    with tf.name_scope("training"):
        return optimizer.minimize(loss_op)

In [10]:
def calc_accuracy(output_op, prints=False, epoch=0):
    """Run the network on one evaluation batch and print per-sample class hits.

    The predicted class of a sample is the index of its largest output value;
    the correct class is the index of the largest value in its target row.

    Relies on the module-level ``sess``, ``input_ph``, ``supervisor_ph`` and
    ``istate_ph`` created by the training cell.

    Args:
        output_op: Output tensor of the network.
        prints: When true, also print every input window followed by the raw
            output and target vectors of that sample.
        epoch: Start offset forwarded to ``make_prediction``.

    Returns:
        The list returned by ``sess.run([output_op], ...)``; element 0 holds
        the output array for the batch.
    """
    inputs, ts = make_prediction(num_of_prediction_epochs, epoch)
    pred_dict = {
        input_ph: inputs,
        supervisor_ph: ts,
        # Size the zero state from the batch that was actually returned, so the
        # feed stays valid even if the batch size and num_of_prediction_epochs differ.
        istate_ph: np.zeros((len(inputs), num_of_hidden_nodes * 2)),
    }
    output = sess.run([output_op], feed_dict=pred_dict)
    predictions = output[0]

    # argmax returns the first maximum, matching a strict "<" scan from index 0.
    predicted_classes = np.argmax(predictions, axis=1)
    correct_classes = np.argmax(ts, axis=1)
    for correct_cls, predicted_cls in zip(correct_classes, predicted_classes):
        print("Correct: ", correct_cls, " Output: ", predicted_cls)
    good = int(np.sum(predicted_classes == correct_classes))
    print("Total: ", len(ts), " Correct: ", good)

    if prints:
        for window, predicted, expected in zip(inputs, predictions, ts):
            for row in window:
                print(list(row))
            # Output and target are vectors, so "%f" cannot format them.
            print("output: %s, correct: %s" % (list(predicted), list(expected)))

    return output

In [11]:
# Seed the Python and NumPy generators for reproducibility.
random.seed(0)
np.random.seed(0)

# Created at module level because training() looks `optimizer` up as a global.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

X, t = create_data(num_of_sample, length_of_sequences)

with tf.Graph().as_default():
    # The graph-level seed belongs to the graph that is current when it is set,
    # so it has to be set inside this new graph to make the variable
    # initialisers reproducible.
    tf.set_random_seed(0)

    input_ph = tf.placeholder(tf.float32, [None, length_of_sequences, num_of_input_nodes], name="input")
    supervisor_ph = tf.placeholder(tf.float32, [None, num_of_output_nodes], name="supervisor")
    istate_ph = tf.placeholder(tf.float32, [None, num_of_hidden_nodes * 2], name="istate")

    output_op, states_op, datas_op = inference(input_ph, istate_ph)
    loss_op = loss(output_op, supervisor_ph)
    training_op = training(loss_op)

    summary_op = tf.summary.merge_all()
    # tf.initialize_all_variables is deprecated in favour of this call.
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter("/tmp/tensorflow_log", graph=sess.graph)
        try:
            sess.run(init)

            # Each "epoch" is one gradient step on one mini-batch; the epoch
            # number doubles as the start offset of the batch within the data.
            for epoch in range(num_of_training_epochs):
                inputs, supervisors = get_batch(size_of_mini_batch, X, t, epoch)
                train_dict = {
                    input_ph:      inputs,
                    supervisor_ph: supervisors,
                    istate_ph:     np.zeros((size_of_mini_batch, num_of_hidden_nodes * 2)),
                }
                sess.run(training_op, feed_dict=train_dict)

                # Log the loss every 100 steps and evaluate every 500 steps.
                if (epoch) % 100 == 0:
                    summary_str, train_loss = sess.run([summary_op, loss_op], feed_dict=train_dict)
                    print("train#%d, train loss: %e" % (epoch, train_loss))
                    summary_writer.add_summary(summary_str, epoch)
                    if (epoch) % 500 == 0:
                        calc_accuracy(output_op, epoch=epoch)
            print(output_op)
            calc_accuracy(output_op, prints=False)
            # Fetch the trained weights and biases as NumPy arrays.
            datas = sess.run(datas_op)
            #saver.save(sess, "model.ckpt")
        finally:
            # Flush pending summaries and release the event file.
            summary_writer.close()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `keras.layers.RNN(cell, unroll=True)`, which is equivalent to this API
Instructions for updating:
Use `tf.global_variables_initializer` instead.
train#0, train loss: 2.170877e-01
Correct:  2  Output:  1
Correct:  2  Output:  1
Correct:  2  Output:  1
Correct:  2  Output:  1
Correct:  2  Output:  1
Correct:  2  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  1  Output:  1
Correct:  