# ライブラリをインポートする

In [1]:
from keras.models import Sequential
from keras.utils import np_utils, plot_model
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.initializers import glorot_uniform, orthogonal, TruncatedNormal
from keras.callbacks import EarlyStopping
from keras.layers.recurrent import GRU, SimpleRNN

import pandas as pd #行列計算
import numpy as np #行列計算
import math #数値計算
import itertools #順列・組み合わせ
import time

import matplotlib.pyplot as plt #グラフ
import winsound # ビープ音

from df_method import rise_fall_rate, moving_average, GCDC, df_shift, add_data, RSI, Z_score_normalization, Min_Max_normalization

Using TensorFlow backend.


# ニューラルネットワーク構築

In [2]:
class Prediction:
    """Build and train a single-recurrent-layer classifier with Keras.

    The network is: one recurrent layer (SimpleRNN, LSTM or GRU) ->
    Dropout(0.5) -> Dense(n_out) -> softmax, compiled with categorical
    cross-entropy and the Adam optimizer.

    Attributes:
        maxlen: Length of each input sequence (number of time steps).
        n_hidden: Number of units in the recurrent layer.
        n_in: Number of feature columns in the training data.
        n_out: Number of columns in the label data (output classes).
        learning_model: Which recurrent layer to use; one of
            ``SUPPORTED_MODELS``.
        hist: Object returned by ``model.fit`` in the last ``train`` call,
            or ``None`` if ``train`` has not been called yet.
    """

    # Seed shared by every weight initializer in the network.
    SEED = 20170719
    # Accepted values for ``learning_model``.
    SUPPORTED_MODELS = ('RNN', 'LSTM', 'GRU')

    def __init__(self, maxlen, n_hidden, n_in, n_out, learning_model):
        """Store the hyper-parameters; no Keras object is created here."""
        self.maxlen = maxlen  # input sequence length
        self.n_hidden = n_hidden  # units in the recurrent (hidden) layer
        self.n_in = n_in  # number of training-data columns
        self.n_out = n_out  # number of label-data columns

        self.learning_model = learning_model  # recurrent layer selector
        self.hist = None  # filled in by train()

    def create_model(self):
        """Create and compile the network described by this instance.

        Returns:
            The compiled Keras ``Sequential`` model.

        Raises:
            ValueError: If ``learning_model`` is not one of
                ``SUPPORTED_MODELS``. Without this check the model would be
                built with no recurrent layer at all.
        """
        # Validate first so a typo fails loudly before anything is built.
        if self.learning_model not in self.SUPPORTED_MODELS:
            raise ValueError(
                "unknown learning_model {!r}; expected one of {}".format(
                    self.learning_model, ', '.join(self.SUPPORTED_MODELS)))

        recurrent_layers = {'RNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU}
        recurrent_layer = recurrent_layers[self.learning_model]

        model = Sequential()
        # Recurrent layer: all three variants take identical arguments.
        model.add(recurrent_layer(self.n_hidden,
                                  batch_input_shape=(None, self.maxlen, self.n_in),
                                  kernel_initializer=glorot_uniform(seed=self.SEED),
                                  recurrent_initializer=orthogonal(gain=1.0, seed=self.SEED),
                                  dropout=0.5,
                                  recurrent_dropout=0.5))
        # Dropout layer
        model.add(Dropout(0.5))
        # Fully connected output layer
        model.add(Dense(self.n_out, kernel_initializer=glorot_uniform(seed=self.SEED)))
        # Softmax turns the outputs into class probabilities
        model.add(Activation("softmax"))
        # Compile ("RMSprop" was the alternative optimizer noted by the author)
        model.compile(loss="categorical_crossentropy", optimizer="Adam",
                      metrics=['categorical_accuracy'])
        return model

    def train(self, x_train, t_train, batch_size, epochs):
        """Build a fresh model and fit it on the given data.

        The last 10% of the samples is held out by Keras as validation data
        (``validation_split=0.1``), and training stops through an
        ``EarlyStopping`` callback created with ``patience=0``.

        Args:
            x_train: Training inputs passed straight to ``model.fit``.
            t_train: Training labels passed straight to ``model.fit``.
            batch_size: Mini-batch size.
            epochs: Maximum number of epochs.

        Returns:
            The trained Keras model. The fit history is kept in ``self.hist``.
        """
        early_stopping = EarlyStopping(patience=0, verbose=1)
        model = self.create_model()
        self.hist = model.fit(x_train, t_train, batch_size=batch_size, epochs=epochs, verbose=1,
                              shuffle=True, callbacks=[early_stopping], validation_split=0.1)
        return model

## 変数宣言

In [3]:
# --- Run switches and data location -----------------------------------------
is_debug = True               # print intermediate shapes while the main cell runs
csv_path = './csv_realtime/'  # folder that holds the input CSV files

# --- Experiment grid ---------------------------------------------------------
# Each list may hold several entries; the main cell loops over all of them.
learning_model_list = ['LSTM']  # candidates: 'RNN', 'LSTM', 'GRU'
day_list = ['1']                # prediction horizon in days; candidates: '1', '2', '7', '30', '365'
year_list = ['2013']            # first year of the data window; candidates: '1960', '1970', '1980', '1990', '2000', '2010'
end_date = '2018-10-31'         # last date of the data window ('' means "up to the newest row")

# --- Hyper-parameter sweep bounds (both ends inclusive) ----------------------
min_maxlen, max_maxlen = 100, 1000     # input sequence length
min_n_hidden, max_n_hidden = 100, 500  # recurrent-layer units

# --- Prediction target -------------------------------------------------------
# Column used to build the labels. The main cell has branches for
# 'nikkei_Close', 'nikkei_diff' and 'USD_JPY_diff'.
target_name = 'USD_JPY_diff'
max_score = 0  # best accuracy so far

# メイン処理

In [4]:
# Grid search over model type, prediction horizon, start year, sequence length
# and hidden size. One row per trained model is collected in `result_csv`
# (read by the reporting cells) and mirrored to ./log/log.csv after every run.

# Column layout of the result table.
result_columns = ['time', 'day', 'model', 'start date', 'end date',
                  'maxlen', 'n_hidden', 'correct', 'semi_correct']
# Rows are gathered in a plain list and the frame is rebuilt from it, because
# DataFrame.append no longer exists in pandas >= 2.0.
result_rows = []
result_csv = pd.DataFrame(columns=result_columns)

# One-hot label classes, in the order used by the confusion matrix.
label_columns = ['jump', 'rise', 'fall', 'drop']
# Boundary between rise/jump (and fall/drop). Numerically this is
# 100 * ln(1.01), i.e. a 1% move on the log*100 scale used for the price diffs.
jump_threshold = 0.995033085

maxlen_step = 100     # increment of the sequence-length sweep
n_hidden_step = 100   # increment of the hidden-size sweep
epochs = 100          # maximum number of passes over the training data
batch_size = 256      # mini-batch size
train_fraction = 0.9  # leading share of the windows used for training


def _open_close_log_diff(csv_file, prefix, first_date):
    """Return the Close-minus-Open difference of log prices (x100) as a Series.

    Reads `csv_file`, takes 100 * log of every column, subtracts
    `<prefix>_Open` from `<prefix>_Close`, keeps rows from `first_date` on
    and names the result `<prefix>_diff`.
    """
    prices = pd.read_csv(csv_file, index_col='Date', parse_dates=True)
    prices = prices.apply(np.log) * 100
    diff = prices[prefix + '_Close'] - prices[prefix + '_Open']
    return diff[first_date:].rename(prefix + '_diff')


def _load_feature_frames(target_name, csv_path, x_days_later):
    """Load the list of frames/series that make up the input features.

    Raises:
        ValueError: If `target_name` has no feature recipe. Previously this
            surfaced later as an IndexError on an empty list.
    """
    frames = []
    if target_name == 'nikkei_Close':
        # Nikkei only
        frames.append(add_data(csv_path + 'nikkei.csv'))

    elif target_name == 'nikkei_diff':
        # Feeds tried earlier and currently disabled: EUR_JPY, EUR_USD, US treasury.
        frames.append(add_data(csv_path + 'NASDAQ.csv'))
        frames.append(add_data(csv_path + 'USD_JPY.csv'))
        frames.append(add_data(csv_path + 'nikkei.csv'))

        # Nikkei open/close difference; the start date is the first day on
        # which open/close/high/low were all recorded (per the original note).
        frames.append(_open_close_log_diff(csv_path + 'nikkei.csv', 'nikkei', '1988-04-08'))

        # Trading volume: drop zero-volume rows, then take the 1-step rate of change.
        volume = pd.read_csv(csv_path + 'nikkei_volume.csv', index_col='Date', parse_dates=True)
        volume = volume[volume.Volume != 0]
        frames.append(rise_fall_rate(volume, 1))

    elif target_name == 'USD_JPY_diff':
        # Feeds tried earlier and currently disabled: NASDAQ, EUR_USD, DOW30,
        # TNX, GSPC (S&P500), RUT (Russell2000), TOPIX, BTC_USD.
        frames.append(add_data(csv_path + 'USD_JPY.csv', x_days_later))
        frames.append(add_data(csv_path + 'EUR_JPY.csv', x_days_later))
        frames.append(add_data(csv_path + 'nikkei.csv', x_days_later))

        # USD/JPY open/close difference; the start date is the first day on
        # which open/close/high/low were all recorded (per the original note).
        frames.append(_open_close_log_diff(csv_path + 'USD_JPY.csv', 'USD_JPY', '1989-10-16'))

        # US treasury
        treasury = pd.read_csv(csv_path + 'treasury_10.csv', index_col='Date', parse_dates=True)
        frames.append(rise_fall_rate(treasury, x_days_later))

    else:
        raise ValueError("unsupported target_name: {!r}".format(target_name))
    return frames


def _join_frames(frames):
    """Inner-join all frames on their index; clashing columns get '_<i>' suffixes."""
    joined = frames[0]
    for i, frame in enumerate(frames[1:]):
        joined = joined.join(frame, how='inner', rsuffix='_' + str(i))
    return joined


def _make_labels(target, threshold):
    """One-hot encode `target` into jump / rise / fall / drop columns."""
    labels = pd.DataFrame(0, index=target.index, columns=label_columns)
    labels.loc[threshold <= target, 'jump'] = 1
    labels.loc[(0 <= target) & (target < threshold), 'rise'] = 1
    labels.loc[(-threshold <= target) & (target < 0), 'fall'] = 1
    labels.loc[target < -threshold, 'drop'] = 1
    return labels


def _make_windows(x_data, t_data, maxlen):
    """Cut sliding windows of `maxlen` rows; each window is labelled by its last row.

    Returns:
        (x, t) with shapes (n_windows, maxlen, n_in) and (n_windows, n_out).
    """
    n_windows = x_data.shape[0] - maxlen + 1
    windows = [x_data[i:i + maxlen, :] for i in range(n_windows)]
    window_labels = [t_data[i + maxlen - 1, :] for i in range(n_windows)]
    x = np.array(windows).reshape(len(windows), maxlen, x_data.shape[1])
    t = np.array(window_labels).reshape(len(windows), t_data.shape[1])
    return x, t


def _score_predictions(preds, t_test):
    """Tally predictions against the true labels.

    Returns:
        (matrix, correct, semi_correct) where `matrix` is the 4x4 confusion
        table (rows = predicted, columns = actual), `correct` counts exact
        class matches and `semi_correct` counts misses that still got the
        direction right (jump<->rise or fall<->drop).
    """
    pred_idx = np.argmax(preds, axis=1)
    true_idx = np.argmax(t_test, axis=1)

    counts = np.zeros((4, 4), dtype=int)
    np.add.at(counts, (pred_idx, true_idx), 1)
    matrix = pd.DataFrame(counts,
                          columns=['jump!', 'rise!', 'fall!', 'drop!'],
                          index=['jump?', 'rise?', 'fall?', 'drop?'])

    correct = int(np.sum(pred_idx == true_idx))
    # Class indices: jump=0, rise=1, fall=2, drop=3. A mismatched pair sums
    # to 1 only for jump/rise and to 5 only for fall/drop.
    pair_sum = pred_idx + true_idx
    same_direction = (pair_sum == 1) | (pair_sum == 5)
    semi_correct = int(np.sum((pred_idx != true_idx) & same_direction))
    return matrix, correct, semi_correct


for learning_model in learning_model_list:
    for x_days_later in day_list:
        x_days_later = int(x_days_later)

        # Join every feature frame into one table.
        df_all = _join_frames(_load_feature_frames(target_name, csv_path, x_days_later))
        print(df_all)

        # Extract the requested period and build the labels ------------------
        for year in year_list:
            # Always slice from the untouched joined frame. Re-slicing the
            # previous year's trimmed frame made every later year lose rows.
            if end_date == '':
                df_period = df_all[year:]
            else:
                df_period = df_all[year:end_date]

            if is_debug:
                print('df_x.shape=', df_period.shape)

            df_t = _make_labels(df_period[target_name], jump_threshold)

            # Move the labels up by the prediction horizon so that row i holds
            # the class observed x_days_later rows later, then drop the tail
            # rows that no longer have a label.
            df_t = df_t.shift(-1 * x_days_later, axis=0)
            df_t = df_t.drop(df_t.index[-1 * x_days_later:], axis=0)
            df_x = df_period.drop(df_period.index[-1 * x_days_later:], axis=0)

            if is_debug:
                print('df_t.shape=', df_t.shape, '\n')

            # Strip index and column names -> plain 2-D arrays ----------------
            x_data = df_x.values
            t_data = df_t.values

            if is_debug:
                print('x_data.shape =', x_data.shape)
                print('t_data.shape =', t_data.shape, '\n')

            n_in = x_data.shape[1]   # number of input columns
            n_out = t_data.shape[1]  # number of label columns

            # Sweep the sequence length ---------------------------------------
            for maxlen in range(min_maxlen, max_maxlen + 1, maxlen_step):
                len_seq = x_data.shape[0] - maxlen + 1
                print('len_seq', len_seq, '\n')
                if len_seq <= 0:
                    # Not enough rows for even one window; longer windows
                    # cannot fit either, so end the sweep for this period.
                    print('not enough rows for maxlen', maxlen, '- skipping the rest of the sweep\n')
                    break

                x, t = _make_windows(x_data, t_data, maxlen)

                if is_debug:
                    print('x.shape=', x.shape)
                    print('t.shape=', t.shape, '\n')

                # Chronological split: leading part trains, trailing part tests.
                n_train = int(len(x) * train_fraction)
                x_train, x_test = np.vsplit(x, [n_train])
                t_train, t_test = np.vsplit(t, [n_train])

                if is_debug:
                    print('x_train.shape=', x_train.shape)
                    print('x_test.shape=', x_test.shape, '\n')
                    print('t_train.shape=', t_train.shape)
                    print('t_test.shape=', t_test.shape, '\n')

                # Sweep the hidden size ---------------------------------------
                for n_hidden in range(min_n_hidden, max_n_hidden + 1, n_hidden_step):
                    # Show the parameters of this run
                    print('model: ', learning_model)
                    print('day: ', x_days_later)
                    print('since: ', year)
                    print('maxlen: ', maxlen)
                    print('n_hidden: ', n_hidden, '\n')

                    prediction = Prediction(maxlen, n_hidden, n_in, n_out, learning_model)

                    # Train and time the training
                    start = time.time()
                    model = prediction.train(x_train, t_train, batch_size, epochs)
                    end = time.time()

                    # Evaluate on the held-out windows
                    preds = model.predict(x_test)
                    matrix, correct, semi_correct = _score_predictions(preds, t_test)

                    correct_rate = 1.0 * correct / len(preds)
                    semi_correct_rate = 1.0 * (correct + semi_correct) / len(preds)

                    # Record the run and persist the log right away so an
                    # interrupted sweep keeps its finished results.
                    result_rows.append({'time': end - start,
                                        'day': x_days_later,
                                        'model': learning_model,
                                        'start date': year,
                                        'end date': end_date,
                                        'maxlen': maxlen,
                                        'n_hidden': n_hidden,
                                        'correct': correct_rate,
                                        'semi_correct': semi_correct_rate})
                    result_csv = pd.DataFrame(result_rows, columns=result_columns)
                    result_csv.to_csv('./log/log.csv', index=False)

                    # Show the confusion matrix, accuracies and training time
                    print('\n', matrix, '\n')
                    print("正答率:", correct_rate)
                    print("準正答率（騰落）:", semi_correct_rate)
                    print ("学習時間:{0}".format(end - start) + "[sec]\n")

            USD_JPY_Open  USD_JPY_High  USD_JPY_Low  USD_JPY_Close  \
Date                                                                 
2008-11-13     -2.719703      0.173178     0.052907       2.771180   
2008-11-14      2.781128     -0.183374     1.678420      -0.616082   
2008-11-17     -1.484716     -0.521394    -0.197845      -0.671733   
2008-11-18      0.155691     -0.143604     0.093765       0.599609   
2008-11-19      0.620349     -0.267242    -0.375587      -1.338541   
2008-11-20     -1.338541     -0.972187    -2.230183      -2.122012   
2008-11-21     -2.122012     -0.260159     0.106826       2.361982   
2008-11-25      1.407017      0.082144     0.010531      -2.191499   
2008-11-26     -2.191499     -1.541118    -0.369257       0.450522   
2008-11-27      0.450522     -0.229573     0.421897      -0.408527   
2008-11-28     -0.513547      0.000000     0.136734       0.303936   
2008-12-01      0.398490     -0.167294    -2.210504      -2.489931   
2008-12-02     -2.46

Train on 1068 samples, validate on 119 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00006: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      1     70     61      0
fall?      0      0      0      0
drop?      0      0      0      0 

正答率: 0.5303030303030303
準正答率（騰落）: 0.5378787878787878
学習時間:8.18613314628601[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  100
n_hidden:  200 

Train on 1068 samples, validate on 119 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     29     24      0
fall?      1     41     37      0
drop?      0      0      0      0 

正答率: 0.5
準正答率（騰落）: 0.5
学習時間:6.553793668746948[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  100
n_hidden:  300 

Train on 1068 samples, validate on 119 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 00004: e

Train on 987 samples, validate on 110 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     65     57      0
fall?      0      0      0      0
drop?      0      0      0      0 

正答率: 0.5327868852459017
準正答率（騰落）: 0.5327868852459017
学習時間:10.004870891571045[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  200
n_hidden:  400 

Train on 987 samples, validate on 110 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 00003: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     33     29      0
fall?      0     32     28      0
drop?      0      0      0      0 

正答率: 0.5
準正答率（騰落）: 0.5
学習時間:7.584000110626221[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  200
n_hidden:  500 

Train on 987 samples, validate on 110 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 00003: early stopping

        jump!  rise!  fall!  drop!

Epoch 8/100
Epoch 9/100
Epoch 00009: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     50     42      0
fall?      0      0      0      0
drop?      0      0      0      0 

正答率: 0.5434782608695652
準正答率（騰落）: 0.5434782608695652
学習時間:25.15278387069702[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  500
n_hidden:  200 

Train on 744 samples, validate on 83 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     49     40      0
fall?      0      1      2      0
drop?      0      0      0      0 

正答率: 0.5543478260869565
準正答率（騰落）: 0.5543478260869565
学習時間:17.25955033302307[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  500
n_hidden:  300 

Train on 744 samples, validate on 83 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00006: early stopping

        jump!  rise!  fall!  

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 00007: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     31     24      0
fall?      0     13     14      0
drop?      0      0      0      0 

正答率: 0.5487804878048781
準正答率（騰落）: 0.5487804878048781
学習時間:24.99537181854248[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  600
n_hidden:  300 

Train on 663 samples, validate on 74 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00006: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     14     12      0
fall?      0     30     26      0
drop?      0      0      0      0 

正答率: 0.4878048780487805
準正答率（騰落）: 0.4878048780487805
学習時間:22.66836142539978[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  600
n_hidden:  400 

Train on 663 samples, validate on 74 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 00004: early stopping

        jump!  ri

Train on 582 samples, validate on 65 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00006: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     30     25      0
fall?      0      8      9      0
drop?      0      0      0      0 

正答率: 0.5416666666666666
準正答率（騰落）: 0.5416666666666666
学習時間:26.20490312576294[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  700
n_hidden:  400 

Train on 582 samples, validate on 65 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 00004: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     34     26      0
fall?      0      4      8      0
drop?      0      0      0      0 

正答率: 0.5833333333333334
準正答率（騰落）: 0.5833333333333334
学習時間:20.36939811706543[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  700
n_hidden:  500 

Train on 582 samples, validate on 65 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
E

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 00007: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     29     18      0
fall?      0      6      9      0
drop?      0      0      0      0 

正答率: 0.6129032258064516
準正答率（騰落）: 0.6129032258064516
学習時間:26.326387405395508[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  800
n_hidden:  400 

Train on 501 samples, validate on 56 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 00005: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0      0      1      0
fall?      0     35     26      0
drop?      0      0      0      0 

正答率: 0.41935483870967744
準正答率（騰落）: 0.41935483870967744
学習時間:21.813742637634277[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  800
n_hidden:  500 

Train on 501 samples, validate on 56 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 00004: early stopping

 

Train on 339 samples, validate on 38 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 00008: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0      4      1      0
fall?      0     21     16      0
drop?      0      0      0      0 

正答率: 0.47619047619047616
準正答率（騰落）: 0.47619047619047616
学習時間:35.406339168548584[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  1000
n_hidden:  400 

Train on 339 samples, validate on 38 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 00006: early stopping

        jump!  rise!  fall!  drop!
jump?      0      0      0      0
rise?      0     19     10      0
fall?      0      6      7      0
drop?      0      0      0      0 

正答率: 0.6190476190476191
準正答率（騰落）: 0.6190476190476191
学習時間:30.421555042266846[sec]

model:  LSTM
day:  1
since:  2013
maxlen:  1000
n_hidden:  500 

Train on 339 samples, validate on 38 sam

## 計算終了の合図

In [5]:
# Audible "finished" signal: play the same three-note phrase three times.
# Each pair is (frequency in Hz, duration in ms) as taken by winsound.Beep.
finish_jingle = ((784, 300), (698, 300), (784, 600))
for _ in range(3):
    for frequency, duration in finish_jingle:
        winsound.Beep(frequency, duration)

## 結果発表

In [None]:
# Best runs per prediction horizon
for days in day_list:
    print('\n', days, 'days later\n')
    # Keep only the runs for this horizon ('day' was stored as an int).
    runs_for_day = result_csv[result_csv['day'] == int(days)]

    print('平均計算時間')
    print(runs_for_day['time'].mean(), '秒')

    # Show every run that ties for the best score, for each metric in turn.
    for heading, metric in (('max correct', 'correct'),
                            ('\nmax semi correct', 'semi_correct')):
        print(heading)
        best_score = runs_for_day[metric].max()
        print(runs_for_day[runs_for_day[metric] == best_score])

## 正答率の平均

In [20]:
# Mean accuracy and mean semi-accuracy (direction only) over every recorded run.
# The same figures could be recomputed from the saved log at './log/log.csv'.
for heading, metric in (('正答率', 'correct'), ('準正答率', 'semi_correct')):
    print(heading)
    print(result_csv[metric].mean())

正答率
0.5332163099208822
準正答率
0.5339205352729949


In [14]:
# Scratch demo of the ways to iterate over a dict.
x = dict(a=1, b=2, c=3)

# Iterating the dict itself yields its keys
for key in x:
    print(key)

# .keys() yields the same keys explicitly
for key in x.keys():
    print(key)

# .values() yields the stored values
for value in x.values():
    print(value)

# .items() yields (key, value) pairs
for key, value in x.items():
    print(key, value)

# Direct lookup by key
print(x['a'])

a
b
c
a
b
c
1
2
3
a 1
b 2
c 3
1
