In [51]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import preprocessing
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import GRU

from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

In [71]:
df = pd.read_csv('nikkei_2001_2018.csv',encoding='SHIFT-JIS')
df.head()

Unnamed: 0,日付,始値,高値,安値,終値,出来高,終値調整値
0,2001-07-13,1,12490,12300,2,259430,12310
1,2001-07-16,3,12400,12250,2,99740,12330
2,2001-07-17,5,12170,12100,6,93150,12120
3,2001-07-18,7,12150,11830,6,165330,11840
4,2001-07-19,9,11990,11860,10,620220,11920


In [72]:
#データの標準化
max = 0
min = 0

if df['始値'].max() <= df['終値'].max():
    max = df['終値'].max()
else:
    max = df['始値'].max()

if df['始値'].min() >= df['終値'].min():
    min = df['終値'].min()
else:
    min = df['始値'].min()

    
df_temp = df.loc[:, ['始値', '終値']]

scaler = preprocessing.MinMaxScaler()
scaler.fit([[min],[max]])

df_temp = scaler.transform(df_temp)

df_temp = pd.DataFrame(df_temp)

df['open'] = df_temp[0]
df['close'] = df_temp[1]

df.head()

Unnamed: 0,日付,始値,高値,安値,終値,出来高,終値調整値,open,close
0,2001-07-13,1,12490,12300,2,259430,12310,0.0,0.071429
1,2001-07-16,3,12400,12250,2,99740,12330,0.142857,0.071429
2,2001-07-17,5,12170,12100,6,93150,12120,0.285714,0.357143
3,2001-07-18,7,12150,11830,6,165330,11840,0.428571,0.357143
4,2001-07-19,9,11990,11860,10,620220,11920,0.571429,0.642857


In [82]:
result = pd.DataFrame()
epochs_num = 10
max_model_open = Sequential()
max_model_close = Sequential()
max_accuracy = 0
accuracy = 0
n_in = 1
n_out = 1
n_hidden = 1000
maxlen = 20


#ハイパーパラメータの設定
for batch_num in range(10,12):

    X_open, Y_open,X_close,Y_close = [], [],[],[]
    for i in range(len(df) - maxlen):
        X_open.append(df[['open']].iloc[i:(i+maxlen)].values)
        Y_open.append(df[['open']].iloc[i+maxlen].values)
        X_close.append(df[['close']].iloc[i:(i+maxlen)].values)
        Y_close.append(df[['close']].iloc[i+maxlen].values)


    X_open=np.array(X_open)
    Y_open=np.array(Y_open)
    X_close=np.array(X_close)
    Y_close=np.array(Y_close)

    # 訓練用のデータと、テスト用のデータに分ける
    N_train = int(len(df) * 0.8)
    N_test = len(df) - N_train
    X_open_train, X_open_test, Y_open_train, Y_open_test = train_test_split(X_open, Y_open, test_size=N_test, shuffle = False)
    X_close_train, X_close_test, Y_close_train, Y_close_test = train_test_split(X_close, Y_close, test_size=N_test, shuffle = False) 

    #モデル作成 (Kerasのフレームワークで簡易に記載できる)
    model_open = Sequential()
    model_open.add(GRU(n_hidden,
                   batch_input_shape=(None, maxlen, n_in),
                   kernel_initializer='random_uniform',
                   return_sequences=False))
    model_open.add(Dense(n_in, kernel_initializer='random_uniform'))
    model_open.add(Activation("linear"))

    opt_open = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model_open.compile(loss = "mean_squared_error", optimizer=opt_open)



    model_close = Sequential()
    model_close.add(GRU(n_hidden,
                   batch_input_shape=(None, maxlen, n_in),
                   kernel_initializer='random_uniform',
                   return_sequences=False))
    model_close.add(Dense(n_in, kernel_initializer='random_uniform'))
    model_close.add(Activation("linear"))

    opt_close = Adam(lr=0.001, beta_1=0.9, beta_2=0.999)
    model_close.compile(loss = "mean_squared_error", optimizer=opt_close)


    early_stopping_open = EarlyStopping(monitor='loss', patience=10, verbose=1)
    hist_open = model_open.fit(X_open_train, Y_open_train, batch_size=batch_num, epochs=epochs_num,
                     callbacks=[early_stopping_open],shuffle=False)

    early_stopping_close = EarlyStopping(monitor='loss', patience=10, verbose=1)
    hist_close = model_close.fit(X_close_train, Y_close_train, batch_size=batch_num, epochs=epochs_num,
                     callbacks=[early_stopping_close],shuffle=False)
    
    
    
    predicted= model_open.predict(X_open_test)
    result_open= pd.DataFrame(predicted)
    result_open.columns = ['predict']
    result_open['actual'] = Y_open_test

    predicted = model_close.predict(X_close_test)
    result_close = pd.DataFrame(predicted)
    result_close.columns = ['predict']
    result_close['actual'] = Y_close_test
    
    
    high_low = []
    for i in range(0,len(result_open)):
        if result_open['actual'][i] <= result_close['actual'][i]:
            high_low.append([result_open['actual'][i],result_close['actual'][i],1])
        else:
            high_low.append([result_open['actual'][i],result_close['actual'][i],-1])

    high_low_pre = []
    for i in range(0,len(result_open)):
        if result_open['predict'][i] <= result_close['predict'][i]:
            high_low_pre.append([result_open['predict'][i],result_close['predict'][i],1])
        else:
            high_low_pre.append([result_open['predict'][i],result_close['predict'][i],-1])

            
    count = 0
    for i in range(0,(len(high_low)-1)):
        if high_low[i][2] == high_low_pre[i-1][2]:
            count += 1
    accuracy = count/(len(high_low)-1)
    
    if max_accuracy < accuracy:
        max_accuracy = accuracy
        max_model_open = model_open
        max_model_close = model_close
    
    result.at[batch_num, 'accuracy']  = accuracy

result

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Unnamed: 0,accuracy
10,0.424242
11,0.606061


In [83]:
max_accuracy

0.6060606060606061

In [75]:
max_model_open.save('model_open')
max_model_close.save('model_close')

In [77]:
from keras.models import load_model

maxlen = 15

X_open, Y_open,X_close,Y_close = [],[],[],[]
for i in range(len(df) - maxlen):
    X_open.append(df[['open']].iloc[i:(i+maxlen)].values)
    Y_open.append(df[['open']].iloc[i+maxlen].values)
    X_close.append(df[['close']].iloc[i:(i+maxlen)].values)
    Y_close.append(df[['close']].iloc[i+maxlen].values)


X_open=np.array(X_open)
Y_open=np.array(Y_open)
X_close=np.array(X_close)
Y_close=np.array(Y_close)

# 訓練用のデータと、テスト用のデータに分ける
N_train = int(len(df) * 0.8)
N_test = len(df) - N_train
X_open_train, X_open_test, Y_open_train, Y_open_test = train_test_split(X_open, Y_open, test_size=N_test, shuffle = False)
X_close_train, X_close_test, Y_close_train, Y_close_test = train_test_split(X_close, Y_close, test_size=N_test, shuffle = False) 




model_open = Sequential()
model_open = load_model('model_open')

model_close = Sequential()
model_close = load_model('model_close')


predicted= model_open.predict(X_open_test)
result_open= pd.DataFrame(predicted)
result_open.columns = ['predict']
result_open['actual'] = Y_open_test

predicted = model_close.predict(X_close_test)
result_close = pd.DataFrame(predicted)
result_close.columns = ['predict']
result_close['actual'] = Y_close_test


high_low = []
for i in range(0,len(result_open)):
    if result_open['actual'][i] <= result_close['actual'][i]:
        high_low.append([result_open['actual'][i],result_close['actual'][i],1])
    else:
        high_low.append([result_open['actual'][i],result_close['actual'][i],-1])

high_low_pre = []
for i in range(0,len(result_open)):

    if result_open['predict'][i] <= result_close['predict'][i]:
        high_low_pre.append([result_open['predict'][i],result_close['predict'][i],1])
    else:
        high_low_pre.append([result_open['predict'][i],result_close['predict'][i],-1])

        
count = 0
for i in range(0,(len(high_low)-1)):
    if high_low[i][2] == high_low_pre[i-1][2]:
        count += 1
accuracy = count/(len(high_low)-1)



In [78]:
count = 0
for i in range(0,(len(high_low)-1)):
    print(str(high_low[i][2]) + '\t' + str(high_low_pre[i-1][2]))
    if high_low[i][2] == high_low_pre[i-1][2]:
        count += 1

1	1
-1	-1
1	1
-1	1
1	1
1	1
-1	1
1	1
-1	1
1	-1
-1	-1
1	1
-1	1
-1	-1
1	-1
-1	-1
1	1
-1	1
1	1
1	1
-1	1
1	1
-1	1
1	-1
-1	-1
1	1
-1	1
-1	-1
1	-1
-1	-1
1	1
-1	1
1	1


In [79]:
high_low_pre = []
for i in range(0,len(result_open)):
    print(str(result_open['predict'][i]) + ' ' + str(result_close['predict'][i]))
    if result_open['predict'][i] <= result_close['predict'][i]:
        high_low_pre.append([result_open['predict'][i],result_close['predict'][i],1])
    else:
        high_low_pre.append([result_open['predict'][i],result_close['predict'][i],-1])

0.771098 0.756734
0.58589 0.596812
0.390828 0.419702
0.233584 0.264535
0.139287 0.164872
0.108638 0.120353
0.121095 0.154643
0.22896 0.229776
0.411674 0.397615
0.60861 0.601923
0.766319 0.766955
0.859617 0.860414
0.891999 0.882213
0.885016 0.87083
0.771098 0.756734
0.58589 0.596812
0.390828 0.419702
0.233584 0.264535
0.139287 0.164872
0.108638 0.120353
0.121095 0.154643
0.22896 0.229776
0.411674 0.397615
0.60861 0.601923
0.766319 0.766955
0.859617 0.860414
0.891999 0.882213
0.885016 0.87083
0.771098 0.756734
0.58589 0.596812
0.390828 0.419702
0.233584 0.264535
0.139287 0.164872
0.108638 0.120353


In [33]:
str_input = input()

2018/2/2
