<a href="https://colab.research.google.com/github/nsstnaka/machine_learning_handson/blob/master/stock_price_prediction_with_rnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ディープラーニングによる株価予測

直近50営業日の4本値（始値・高値・安値・終値）＋出来高をもとに、翌営業日の終値を予測します。


## 前準備

ライブラリのimport

In [None]:
import pandas as pd
import pandas_datareader as pdr
import numpy as np
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt

TensorFlowのバージョン確認

In [None]:
# Show the installed TensorFlow version string as the cell output.
tf.__version__

## 株価取得

In [None]:
# Download daily price data for one ticker over a fixed date range.
# Change the ticker symbol to fetch a different instrument (e.g. AAPL, GOOG).
# NOTE(review): this relies on pandas_datareader's 'yahoo' source being reachable — confirm it still works in the target environment.
ticker = '^DJI'
period_start, period_end = '2017-04-01', '2020-03-31'
# reset_index() turns the returned index into an ordinary column and renumbers rows from 0 for the later cells.
df = pdr.data.DataReader(ticker, 'yahoo', period_start, period_end).reset_index()
df.head(10)

可視化

In [None]:
# Plot the closing price against the trading date.
# A title and y-axis label let the figure stand on its own; the trailing ';' hides the Axes repr.
ax = df.plot(x='Date', y=['Close'], title='Daily closing price')
ax.set_ylabel('Close');

統計情報

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

## 前処理

4本値＋出来高の抽出

In [None]:
# Select the four price columns plus volume and convert them to a 2-D NumPy array
# (one row per trading day, columns in the order listed below).
feature_columns = ['High', 'Low', 'Open', 'Close', 'Volume']
features = df[feature_columns].to_numpy()
features

正規化

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Number of most-recent rows reserved for evaluation.
# Keep this equal to `test_num` in the train/test split cell further down.
HOLDOUT_ROWS = 100

# Learn each column's min/max from the training period only, so that statistics
# from the held-out days do not leak into the model inputs. The same scaling is
# then applied to every row; held-out values may therefore fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(features[:-HOLDOUT_ROWS])
features_scaled = scaler.transform(features)
features_scaled

時系列データおよびラベルの作成

In [None]:
N = 50  # look-back window length: number of past trading days per sample

# Every day from index N onward becomes one sample:
#   input  = the N rows immediately before that day (all five scaled features)
#   target = that day's scaled Close (column 3)
target_days = range(N, len(features_scaled))
all_xs = np.array([features_scaled[day - N:day, :] for day in target_days])
all_ys = np.array([features_scaled[day, 3] for day in target_days])

できたデータのshapeを確認

In [None]:
# Expected: (number of windows, N, 5) — one N-day window of the five features per sample.
all_xs.shape

In [None]:
# Expected: (number of windows,) — one scaled Close target per window.
all_ys.shape

訓練データとテストデータに分割
（最新の100営業日分をテストデータにし、残りを訓練データにする）

In [None]:
test_num = 100  # the most recent 100 samples are held out for evaluation

# Chronological split without shuffling: everything before the last `test_num`
# samples is training data, the remainder is test data.
train_xs, test_xs = all_xs[:-test_num], all_xs[-test_num:]
train_ys, test_ys = all_ys[:-test_num], all_ys[-test_num:]

In [None]:
# Print the four array shapes on one line: train inputs, train targets, test inputs, test targets.
print(*(arr.shape for arr in (train_xs, train_ys, test_xs, test_ys)))

## 学習と評価

### (1) 1層のRNN

モデル構築

In [None]:
# Fix TensorFlow's global seed so a fresh run starts from the same initial weights.
tf.random.set_seed(42)

# Number of features per time step, read from the training windows.
n_features = train_xs.shape[2]

# One recurrent layer that reads N time steps, followed by a single linear unit
# that outputs the scaled Close. To try a different cell type, swap the LSTM line
# for one of the commented alternatives (they take the same input shape).
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=(N, n_features)),
#    tf.keras.layers.GRU(128, input_shape=(N, n_features)),
#    tf.keras.layers.SimpleRNN(128, input_shape=(N, n_features)),
    tf.keras.layers.Dense(1)
])

In [None]:
# Configure training: Adam optimiser, mean squared error as the loss.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

モデル確認

In [None]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

学習

In [None]:
# Train on the training windows: mini-batches of 8 samples, 50 passes over the data.
model.fit(
    x=train_xs,
    y=train_ys,
    epochs=50,
    batch_size=8,
)

評価

In [None]:
close_col = 3  # position of 'Close' in the scaled feature columns

# Predict the scaled Close for every test window (one output column).
pred = model.predict(x=test_xs)

# Undo the scaling for the Close column only. MinMaxScaler applies
# x_scaled = x * scale_ + min_, so the inverse is (x_scaled - min_) / scale_.
# Unlike x_scaled / scale_ + data_min_, this holds for any feature_range.
pred = (pred - scaler.min_[close_col]) / scaler.scale_[close_col]
test_df = pd.DataFrame(pred, columns=['predict'])
test_df['actual'] = (test_ys - scaler.min_[close_col]) / scaler.scale_[close_col]

# Overlay predicted and actual closing prices across the test samples.
ax = test_df.plot(title='1-layer RNN: predicted vs. actual close (test data)')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Close');

### (2) 2層のRNN（ドロップアウト付）

モデル構築

In [None]:
# Fix TensorFlow's global seed so a fresh run starts from the same initial weights.
tf.random.set_seed(42)

# Number of features per time step, read from the training windows.
n_features = train_xs.shape[2]

# Two stacked recurrent layers with dropout after each, then a single linear unit
# that outputs the scaled Close. The first layer sets return_sequences=True so it
# passes its full output sequence to the second layer. To try a different cell type,
# swap the first LSTM line for one of the commented alternatives.
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(128, input_shape=(N, n_features), return_sequences=True),
#    tf.keras.layers.GRU(128, input_shape=(N, n_features), return_sequences=True),
#    tf.keras.layers.SimpleRNN(128, input_shape=(N, n_features), return_sequences=True),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])

In [None]:
# Configure training: Adam optimiser, mean squared error as the loss.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

モデル確認

In [None]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

学習

In [None]:
# Train on the training windows: mini-batches of 8 samples, 50 passes over the data.
model.fit(
    x=train_xs,
    y=train_ys,
    epochs=50,
    batch_size=8,
)

評価

In [None]:
close_col = 3  # position of 'Close' in the scaled feature columns

# Predict the scaled Close for every test window (one output column).
pred = model.predict(x=test_xs)

# Undo the scaling for the Close column only. MinMaxScaler applies
# x_scaled = x * scale_ + min_, so the inverse is (x_scaled - min_) / scale_.
# Unlike x_scaled / scale_ + data_min_, this holds for any feature_range.
pred = (pred - scaler.min_[close_col]) / scaler.scale_[close_col]
test_df = pd.DataFrame(pred, columns=['predict'])
test_df['actual'] = (test_ys - scaler.min_[close_col]) / scaler.scale_[close_col]

# Overlay predicted and actual closing prices across the test samples.
ax = test_df.plot(title='2-layer RNN with dropout: predicted vs. actual close (test data)')
ax.set_xlabel('Test sample index')
ax.set_ylabel('Close');