# Chainer + LSTM でAmazonの株価を予測する。

In [None]:
import pandas as pd
import numpy as np
import datetime as dt

from matplotlib import pyplot as plt
import seaborn as sns

# Suppress every warning for the rest of the session so cell output stays compact.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings from pandas / Chainer — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

## 再現性

In [None]:
import random
import numpy
import chainer


def reset_seed(seed=0):
    """Seed the global random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator, and also
    CuPy's generator when ``chainer.cuda.available`` is true.

    Args:
        seed: Seed value handed to every generator. Defaults to 0.
    """
    for seed_rng in (random.seed, numpy.random.seed):
        seed_rng(seed)

    # CuPy is only touched when Chainer reports that CUDA is available.
    if not chainer.cuda.available:
        return
    chainer.cuda.cupy.random.seed(seed)


reset_seed(0)

## データ読み込み

In [None]:
import pandas_datareader.data as web

import os

# Local CSV cache of the downloaded prices; it is read when present and
# (re)created from the web source otherwise.
csv_path = "data/stock_amazon.csv"

try:
    df = pd.read_csv(csv_path)
    df["Date"] = pd.to_datetime(df["Date"])
    df.set_index("Date", drop=True, inplace=True)

except FileNotFoundError:
    # No cache yet: download AMZN daily prices for 2008-01-01 .. 2018-12-31.
    start = dt.date(2008, 1, 1)
    end = dt.date(2018, 12, 31)
    df = web.DataReader('AMZN', "yahoo", start, end)

    # The cache directory may not exist on a fresh checkout; create it first
    # so writing the CSV does not fail right after a successful download.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    df.to_csv(csv_path, index=True, header=True)

# Show the first rows and summary statistics as a quick check.
display(df.head(5), df.describe())

## 過去データの作成

25日分の過去データを作成

In [None]:
pd.set_option("display.max_columns", 200)

# Columns to lag, and the name template each lagged copy is stored under
# (same position in both lists). Note that the Volume and Adj Close templates
# do not carry the "_past" infix used by the other four.
lag_sources = ["High", "Low", "Open", "Close", "Volume", "Adj Close"]
lag_templates = [
    "High_past_{0:02d}", "Low_past_{0:02d}",
    "Open_past_{0:02d}", "Close_past_{0:02d}",
    "Volume_{0:02d}", "Adj_Close_{0:02d}",
]

# For every lag from 1 to 25 rows, add the shifted copy of each source column.
for lag in range(1, 26):
    lagged_names = [template.format(lag) for template in lag_templates]
    df[lagged_names] = df[lag_sources].shift(lag)

# The first 30 rows show how the NaN padding introduced by shift() tapers off.
df[:30]

In [None]:
# Drop every row that contains a NaN, modifying df in place. The shift()-based
# lag columns are NaN in the earliest rows, so those rows are removed here.
df.dropna(inplace=True)

# Show the remaining shape and the first rows as a quick check.
display(df.shape, df.head())

## 説明変数と目的変数の作成

In [None]:
# Explanatory variables: the closing prices of the previous 25 days,
# i.e. the columns Close_past_01 ... Close_past_25.
close_col = ["Close_past_{0:02d}".format(lag) for lag in range(1, 26)]
X = df.loc[:, close_col]

# Target variable: the closing price of the row itself.
Y = df["Close"]

display(X.head(), Y.head())

In [None]:
from sklearn.model_selection import train_test_split

# Hold out test data for validation (train: 70%, test: 30%).
# shuffle=False keeps the rows in their original order, so the split is
# positional: the leading 70% of rows become the training set and the
# trailing 30% the test set.
# NOTE(review): random_state presumably has no effect when shuffle=False —
# confirm against the scikit-learn documentation.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, shuffle=False, random_state=1234)

# Display the shapes of the four resulting pieces.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

## LSTM回帰モデル作成

In [None]:
from chainer import Chain
import chainer.links as L
import chainer.functions as F

class LSTM(Chain):
    """Regression network: Linear(50) -> LSTM(50) -> Linear(1).

    All input sizes are passed as ``None``. ReLU is applied after every
    layer, including the output layer.
    """

    def __init__(self):
        # Signals that this chain holds the parameters the optimizer works on.
        super(LSTM, self).__init__()

        with self.init_scope():
            self.l1 = L.Linear(None, 50)
            self.l2 = L.LSTM(None, 50)
            self.l3 = L.Linear(None, 1)

    def reset_state(self):
        """Reset the recurrent state held by the LSTM link ``l2``."""
        self.l2.reset_state()

    def __call__(self, x):
        """Forward computation for the input ``x``.

        Args:
            x: Input batch.  # presumably float32 of shape (batch, 25) — TODO confirm

        Returns:
            The output of the final layer after ReLU, so it is never negative.
        """
        # Input projection followed by dropout with ratio 0.7.
        embedded = F.dropout(F.relu(self.l1(x)), ratio=0.7)

        recurrent = F.relu(self.l2(embedded))

        # Negative outputs are converted to zero.
        return F.relu(self.l3(recurrent))
    
class LSTMRegressor(Chain):
    """Loss wrapper that scores a predictor with mean squared error."""

    def __init__(self, predictor):
        """Register ``predictor`` as the child link named ``predictor``.

        Args:
            predictor: The chain whose output is compared with the targets.
        """
        super(LSTMRegressor, self).__init__()

        # Register the child inside init_scope(), the same way LSTM registers
        # its own links.
        with self.init_scope():
            self.predictor = predictor

    def __call__(self, x, y):
        """Compute, report and return the training loss.

        Args:
            x: Input batch forwarded to ``self.predictor``.
            y: Target values compared with the prediction.

        Returns:
            The mean squared error between ``self.predictor(x)`` and ``y``.
        """
        pred = self.predictor(x)

        loss = F.mean_squared_error(pred, y)
        # Call report through the chainer package: the bare name `report` is
        # only imported in a later cell, so this class would otherwise depend
        # on that cell having been executed first.
        chainer.report({'loss': loss}, self)

        return loss


model = LSTMRegressor(LSTM())

## オプティマイザの設定

In [None]:
from chainer import optimizers

# Use Adam as the optimization method, with alpha=0.01.
optimizer = optimizers.Adam(alpha=0.01)

# Attach the optimizer to the model.
optimizer.setup(model)

### Iteratorの作成

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the features with StandardScaler.
scaler = StandardScaler()

# Fit the scaler on the training features only, then apply those same
# statistics to the test features. Re-fitting on the test set would
# standardize the two sets with different means/variances, so the model would
# be evaluated on inputs scaled differently from the ones it was trained on.
X_train_scale = scaler.fit_transform(X_train)
X_test_scale = scaler.transform(X_test)

In [None]:
from chainer.datasets import tuple_dataset
from chainer import iterators

# Cast the features to float32 and turn the targets into float32 column
# vectors of shape (n, 1).
train_features = X_train_scale.astype(np.float32)
train_targets = y_train.values.reshape(-1, 1).astype(np.float32)
test_features = X_test_scale.astype(np.float32)
test_targets = y_test.values.reshape(-1, 1).astype(np.float32)

train_set = tuple_dataset.TupleDataset(train_features, train_targets)
test_set = tuple_dataset.TupleDataset(test_features, test_targets)

# Batch size of 25, chosen with the 25-day moving average in mind.
batchsize = 25

# Training should respect the time-series order, so it is not shuffled.
train_iter = iterators.SerialIterator(train_set, batchsize, shuffle=False)
# The test iterator is also unshuffled and does not repeat.
test_iter = iterators.SerialIterator(
    test_set, batchsize, repeat=False, shuffle=False)

### Updaterの作成

In [None]:
from chainer import training

# Build the updater from the training iterator and the optimizer.
# NOTE(review): nothing in this notebook resets or unchains the LSTM state
# between training iterations — confirm that carrying the state across
# mini-batches with StandardUpdater is intended.
updater = training.StandardUpdater(train_iter, optimizer)

### Trainerの準備

In [None]:
from chainer.training import extensions
from chainer import report

# Train for 20 epochs.
stop_trigger = (20, 'epoch')
trainer = training.Trainer(updater, stop_trigger)

# Columns printed by PrintReport.
report_columns = ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']

# Register, in this order: evaluation on the test iterator, log collection,
# and printing of the selected log columns.
trainer_extensions = (
    extensions.Evaluator(test_iter, model),
    extensions.LogReport(),
    extensions.PrintReport(report_columns),
)
for trainer_extension in trainer_extensions:
    trainer.extend(trainer_extension)

## 学習

In [None]:
trainer.run()

In [None]:
import chainer
from chainer import Variable

# Clear the recurrent state left over from training/evaluation so this pass
# neither starts from the state of the last mini-batch nor clashes with its
# batch size (training used batches of 25 rows; here the whole training
# matrix is passed at once).
model.predictor.reset_state()

# The network was trained on the standardized features, so predict on the
# same standardized training matrix rather than on the raw prices.
test_var = Variable(X_train_scale.astype(np.float32))

# Pass the input to the model's forward computation, in test mode and
# without building a backprop graph.
with chainer.using_config('train', False), chainer.using_config('enable_backprop', False):
    pred_y = model.predictor(test_var)

# Re-attach the date index of the training rows to the predictions.
pred_y_df = pd.DataFrame(pred_y.data, index=X_train.index)

# Plot the true closing price against the predictions on the training period.
plt.plot(df["Close"], label="true")
plt.plot(pred_y_df, color="r", label="train")
plt.title("Amazon - Stock Price")
plt.xlabel("Date")
plt.ylabel("Close Price")
plt.legend()
plt.grid()