In [61]:
import numpy as np
import pandas as pd
from importlib import reload
from pylab import plot

import utils; reload(utils)

<module 'utils' from '/Users/kizawamasakazu/projects/machine_learning/work/time_series/stock_price_prediction/n225/utils.py'>

In [137]:
# N225株価データ
df = pd.read_csv('N225.csv')
df = df.set_index('Date')
df['y'] = np.log(df['Adj Close']).diff().shift(-1)
columns = ['Adj Close', 'High', 'Low', 'Open', 'Volume']
df = df.dropna()


In [None]:
import torch
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, X, y):
        assert len(X) == len(y)
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx) -> np.ndarray:
        return self.X[idx], self.y[idx]

def make_dataset(df, seqlen):
    X = df[['Adj Close', 'High', 'Low', 'Open']].values
    y = df['y'].values
    
    datalen = X.shape[0]
    Xs, ys = [], []  
    for i in range(datalen-seqlen+1):
        Xs.append(X[i:i+seqlen,:])
        ys.append(y[i:i+1])
    Xs = np.stack(Xs)
    ys = np.stack(ys)
    return MyDataset(Xs, ys)

# scaling
scaler = MinMaxScaler()
scaled_df = pd.DataFrame(index=df.index)
scaled_df[[*columns, 'y']] = scaler.fit_transform(df.loc[:, [*columns, 'y']])

train, test = train_test_split(scaled_df, test_size=0.2, shuffle=False)
train, val = train_test_split(scaled_df, test_size=0.2, shuffle=False)
train_dataset = make_dataset(train, seqlen=60)
val_dataset = make_dataset(val, seqlen=60)
train_dataloader = DataLoader(train_dataset, batch_size=129)
val_dataloader = DataLoader(val_dataset, batch_size=1)
X, y = next(iter(train_dataloader))
print(X.shape, y.shape)

In [135]:
# モデル定義
import torch
import torch.nn as nn
import torch.optim as optimizers
from torch.utils.data import DataLoader

class RNN(nn.Module):

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.l1 = nn.RNN(input_dim, hidden_dim, nonlinearity='tanh', batch_first=True)
        self.l2 = nn.Linear(hidden_dim, 1)
        
        nn.init.xavier_normal_(self.l1.weight_ih_l0)
        nn.init.orthogonal(self.l1.weight_hh_l0)
        
    def forward(self, x):
        h, _ = self.l1(x)
        y = self.l2(h[:, -1])
        return y

In [136]:
torch.manual_seed(123)

device = torch.device('cpu')
input_dim = 4
hidden_dim = 64
model = RNN(input_dim, hidden_dim)
optimizer = optimizers.Adam(model.parameters(), lr=0.005)
criterion = nn.L1Loss()

epochs = 50
batch_size = 128
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)


def compute_loss(y, yhat):
    return criterion(yhat, y)

def train_step(x, y):
    model.train()
    yhat = model(x)
    loss = compute_loss(y, yhat)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return yhat, loss

def val_step(x, y):
    model.eval()
    yhat = model(x)
    loss = compute_loss(y, yhat)
    return yhat, loss

for epoch in range(epochs):
    train_loss = .0
    val_loss = .0
    
    for X, y in train_dataloader:
        _, loss = train_step(X, y)    
        train_loss += loss.item()
    train_loss /= len(train_dataloader)
    
    for X, y in val_dataloader:
        _, loss = val_step(X, y)
        val_loss += loss.item()
    val_loss /= len(val_dataset)
    
    print(
        'epoch: {}, train loss: {}, val loss: {}'\
        .format(epoch, train_loss, val_loss)
    )


  nn.init.orthogonal(self.l1.weight_hh_l0)


epoch: 0, train loss: 0.04551415020538555, val loss: 0.04251805328777138
epoch: 1, train loss: 0.03129968176094385, val loss: 0.03185186869871449
epoch: 2, train loss: 0.030121333098837307, val loss: 0.03151913540254177
epoch: 3, train loss: 0.03057419862802867, val loss: 0.03463617933677955
epoch: 4, train loss: 0.03016514228759231, val loss: 0.03285168133361239
epoch: 5, train loss: 0.0296534845748773, val loss: 0.03398980784793974
epoch: 6, train loss: 0.030677377010931026, val loss: 0.03883742202125805
epoch: 7, train loss: 0.029747689797819315, val loss: 0.036147157438624074
epoch: 8, train loss: 0.030495204461308625, val loss: 0.03228093672176482
epoch: 9, train loss: 0.029928806821232314, val loss: 0.031631492466573985
epoch: 10, train loss: 0.029619674310906903, val loss: 0.031725456901419334
epoch: 11, train loss: 0.02946103953725689, val loss: 0.03719429072569794
epoch: 12, train loss: 0.030180084815883374, val loss: 0.031572388290939196
epoch: 13, train loss: 0.0296572425535