In [1]:
import torch
from torch import nn, cuda
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
from torchsummary import summary

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tqdm.auto import tqdm, trange

import optuna
from optuna.integration import SkoptSampler # pip install scikit_optimize
from optuna.samplers import RandomSampler
import madgrad # pip install madgrad
import warnings
import matplotlib.pyplot as plt
import random
import os
import shutil
%matplotlib inline
plt.style.use("default")

# MISC

### ignore UserWarning

In [2]:
warnings.filterwarnings("ignore", category=RuntimeWarning)

### global variables

In [3]:
global device, batch_size, img_size
device = torch.device("cuda" if cuda.is_available() else "cpu")
batch_size = 128
img_size = 24

### reproductibility

In [4]:
global SEED
SEED = 13


def set_seed(random_seed: int = SEED):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)

In [5]:
def save_weight(model, path: str, weight_file_name: str):
    os.makedirs(path, exist_ok=True)
    torch.save(model.state_dict(), f"{path}/{weight_file_name}")
    print(f"[+] Saved {path}/{weight_file_name}")

# Data

In [6]:
df = pd.read_csv("./data/kospi.csv")
print(df.head())
df = df.drop(columns=["Date", "Adj Close"], axis=1)  # drop unnecessary columns
print(df.head())
scaler = MinMaxScaler(
    feature_range=(0, 1)
)  # Transform features by scaling each feature to a given range.
df[:] = scaler.fit_transform(df[:])
print(df.head())


         Date         Open         High          Low        Close  \
0  2019-01-30  2183.489990  2206.199951  2177.879883  2206.199951   
1  2019-01-31  2222.879883  2222.879883  2201.219971  2204.850098   
2  2019-02-01  2211.929932  2218.780029  2201.409912  2203.459961   
3  2019-02-07  2215.770020  2215.790039  2196.770020  2203.419922   
4  2019-02-08  2192.010010  2192.479980  2171.959961  2177.050049   

     Adj Close  Volume  
0  2206.199951  480400  
1  2204.850098  545200  
2  2203.459961  479400  
3  2203.419922  427800  
4  2177.050049  391000  
          Open         High          Low        Close  Volume
0  2183.489990  2206.199951  2177.879883  2206.199951  480400
1  2222.879883  2222.879883  2201.219971  2204.850098  545200
2  2211.929932  2218.780029  2201.409912  2203.459961  479400
3  2215.770020  2215.790039  2196.770020  2203.419922  427800
4  2192.010010  2192.479980  2171.959961  2177.050049  391000
       Open      High       Low     Close    Volume
0  0.722898

In [7]:
print(df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 431 entries, 0 to 430
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    431 non-null    float64
 1   High    431 non-null    float64
 2   Low     431 non-null    float64
 3   Close   431 non-null    float64
 4   Volume  431 non-null    float64
dtypes: float64(5)
memory usage: 17.0 KB
None


In [8]:
# Dataset
x = df.values
y = df["Close"].values


In [9]:
print(x[:5])
print(y[:5])


[[0.72289794 0.73235114 0.74552494 0.75923473 0.24211269]
 [0.76305769 0.75006898 0.76908866 0.75786563 0.27477069]
 [0.75189373 0.74571401 0.76928042 0.75645566 0.24160871]
 [0.75580887 0.74253797 0.76459607 0.75641506 0.21560327]
 [0.73158448 0.71777744 0.7395483  0.72966914 0.19705675]]
[0.75923473 0.75786563 0.75645566 0.75641506 0.72966914]


In [10]:
def seq_data(x, y, sequence_length):
    x_seq = []
    y_seq = []
    for i in range(len(x) - sequence_length):
        x_seq.append(x[i : i + sequence_length])  # a[2:6] -> 2,3,4,5
        y_seq.append(y[i + sequence_length])
    x_seq = np.array(x_seq)
    y_seq = np.array(y_seq)
    return torch.FloatTensor(x_seq).to(device), torch.FloatTensor(y_seq).to(
        device
    ).view(-1, 1)


In [11]:
def build_data(x, y, batch_size, test_ratio, sequence_length, quiet=True):
    x_seq, y_seq = seq_data(x, y, sequence_length)
    test_len = int(len(x_seq) * test_ratio)
    train_len = len(x_seq) - test_len
    x_train_seq = x_seq[:train_len]
    y_train_seq = y_seq[:train_len]
    x_test_seq = x_seq[train_len:]
    y_test_seq = y_seq[train_len:]

    train_dataset = torch.utils.data.TensorDataset(x_train_seq, y_train_seq)
    test_dataset = torch.utils.data.TensorDataset(x_test_seq, y_test_seq)

    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True
    )
    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset, batch_size=batch_size, shuffle=False
    )
    if not quiet:
        print(f"[+] train len: {train_len}, test len: {test_len}")
    return train_dataset, test_dataset, train_loader, test_loader


# Model and Loss function

### model

In [12]:
class RNN(nn.Module):
    def __init__(
        self,
        input_size,
        hidden_size,
        output_size,
        seq_len,
        num_layers,
    ):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * seq_len, output_size), nn.Sigmoid()
        )

    def forward(self, x):  # torch.Size([N, seq_len, 5])
        output, h_n = self.rnn(
            x
        )  # (N, seq_len, D * hidden_size), (D * num_layers, N, hidden_size)
        # many to many
        out = output.flatten(1)  # (N, seq_len * D * hidden_size)
        out = self.fc(out)
        return out


In [13]:
class LSTM(nn.Module):
    def __init__(
        self,
        input_size,
        hidden_size,
        output_size,
        seq_len,
        num_layers,
    ):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Sequential(
            nn.Linear(hidden_size * seq_len, output_size), nn.Sigmoid()
        )

    def forward(self, x):  # torch.Size([N, seq_len, 5])
        output, (h_n, c_n) = self.lstm(
            x
        )  # (N, seq_len, D * hidden_size), (D * num_layers, N, hidden_size), (D * num_layers, N, hidden_size)
        # many to many
        out = output.flatten(1)  # (N, seq_len * D * hidden_size)
        out = self.fc(out)
        return out


# Train

In [14]:
def train(model, criterion, optimizer, train_loader, epoch, quiet=False):
    loss_item = 0.0
    for data in train_loader:
        seq, target = data
        pred = model(seq)
        loss = criterion(pred, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_item += loss.item()
    loss_item = loss_item / len(train_loader)
    if not quiet:
        print(f"[Train Epoch: {epoch:3d}] loss: {loss_item:.4f}")
    return loss_item


# Test

In [15]:
def test(
    model, criterion, train_dataset, test_dataset, sequence_length, batch_size=100, quiet=True, print_plot=False
):
    concatdata = torch.utils.data.ConcatDataset([train_dataset, test_dataset])
    test_loader = torch.utils.data.DataLoader(
        dataset=concatdata, batch_size=batch_size, shuffle=False
    )
    with torch.no_grad():
        loss_item = 0.0
        preds = []
        model.eval()
        for data in test_loader:
            seq, target = data
            pred = model(seq)
            loss = criterion(pred, target)
            loss_item += loss.item()
            preds += pred.cpu().tolist()
    if print_plot:
        plt.figure(figsize=(15, 5))
        # plt.plot(np.ones(2) * len(train_dataset), np.linspace(0, 1, 2), "--", linewidth=0.6)
        plt.axvline(
            x=len(train_dataset),
            ymin=0,
            ymax=1,
            color="black",
            linestyle="--",
            linewidth=0.6,
        )
        plt.plot(
            df["Close"][sequence_length:].values,
            color="red",
            linestyle="solid",
            linewidth=0.6,
        )
        plt.plot(preds, color="blue", linestyle="dotted", linewidth=0.8)
        plt.legend(["train boundary", "actual", "prediction"])
        plt.show()
    if not quiet:
        print(f"[Test loss] {loss_item / len(test_loader):.4f}")
    return loss_item / len(test_loader)


# Objective

In [16]:
batch_size = 64
test_ratio = 0.5
n_epochs = 500

def objective(trial):
    input_size = 5
    hidden_size = trial.suggest_int("hidden_size", 8, 32)
    output_size = 1
    seq_len = trial.suggest_int("seq_len", 2, 32)
    num_layers = trial.suggest_int("num_layers", 1, 8)
    
    train_dataset, test_dataset, train_loader, test_loader = build_data(
        x, y, batch_size, test_ratio, seq_len
    )
    model_name = trial.suggest_categorical("model_name", ["RNN", "LSTM"])
    if model_name=="RNN":
        model = RNN(input_size, hidden_size, output_size, seq_len, num_layers).to(device)
    else:
        model = LSTM(input_size, hidden_size, output_size, seq_len, num_layers).to(device)
    opt_name = trial.suggest_categorical(
        "optimizer",
        ["Adam", "Adadelta", "RMSprop", "SGD", "MADGRAD"],
    )
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    if opt_name == "MADGRAD":
        optimizer = madgrad.MADGRAD(model.parameters(), lr=lr)
    else:
        optimizer = getattr(optim, opt_name)(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    
    for epoch in range(1, n_epochs + 1):
        train(model, criterion, optimizer, train_loader, epoch, quiet=(epoch+1!=n_epochs))
        loss = test(model, criterion, train_dataset, test_dataset, seq_len)
        trial.report(loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return loss

# Run

In [17]:
n_trials=50
sampler=RandomSampler()
study_name = f"random-{n_epochs}-{n_trials}"
storage = f"sqlite:///random.db"

study = optuna.create_study(study_name=study_name, direction="minimize", storage=storage, sampler=sampler, load_if_exists=True)
study.optimize(objective, n_trials=n_trials)

[32m[I 2022-09-02 20:54:22,400][0m A new study created in RDB with name: random-500-50[0m
[32m[I 2022-09-02 20:55:05,083][0m Trial 0 finished with value: 0.034492040332406756 and parameters: {'hidden_size': 17, 'seq_len': 14, 'num_layers': 7, 'model_name': 'RNN', 'optimizer': 'SGD', 'lr': 0.0005834364247901622}. Best is trial 0 with value: 0.034492040332406756.[0m


[Train Epoch: 499] loss: 0.0065
[Train Epoch: 499] loss: 0.0004


[32m[I 2022-09-02 20:56:17,447][0m Trial 1 finished with value: 0.003713490594236646 and parameters: {'hidden_size': 30, 'seq_len': 31, 'num_layers': 1, 'model_name': 'RNN', 'optimizer': 'MADGRAD', 'lr': 0.001212165415306527}. Best is trial 1 with value: 0.003713490594236646.[0m


[Train Epoch: 499] loss: 0.0062


[32m[I 2022-09-02 21:00:02,073][0m Trial 2 finished with value: 0.03342201877385378 and parameters: {'hidden_size': 14, 'seq_len': 16, 'num_layers': 7, 'model_name': 'LSTM', 'optimizer': 'MADGRAD', 'lr': 0.00030580895570084276}. Best is trial 1 with value: 0.003713490594236646.[0m


[Train Epoch: 499] loss: 0.0006


[32m[I 2022-09-02 21:01:10,010][0m Trial 3 finished with value: 0.005707031086785719 and parameters: {'hidden_size': 9, 'seq_len': 6, 'num_layers': 6, 'model_name': 'LSTM', 'optimizer': 'Adam', 'lr': 0.0006578094769578368}. Best is trial 1 with value: 0.003713490594236646.[0m


[Train Epoch: 499] loss: 0.0095


[32m[I 2022-09-02 21:01:56,431][0m Trial 4 finished with value: 0.03844385351985693 and parameters: {'hidden_size': 24, 'seq_len': 7, 'num_layers': 1, 'model_name': 'LSTM', 'optimizer': 'SGD', 'lr': 0.0016276228596029706}. Best is trial 1 with value: 0.003713490594236646.[0m
[32m[I 2022-09-02 21:01:56,722][0m Trial 5 pruned. [0m


[Train Epoch: 499] loss: 0.0016


[32m[I 2022-09-02 21:05:16,880][0m Trial 6 finished with value: 0.018442006106488405 and parameters: {'hidden_size': 29, 'seq_len': 23, 'num_layers': 2, 'model_name': 'LSTM', 'optimizer': 'MADGRAD', 'lr': 0.00011933126902197484}. Best is trial 1 with value: 0.003713490594236646.[0m
[32m[I 2022-09-02 21:05:17,947][0m Trial 7 pruned. [0m


[Train Epoch: 499] loss: 0.0003


[32m[I 2022-09-02 21:06:25,254][0m Trial 8 finished with value: 0.0028337189753074197 and parameters: {'hidden_size': 24, 'seq_len': 9, 'num_layers': 1, 'model_name': 'LSTM', 'optimizer': 'Adam', 'lr': 0.003728966576758113}. Best is trial 8 with value: 0.0028337189753074197.[0m
[32m[I 2022-09-02 21:06:25,531][0m Trial 9 pruned. [0m


[Train Epoch: 499] loss: 0.0003


[32m[I 2022-09-02 21:08:12,173][0m Trial 10 finished with value: 0.004076116074429592 and parameters: {'hidden_size': 9, 'seq_len': 32, 'num_layers': 2, 'model_name': 'LSTM', 'optimizer': 'MADGRAD', 'lr': 0.009978162406475699}. Best is trial 8 with value: 0.0028337189753074197.[0m
[32m[I 2022-09-02 21:08:12,355][0m Trial 11 pruned. [0m
[32m[I 2022-09-02 21:08:12,929][0m Trial 12 pruned. [0m
[32m[I 2022-09-02 21:08:13,338][0m Trial 13 pruned. [0m
[32m[I 2022-09-02 21:08:14,085][0m Trial 14 pruned. [0m
[32m[I 2022-09-02 21:08:25,216][0m Trial 15 pruned. [0m
[32m[I 2022-09-02 21:08:25,721][0m Trial 16 pruned. [0m


[Train Epoch: 499] loss: 0.0004


[32m[I 2022-09-02 21:10:23,336][0m Trial 17 finished with value: 0.0033652586047537626 and parameters: {'hidden_size': 21, 'seq_len': 28, 'num_layers': 1, 'model_name': 'RNN', 'optimizer': 'Adam', 'lr': 0.001198102635902164}. Best is trial 8 with value: 0.0028337189753074197.[0m
[32m[I 2022-09-02 21:10:23,583][0m Trial 18 pruned. [0m
[32m[I 2022-09-02 21:10:24,113][0m Trial 19 pruned. [0m
[32m[I 2022-09-02 21:10:35,568][0m Trial 20 pruned. [0m
[32m[I 2022-09-02 21:10:35,782][0m Trial 21 pruned. [0m
[32m[I 2022-09-02 21:10:35,991][0m Trial 22 pruned. [0m
[32m[I 2022-09-02 21:11:25,138][0m Trial 23 finished with value: 0.0073363186209462585 and parameters: {'hidden_size': 18, 'seq_len': 9, 'num_layers': 5, 'model_name': 'RNN', 'optimizer': 'RMSprop', 'lr': 0.0017069696219720288}. Best is trial 8 with value: 0.0028337189753074197.[0m


[Train Epoch: 499] loss: 0.0005
[Train Epoch: 499] loss: 0.0003


[32m[I 2022-09-02 21:12:20,881][0m Trial 24 finished with value: 0.005653098656330258 and parameters: {'hidden_size': 17, 'seq_len': 10, 'num_layers': 2, 'model_name': 'LSTM', 'optimizer': 'Adam', 'lr': 0.003569389270723378}. Best is trial 8 with value: 0.0028337189753074197.[0m
[32m[I 2022-09-02 21:12:21,623][0m Trial 25 pruned. [0m
[32m[I 2022-09-02 21:12:22,051][0m Trial 26 pruned. [0m
[32m[I 2022-09-02 21:12:23,147][0m Trial 27 pruned. [0m
[32m[I 2022-09-02 21:12:32,144][0m Trial 28 pruned. [0m
[32m[I 2022-09-02 21:12:32,657][0m Trial 29 pruned. [0m
[32m[I 2022-09-02 21:12:35,324][0m Trial 30 pruned. [0m
[32m[I 2022-09-02 21:13:08,372][0m Trial 31 pruned. [0m
[32m[I 2022-09-02 21:13:08,706][0m Trial 32 pruned. [0m
[32m[I 2022-09-02 21:13:09,058][0m Trial 33 pruned. [0m
[32m[I 2022-09-02 21:13:09,451][0m Trial 34 pruned. [0m
[32m[I 2022-09-02 21:13:11,476][0m Trial 35 pruned. [0m
[32m[I 2022-09-02 21:13:11,773][0m Trial 36 pruned. [0m
[32m[I 2

In [18]:
pruned_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.PRUNED]
complete_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]

print("num_trials_conducted: ", len(study.trials))
print("num_trials_pruned: ", len(pruned_trials))
print("num_trials_completed: ", len(complete_trials))

trial = study.best_trial
print("[+] results: -----------------------------------------")
print(" | results from best trial:")

print(" | total loss: ", trial.value)
print(" | hyperparameters: ")
for key, value in trial.params.items():
    print(f" | {key}: {value}")
print(" +----------------------------------------------------")
    

num_trials_conducted:  50
num_trials_pruned:  39
num_trials_completed:  11
[+] results: -----------------------------------------
 | results from best trial:
 | total loss:  0.0028337189753074197
 | hyperparameters: 
 | hidden_size: 24
 | lr: 0.003728966576758113
 | model_name: LSTM
 | num_layers: 1
 | optimizer: Adam
 | seq_len: 9
 +----------------------------------------------------


In [23]:
optuna.visualization.plot_optimization_history(study)


In [25]:
optuna.visualization.plot_parallel_coordinate(study)


In [24]:
# 하이퍼파라미터 중요도
optuna.importance.get_param_importances(study)
optuna.visualization.plot_param_importances(study)

In [26]:
optuna.visualization.plot_contour(study, params=["optimizer", "lr"])