In [1]:
import os
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, Any, List, Dict, Tuple, Union
import itertools
import json
import random

import pandas as pd
import numpy as np
from numpy import floating

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import TensorDataset, DataLoader

In [2]:
EXPERIMENT_NAME = 'lstm_grid_search_2'

In [3]:
DATETIME_COLUMN = 'candle_date_time_kst'
# TARGET_COLUMN = 'win_or_lose'

# CHECKPOINT_PATH = '../model_checkpoints/simple_time_features'
CHECKPOINT_PATH = '../model_checkpoints/lstm_IOTA'
if not os.path.exists(CHECKPOINT_PATH):
  os.makedirs(CHECKPOINT_PATH)

TRAIN_DATA_FILE_NAME = 'IOTA_1m_3000000_2025-01-14T23:04:51+09:00.parquet_20250114230451.parquet'
# TEST_DATA_FILE_NAME = 'IOTA_1s_2000_2025-01-12T23:21:27+09:00.parquet_20250112232127.parquet'

# Parameters
# INPUT_LENGTH = 60  # Number of past time steps to use as input
# OUTPUT_LENGTH = 12  # Number of future time steps to predict
# BATCH_SIZE = 32
# LEARNING_RATE = 5e-4
# EPOCHS = 10

SEQUENCE_LENGTH = 24 * 4 * 4
PREDICTION_LENGTH = 24 * 4
LABEL_LENGTH = 24 * 4


BATCH_SIZE = 128
PATIENCE = 15  # Early stopping patience


NUM_BATCHES_PER_EPOCH = 100
EPOCHS = 50
LEARNING_RATE = 5e-4
SCALING = 'std'

In [4]:
if torch.backends.mps.is_available():
    DEVICE = 'mps'
elif torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [5]:
class DataUtils:
  
  default_path = os.path.join(Path(os.getcwd()).parent, 'data')
  
  @staticmethod
  def load_parquet(file_name: str, file_dir: Optional[str] = None):
    if not file_dir:
        file_dir = DataUtils.default_path
        
    path = os.path.join(file_dir, file_name)

    if not os.path.exists(path) or file_name.split('.')[-1] != 'parquet':
        return

    print(f'Loading parquet file from: {path}')

    return pd.read_parquet(path)
  
  @staticmethod
  def feature_engineering(df: Optional[pd.DataFrame], prediction_horizon) -> pd.DataFrame:
    if df is None:
      return pd.DataFrame()

    df['return_1m'] = df['mid_price'].pct_change(1)
    df['return_5m'] = df['mid_price'].pct_change(5)
    df['return_10m'] = df['mid_price'].pct_change(10)

    # 2. 이동평균 Feature
    df['ma_5'] = df['mid_price'].rolling(window=5).mean()
    df['ma_10'] = df['mid_price'].rolling(window=10).mean()
    df['ma_30'] = df['mid_price'].rolling(window=30).mean()

    # 3. 이동 표준편차 Feature (변동성)
    df['std_5'] = df['mid_price'].rolling(window=5).std()
    df['std_10'] = df['mid_price'].rolling(window=10).std()

    # 4. 거래량 Feature
    df['volume_change_1m'] = df['candle_acc_trade_volume'].pct_change(1)
    df['volume_ma_5'] = df['candle_acc_trade_volume'].rolling(5).mean()
    df['volume_ratio'] = df['candle_acc_trade_volume'] / (df['volume_ma_5'] + 1e-9)

    # 5. 가격 구조 Feature
    df['high_low_spread'] = df['high_price'] - df['low_price']
    df['is_bullish'] = (df['mid_price'] > df['opening_price']).astype(int)
    df['body_size'] = np.abs(df['opening_price'] - df['mid_price'])
    df['body_to_range'] = df['body_size'] / (df['high_price'] - df['low_price'] + 1e-9)

    # 6. 기술적 지표 Feature

    # Relative Strength Index (RSI)
    window_length = 14
    delta = df['mid_price'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window_length).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window_length).mean()
    RS = gain / (loss + 1e-9)
    df['RSI'] = 100 - (100 / (1 + RS))

    # MACD
    ema12 = df['mid_price'].ewm(span=12, adjust=False).mean()
    ema26 = df['mid_price'].ewm(span=26, adjust=False).mean()
    df['MACD'] = ema12 - ema26
    df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

    # 7. 시간 정보 Feature (timestamp 있어야 가능)
    # 예: df['timestamp'] 가 datetime 타입이라고 가정
    if 'timestamp' in df.columns:
      df['timestamp'] = pd.to_datetime(df['timestamp'])
      df['hour'] = df['timestamp'].dt.hour
      df['minute'] = df['timestamp'].dt.minute
      df['dayofweek'] = df['timestamp'].dt.dayofweek

    # 8. Target 생성 (10분 후 수익률)
    # prediction_horizon = 10
    df['target_return'] = (df['mid_price'].shift(-prediction_horizon) - df['mid_price']) / df['mid_price']

    # 9. NaN 제거
    df = df.dropna().reset_index(drop=True)
    
    return df

In [6]:
data = DataUtils.load_parquet(TRAIN_DATA_FILE_NAME)

Loading parquet file from: /Users/minjiwon/upbase-data-server/data/IOTA_1m_3000000_2025-01-14T23:04:51+09:00.parquet_20250114230451.parquet


In [7]:
features = [
    'worst_profit_rate_before',
    'opening_price', 'high_price', 'low_price', 'mid_price',
    'candle_acc_trade_volume',
    'return_1m', 'return_5m', 'return_10m',
    'ma_5', 'ma_10', 'ma_30',
    'std_5', 'std_10',
    'volume_change_1m'
]

In [8]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.0):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            dropout=0.0 if hidden_dim == 1 else dropout,
            batch_first=True
        )
        self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # 마지막 시점 hidden state만 사용 -> (batch_size, 1, input_dim)
        out = self.fc(out)
        return out
    

class ScheduledSamplingLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout=0.0):
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim, hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, input_seq, target_seq=None, teacher_forcing_ratio=1.0):
        """
        input_seq: (batch_size, window_size, input_dim)
        target_seq: (batch_size, horizon, input_dim) — horizon-step future
        """
        
        if target_seq is None or teacher_forcing_ratio == 0.0:
            out, _ = self.lstm(input_seq)
            out = out[:, -1, :]  # 마지막 시점 hidden state만 사용
            out = self.fc(out)
            return out
            

        # teacher forcing
        outputs = []
        
        _, (hidden, cell) = self.lstm(input_seq)
        input_t = input_seq[:, -1:, :]  # shape: (batch, 1, input_dim)
        horizon = target_seq.size(1)
        
        for t in range(horizon):
            output, (hidden, cell) = self.lstm(input_t, (hidden, cell))
            pred = self.fc(output)  # (batch, 1, output_dim)
            outputs.append(pred)

            # Scheduled Sampling
            if random.random() < teacher_forcing_ratio:
                input_t = target_seq[:, t:t+1]
            else:
                input_t = pred.detach()  # 모델 예측 사용 (detach 해야 그래디언트 끊김)

        outputs = torch.cat(outputs, dim=1)  # (batch_size, horizon, output_dim)
        return outputs

In [13]:
FloatType = floating[Any]

@dataclass
class GridSearchResult:
    
    lr: float
    hidden_dim: int
    num_layers: int
    output_dim: int
    window_size: int
    horizon: int
    scheduled_sampling: bool
    best_model: str
    valid_loss: FloatType
    test_loss: FloatType

    def __str__(self):
        return f"lr={self.lr}, hidden_dim={self.hidden_dim}, num_layers={self.num_layers}, output_dim={self.output_dim}, window_size={self.window_size}, horizon={self.horizon}, valid_loss={self.valid_loss}, test_loss={self.test_loss}, best_model={self.best_model}, scheduled_sampling={self.scheduled_sampling}"
    
    def __repr__(self):
        return f"GridSearchResult(lr={self.lr}, hidden_dim={self.hidden_dim}, num_layers={self.num_layers}, output_dim={self.output_dim}, window_size={self.window_size}, horizon={self.horizon}, valid_loss={self.valid_loss}, test_loss={self.test_loss}, best_model={self.best_model}), scheduled_sampling={self.scheduled_sampling}"
    
    def __eq__(self, other):
        if not isinstance(other, GridSearchResult):
            return NotImplemented
        
        return (
            self.lr == other.lr and
            self.hidden_dim == other.hidden_dim and
            self.num_layers == other.num_layers and
            self.output_dim == other.output_dim and
            self.window_size == other.window_size and
            self.horizon == other.horizon and
            self.valid_loss == other.valid_loss and
            self.test_loss == other.test_loss and
            self.best_model == other.best_model and
            self.scheduled_sampling == other.scheduled_sampling
        )
        
        
@dataclass
class Cell:
    
    lr: float
    hidden_dim: int
    num_layers: int
    output_dim: int
    dropout_rate: float
    window_size: int
    horizon: int
    scheduled_sampling: bool
    train_ratio: float = 0.7
    valid_ratio: float = 0.15
    test_ratio: float = 0.15
    
    def __str__(self):
        return f"lr={self.lr}, hidden_dim={self.hidden_dim}, num_layers={self.num_layers}, output_dim={self.output_dim}, dropout_rate={self.dropout_rate}, window_size={self.window_size}, horizon={self.horizon}, scheduled_sampling={self.scheduled_sampling}, train_ratio={self.train_ratio}, valid_ratio={self.valid_ratio}, test_ratio={self.test_ratio}"
    
    def __repr__(self):
        return f"Cell(lr={self.lr}, hidden_dim={self.hidden_dim}, num_layers={self.num_layers}, output_dim={self.output_dim}, dropout_rate={self.dropout_rate}, window_size={self.window_size}, horizon={self.horizon}), scheduled_sampling={self.scheduled_sampling}, train_ratio={self.train_ratio}, valid_ratio={self.valid_ratio}, test_ratio={self.test_ratio}"
    
    def __eq__(self, other):
        if not isinstance(other, Cell):
            return NotImplemented   
        
        return (
            self.lr == other.lr and
            self.hidden_dim == other.hidden_dim and
            self.num_layers == other.num_layers and
            self.output_dim == other.output_dim and
            self.dropout_rate == other.dropout_rate and
            self.window_size == other.window_size and
            self.horizon == other.horizon
        )
    
    def shorthand(self):
        return f"{self.lr}_{self.hidden_dim}_{self.num_layers}_{self.output_dim}_{self.dropout_rate}_{self.window_size}_{self.horizon}_{self.scheduled_sampling}"

@dataclass 
class Grid:
    
    lr: List[float]
    hidden_dim: List[int]
    num_layers: List[int]
    output_dim: List[int]
    dropout_rate: List[float]
    window_size: List[int]
    horizon: List[int]
    scheduled_sampling: List[bool]
    
    def __str__(self):
        return f"lr={self.lr}, hidden_dim={self.hidden_dim}, num_layers={self.num_layers}, output_dim={self.output_dim}, dropout_rate={self.dropout_rate}, window_size={self.window_size}, horizon={self.horizon}, scheduled_sampling={self.scheduled_sampling}"
    
    def __repr__(self):
        return f"Grid(lr={self.lr}, hidden_dim={self.hidden_dim}, num_layers={self.num_layers}, output_dim={self.output_dim}, dropout_rate={self.dropout_rate}, window_size={self.window_size}, horizon={self.horizon}, scheduled_sampling={self.scheduled_sampling})"
    
    def __eq__(self, other) :
        if not isinstance(other, Grid):
            return NotImplemented
        
        return (
            self.lr == other.lr and
            self.hidden_dim == other.hidden_dim and
            self.num_layers == other.num_layers and
            self.output_dim == other.output_dim and
            self.dropout_rate == other.dropout_rate and
            self.window_size == other.window_size and
            self.horizon == other.horizon and 
            self.scheduled_sampling == other.scheduled_sampling
        )
        
    def includes(self, other):
        if isinstance(other, Cell):
            return (
                other.lr in set(self.lr) and
                other.hidden_dim in set(self.hidden_dim) and
                other.num_layers in set(self.num_layers) and
                other.output_dim in set(self.output_dim) and
                other.dropout_rate in set(self.dropout_rate) and
                other.window_size in set(self.window_size) and
                other.horizon in set(self.horizon) and 
                other.scheduled_sampling in set(self.scheduled_sampling)
            )
            
        if isinstance(other, Grid):
            return (
                set(self.lr).issuperset(other.lr) and
                set(self.hidden_dim).issuperset(other.hidden_dim) and
                set(self.num_layers).issuperset(other.num_layers) and
                set(self.output_dim).issuperset(other.output_dim) and
                set(self.dropout_rate).issuperset(other.dropout_rate) and
                set(self.window_size).issuperset(other.window_size) and
                set(self.horizon).issuperset(other.horizon) and
                set(self.scheduled_sampling).issuperset(other.scheduled_sampling)
            )
            
        return NotImplemented
        
    def to_cells(self):
        return [
            Cell(
                lr=lr,
                hidden_dim=hidden_dim,
                num_layers=num_layers,
                output_dim=output_dim,
                dropout_rate=dropout_rate,
                window_size=window_size,
                horizon=horizon,
                scheduled_sampling=scheduled_sampling,
            )
            for (lr, hidden_dim, num_layers, output_dim, dropout_rate, window_size, horizon, scheduled_sampling) 
            in (itertools.product(
                self.lr,
                self.hidden_dim,
                self.num_layers,
                self.output_dim,
                self.dropout_rate,
                self.window_size,
                self.horizon,
                self.scheduled_sampling,
            ))
        ]
        

class Tester:
    
    @staticmethod
    def test(params: Cell):
        print("Making dataset...", end='\r', flush=True)
        # 기본 세팅
        # window_size = 60  # 과거 60개 (60분) 시점 사용
        # horizon = 10      # 10분 후 수익률 예측
        
        data_pp = DataUtils.feature_engineering(data, prediction_horizon=params.horizon)
        

        # feature/target 준비
        feature_cols = features  # 아까 정리한 feature 리스트
        target_col = 'target_return'

        num_data = int(len(data_pp) * 0.1)
        data_pppp = data_pp.iloc[-num_data:]

        X_all = data_pppp[feature_cols].values
        y_all = data_pppp[target_col].values

        # 시퀀스 데이터 만들기
        X_seq = []
        y_seq = []

        for i in range(params.window_size, len(data_pppp) - params.horizon):
            X_seq.append(X_all[i - params.window_size:i])
            y_seq.append(y_all[i + params.horizon])

        X_seq = np.array(X_seq)
        y_seq = np.array(y_seq)

        # print(f'data shape: {X_seq.shape}, {y_seq.shape}')  # (samples, time_steps, feature_dim), (samples,)

        # train_ratio = 0.7
        # valid_ratio = 0.15
        # test_ratio = 0.15

        n_total = len(X_seq)
        n_train = int(n_total * params.train_ratio)
        n_valid = int(n_total * params.valid_ratio)

        X_train, y_train = X_seq[:n_train], y_seq[:n_train]
        X_valid, y_valid = X_seq[n_train:n_train+n_valid], y_seq[n_train:n_train+n_valid]
        X_test, y_test = X_seq[n_train+n_valid:], y_seq[n_train+n_valid:]

        # print(X_train.shape, X_valid.shape, X_test.shape)
        
        print("Preparing dataloaders...", end='\r', flush=True)
        

        # 1. Tensor 변환
        device = DEVICE

        X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
        y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1).to(device)

        X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32).to(device)
        y_valid_tensor = torch.tensor(y_valid, dtype=torch.float32).unsqueeze(1).to(device)

        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
        y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1).to(device)

        # 2. DataLoader 만들기
        # todo: batch_size 조정
        batch_size = BATCH_SIZE

        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        valid_dataset = TensorDataset(X_valid_tensor, y_valid_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
        
        # 모델 세팅
        input_dim = X_train.shape[2]   # feature 수
        # hidden_dim = 64
        # num_layers = 2
        # output_dim = 1
        
        # 3. 모델, Loss, Optimizer 세팅
        model = (ScheduledSamplingLSTM if params.scheduled_sampling else LSTMModel)(
            input_dim, 
            params.hidden_dim, 
            params.num_layers, 
            params.output_dim
        ).to(device)
        
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=params.lr, weight_decay=0)
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

        # 4. 학습 루프
        num_epochs = 100
        best_valid_loss = np.inf
        patience_counter = 0
        
        best_checkpoint: str = ''
        
        for epoch in range(num_epochs):
            model.train()
            train_losses = []
            
            teacher_forcing_ratio = max(0.0, 1.0 - 0.05 * epoch) if params.scheduled_sampling else 0.0
            
            for index, (X_batch, y_batch) in enumerate(train_loader):
                print(f"Epoch {epoch+1}/{num_epochs} | ({(index + 1) / len(train_loader) * 100:.02f}% trained) - Train Loss: {np.mean(train_losses) if train_losses else np.inf:.6f}, lr: {scheduler.get_last_lr()}, teacher_forcing: {teacher_forcing_ratio}{' ' * 5}", end='\r', flush=True)
                
                optimizer.zero_grad()
                outputs = model(X_batch, y_batch, teacher_forcing_ratio) if teacher_forcing_ratio > 0.0 else model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
                train_losses.append(loss.item())
            
            # 검증
            model.eval()
            valid_losses = []
            
            with torch.no_grad():
                for index, (X_batch, y_batch) in enumerate(valid_loader):
                    print(f"Epoch {epoch+1}/{num_epochs} | ({(index + 1) / len(valid_loader) * 100:.02f}% validated) - Valid Loss: {np.mean(valid_losses if valid_losses else np.inf):.6f}", end='\r', flush=True)
                    
                    outputs = model(X_batch)
                    loss = criterion(outputs, y_batch)
                    valid_losses.append(loss.item())
            
            print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {np.mean(train_losses):.6f}, Valid Loss: {np.mean(valid_losses):.6f}", end='\r', flush=True)
            
            # avg_train_loss = np.mean(train_losses)
            avg_valid_loss = np.mean(valid_losses)
            scheduler.step(avg_valid_loss)
            
            # Early Stopping & Best Checkpoint 저장
            if avg_valid_loss < best_valid_loss:
                best_valid_loss = avg_valid_loss
                best_checkpoint = os.path.join(CHECKPOINT_PATH, f'{EXPERIMENT_NAME}__{params.shorthand()}__{avg_valid_loss * 10000:.2f}.pth')
                torch.save(model.state_dict(), best_checkpoint)
                print(f"✅ Best model saved at epoch {epoch + 1} with Valid Loss {best_valid_loss:.6f}, lr={scheduler.get_last_lr()}{' ' * 30}", flush=True)
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= PATIENCE:
                    print(f"⏹️ Early stopping triggered at epoch {epoch + 1}{' ' * 80}", flush=True)
                    break
                
        model.load_state_dict(torch.load(best_checkpoint))
        model.eval()

        # Test 데이터로 최종 평가
        test_losses = []

        with torch.no_grad():
            for index, (X_batch, y_batch) in enumerate(test_loader):
                print(f"{(index + 1) / len(test_loader) * 100:.02f}% testing", end='\r', flush=True)
                
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                test_losses.append(loss.item())

        avg_test_loss = np.mean(test_losses)
        print(f'✅ Best Model 기준 최종 Test Loss (MSE): {avg_test_loss:.6f}{" " * 50}', flush=True)
        
        return GridSearchResult(
            lr=params.lr,
            hidden_dim=params.hidden_dim,
            num_layers=params.num_layers,
            output_dim=params.output_dim,
            window_size=params.window_size,
            horizon=params.horizon,
            scheduled_sampling=params.scheduled_sampling,
            best_model=best_checkpoint,
            valid_loss=best_valid_loss,
            test_loss=avg_test_loss,
        )


class GridSearch:
    
    def __init__(
        self,
        lr: Union[float, List[float]],
        hidden_dim: Union[int, List[int]],
        num_layers: Union[int, List[int]],
        output_dim: Union[int, List[int]],
        window_size: Union[int, List[int]],
        horizon: Union[int, List[int]],
        dropout_rate: Union[float, List[float]],
        scheduled_sampling: Union[bool, List[bool]],
        exceptions: Optional[Union[Grid, List[Cell]]] = None,
    ):
        self.grid = GridSearch.make_grid(
            lr=lr,
            hidden_dim=hidden_dim,
            num_layers=num_layers,
            output_dim=output_dim,
            window_size=window_size,
            horizon=horizon,
            dropout_rate=dropout_rate,
            scheduled_sampling=scheduled_sampling,
        )
        self.exceptions = exceptions
        self.results = []
        
    @staticmethod
    def make_grid(
        lr,
        hidden_dim,
        num_layers,
        output_dim,
        dropout_rate,
        window_size,
        horizon,
        scheduled_sampling,
    ):
        return Grid(
            lr=lr if isinstance(lr, list) else [lr],
            hidden_dim=hidden_dim if isinstance(hidden_dim, list) else [hidden_dim],
            num_layers=num_layers if isinstance(num_layers, list) else [num_layers],
            output_dim=output_dim if isinstance(output_dim, list) else [output_dim],
            dropout_rate=dropout_rate if isinstance(dropout_rate, list) else [dropout_rate],
            window_size=window_size if isinstance(window_size, list) else [window_size],
            horizon=horizon if isinstance(horizon, list) else [horizon],
            scheduled_sampling=scheduled_sampling if isinstance(scheduled_sampling, list) else [scheduled_sampling],
        )
        
    def run(self):
        
        def _is_exception(params):
            if isinstance(self.exceptions, list):
                return params in self.exceptions
            elif isinstance(self.exceptions, Grid):
                return self.exceptions.includes(params)
            return False
        
        test_count = 0
        
        for params in self.grid.to_cells():
            if params.window_size <= params.horizon:
                print(f"Skipping test with window_size={params.window_size} <= horizon={params.horizon}", flush=True)
                continue
            
            if _is_exception(params):
                print(f"Skipping test with params: {params} (in exceptions)", flush=True)
                continue
            
            test_count += 1
            print(f"[TEST #{test_count}]\n{params}")
            
            result = Tester.test(params)
            
            print(f"Result: {result}", end='\n\n\n', flush=True)
            self.results.append(result)
                                
        return self.results

In [None]:
# batch_size = 128, data_size = 10%

searcher = GridSearch(
  lr=[5e-4],
  hidden_dim=[128, 256],
  num_layers=[2, 3],
  output_dim=[1],
  window_size=[60],
  horizon=[10, 20],
  dropout_rate=[0.0, 0.1, 0.2, 0.3],
  scheduled_sampling=[False],
  # exceptions=Grid(
  #   lr=[1e-3],
  #   hidden_dim=[64],
  #   num_layers=[2],
  #   output_dim=[1],
  #   window_size=[60],
  #   horizon=[10],
  # )
)

result = searcher.run()

[TEST #1]
lr=0.0005, hidden_dim=128, num_layers=2, output_dim=1, dropout_rate=0.0, window_size=60, horizon=10, scheduled_sampling=False, train_ratio=0.7, valid_ratio=0.15, test_ratio=0.15
✅ Best model saved at epoch 1 with Valid Loss 0.000057, lr=[0.0005]                              
✅ Best model saved at epoch 4 with Valid Loss 0.000057, lr=[0.0005]                              
✅ Best model saved at epoch 5 with Valid Loss 0.000056, lr=[0.0005]                              
✅ Best model saved at epoch 6 with Valid Loss 0.000053, lr=[0.0005]                              
✅ Best model saved at epoch 10 with Valid Loss 0.000051, lr=[0.0005]                              
✅ Best model saved at epoch 11 with Valid Loss 0.000051, lr=[0.0005]                              
✅ Best model saved at epoch 15 with Valid Loss 0.000051, lr=[0.0005]                              
✅ Best model saved at epoch 17 with Valid Loss 0.000051, lr=[0.0005]                              
✅ Best model saved at ep

In [None]:
# batch_size = 128, data_size = 20%

searcher = GridSearch(
  lr=[5e-4],
  hidden_dim=[128, 256],
  num_layers=[2, 3],
  output_dim=[1],
  window_size=[60],
  horizon=[10, 20],
  dropout_rate=[0.1, 0.2, 0.3],
  scheduled_sampling=[True, False],
  # exceptions=Grid(
  #   lr=[1e-3],
  #   hidden_dim=[64],
  #   num_layers=[2],
  #   output_dim=[1],
  #   window_size=[60],
  #   horizon=[10],
  # )
)

result = searcher.run()

[TEST #1] 
with params: lr=0.0005, hidden_dim=128, num_layers=2, output_dim=1, dropout_rate=0.0, window_size=120, horizon=20
Preparing dataloaders...

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


✅ Best model saved at epoch 1 with Valid Loss 0.00010303
✅ Best model saved at epoch 2 with Valid Loss 0.00009898
✅ Best model saved at epoch 6 with Valid Loss 0.00009898
✅ Best model saved at epoch 9 with Valid Loss 0.00009898
⏹️ Early stopping triggered at epoch 29lid Loss: 0.000098
✅ Best Model 기준 최종 Test Loss (MSE): 0.000043
Result: lr=0.0005, hidden_dim=128, num_layers=2, output_dim=1, window_size=120, horizon=20, valid_loss=9.768993973602255e-05, test_loss=4.295517223141218e-05, best_model=../model_checkpoints/lstm_IOTA/lstm_grid_search_2__0.0005_128_2_1_0.0_120_20_True__0.98.pth, scheduled_sampling=True


[TEST #2] 
with params: lr=0.0005, hidden_dim=128, num_layers=2, output_dim=1, dropout_rate=0.1, window_size=120, horizon=20
Preparing dataloaders...

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch 1/150 | (46.35% trained) - Train Loss: 0.000058

KeyboardInterrupt: 

In [None]:
with open(f'./results/{EXPERIMENT_NAME}.json', 'w', encoding='utf-8') as f:
    json.dump([d.__dict__ for d in result], f, indent=2)

In [None]:
result_df = pd.DataFrame([d.__dict__ for d in result])
result_df = result_df.sort_values(by='test_loss', ascending=True)
result_df

Unnamed: 0,lr,hidden_dim,num_layers,output_dim,window_size,horizon,best_model,valid_loss,test_loss
9,0.001,128,1,1,60,20,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
13,0.001,128,2,1,60,20,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
28,0.0005,128,2,1,60,10,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
5,0.001,64,2,1,60,20,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
20,0.0005,64,2,1,60,10,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
17,0.0005,64,1,1,60,20,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
12,0.001,128,2,1,60,10,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
24,0.0005,128,1,1,60,10,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
4,0.001,64,2,1,60,10,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05
29,0.0005,128,2,1,60,20,../model_checkpoints/lstm_IOTA/0.1__0.00.pth,5.1e-05,3.5e-05


In [None]:
result_df.to_csv('./results/grid_search_1.csv', index=True)