In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler

# [1] --- DataHandler ÌÅ¥ÎûòÏä§ Ï†ïÏùò (V2: Scaler + zfill) ---
# (Phase 2-AÏôÄ 2-CÍ∞Ä Ìï©Ï≥êÏßÑ ÏµúÏ¢Ö Î≤ÑÏ†Ñ)
import pandas as pd
import numpy as np
import os
import sys

# --- 0. Í≤ΩÎ°ú ÏÑ§Ï†ï ---
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
DATA_DIR = os.path.join(PROJECT_ROOT, "data", "processed")
FINAL_MASTER_FILE = os.path.join(DATA_DIR, "final_master_table_v2.csv")

class DataHandler:
    """
    [V2] ÌëúÏ§ÄÌôî(Standardization)ÏôÄ zfill(6)Ïù¥ Ï†ÅÏö©Îêú DataHandler.
    """
    
    def __init__(self, file_path, train_end_date='2022-12-31'):
        self.file_path = file_path
        self.train_end_date = pd.to_datetime(train_end_date)
        self.data_by_ticker = {}   # ÏõêÎ≥∏ Îç∞Ïù¥ÌÑ∞
        self.scalers_by_ticker = {} # TickerÎ≥Ñ Scaler
        self.tickers = []
        
        self._load_and_process_data()
        self._fit_scalers()
        
    def _load_and_process_data(self):
        try:
            # 1. dtype=strÎ°ú ÏùΩÍ∏∞
            df = pd.read_csv(
                self.file_path, 
                parse_dates=['date'],
                dtype={'ticker': str} 
            )
            # 2. zfill(6)Î°ú '0' Ï±ÑÏö∞Í∏∞
            df['ticker'] = df['ticker'].str.zfill(6)
            df = df.set_index('date')
            
            self.tickers = df['ticker'].unique()
            
            for ticker in self.tickers:
                ticker_df = df[df['ticker'] == ticker].copy()
                channel_cols = [col for col in ticker_df.columns if col not in ['ticker']]
                self.data_by_ticker[ticker] = ticker_df[channel_cols]
            
            print(f"[DataHandler V2] Success: Loaded {len(self.tickers)} tickers.")
            print(f"[DataHandler V2] Available tickers: {self.tickers}")

        except Exception as e:
            print(f"[DataHandler V2] Error loading data: {e}")

    def _fit_scalers(self):
        """
        [Data Leakage Î∞©ÏßÄ] ÌõàÎ†® Îç∞Ïù¥ÌÑ∞Î°úÎßå ScalerÎ•º ÌïôÏäµ(fit)
        """
        print(f"[DataHandler V2] Fitting scalers using data up to {self.train_end_date.date()}...")
        for ticker in self.tickers:
            train_data = self.data_by_ticker[ticker].loc[:self.train_end_date]
            if train_data.empty:
                print(f"  > Warning: No training data for {ticker}.")
                continue
            
            scaler = StandardScaler()
            scaler.fit(train_data) # 'fit'ÏùÄ ÌõàÎ†® Îç∞Ïù¥ÌÑ∞Î°úÎßå!
            self.scalers_by_ticker[ticker] = scaler
        print("[DataHandler V2] Scalers fitted.")

    def get_scaled_data_by_ticker(self, ticker):
        """
        'transform'ÏùÄ Ï†ÑÏ≤¥ Îç∞Ïù¥ÌÑ∞Ïóê Ï†ÅÏö©ÌïòÏó¨ ÌëúÏ§ÄÌôîÎêú DF Î∞òÌôò
        """
        if ticker not in self.scalers_by_ticker:
            print(f"[DataHandler V2] Error: No scaler for {ticker}")
            return None
        
        original_data = self.data_by_ticker[ticker]
        scaler = self.scalers_by_ticker[ticker]
        
        scaled_data_np = scaler.transform(original_data)
        
        scaled_df = pd.DataFrame(
            scaled_data_np, 
            index=original_data.index, 
            columns=original_data.columns
        )
        return scaled_df

    def get_all_tickers(self):
        return self.tickers


In [2]:
# ============================================================
# 1. Í∏∞Î≥∏ import + ÎîîÎ∞îÏù¥Ïä§ ÏÑ§Ï†ï
# ============================================================
import os
import sys
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("[INFO] Using device:", device)


[INFO] Using device: cuda


In [3]:
# ============================================================
# 2. Í≤ΩÎ°ú ÏÑ§Ï†ï (ÌîÑÎ°úÏ†ùÌä∏ Íµ¨Ï°∞Ïóê ÎßûÍ≤å ÌïÑÏöîÏãú ÏàòÏ†ï)
# ============================================================
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
DATA_DIR     = os.path.join(PROJECT_ROOT, "data", "processed")
MASTER_TABLE_PATH = os.path.join(DATA_DIR, "final_master_table_v2.csv")

GPT2_PATH       = os.path.join(PROJECT_ROOT, "pretrained_models", "gpt2")
TIME_LLM_ROOT   = os.path.join(PROJECT_ROOT, "external", "time-llm")

if TIME_LLM_ROOT not in sys.path:
    sys.path.append(TIME_LLM_ROOT)

print("[INFO] PROJECT_ROOT:", PROJECT_ROOT)
print("[INFO] DATA_DIR    :", DATA_DIR)
print("[INFO] MASTER_TBL  :", MASTER_TABLE_PATH)
print("[INFO] GPT2_PATH   :", GPT2_PATH)


[INFO] PROJECT_ROOT: /workspace/ship-ai
[INFO] DATA_DIR    : /workspace/ship-ai/data/processed
[INFO] MASTER_TBL  : /workspace/ship-ai/data/processed/final_master_table_v2.csv
[INFO] GPT2_PATH   : /workspace/ship-ai/pretrained_models/gpt2


In [4]:

MASTER_TABLE_PATH = FINAL_MASTER_FILE

data_handler = DataHandler(MASTER_TABLE_PATH, train_end_date='2022-12-31')

# ÏïÑÎ¨¥ Ìã∞Ïª§ ÌïòÎÇò Í≥®ÎùºÏÑú (Ïòà: '010140')
ticker = '010140'

scaled_df = data_handler.get_scaled_data_by_ticker(ticker)

print("Ïª¨Îüº ÏàúÏÑú ÌôïÏù∏:")
for i, col in enumerate(scaled_df.columns):
    print(f"{i}: {col}")


[DataHandler V2] Success: Loaded 6 tickers.
[DataHandler V2] Available tickers: ['010140' '010620' '329180' '042660' '443060' '009540']
[DataHandler V2] Fitting scalers using data up to 2022-12-31...
[DataHandler V2] Scalers fitted.
Ïª¨Îüº ÏàúÏÑú ÌôïÏù∏:
0: close_log
1: ret_1d
2: trading_volume_log
3: roe
4: real_debt_ratio
5: new_order_event_impulse
6: new_order_count_stair
7: bdi_proxy
8: wti
9: newbuild_proxy_2015_100
10: imo_event_impulse
11: imo_event_decay


In [5]:
# ============================================================
# 3. (ÏÑ†ÌÉù) ÏΩòÏÜî Î°úÍ∑∏Î•º ÌååÏùºÎ°úÎèÑ Ï†ÄÏû•ÌïòÎäî Logger ÏÑ§Ï†ï
# ============================================================
LOG_DIR = os.path.join(PROJECT_ROOT, "logs")
os.makedirs(LOG_DIR, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
LOG_PATH = os.path.join(LOG_DIR, f"train_log_{timestamp}.txt")

class Logger(object):
    def __init__(self, file_path):
        self.terminal = sys.stdout
        self.log = open(file_path, "a", encoding="utf-8")

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)
        self.log.flush()

    def flush(self):
        pass

sys.stdout = Logger(LOG_PATH)
print(f"[LOGGING] Training logs will be saved to: {LOG_PATH}")


[LOGGING] Training logs will be saved to: /workspace/ship-ai/logs/train_log_20251128_060857.txt
[INFO] TimeLLM Î™®Îç∏ ÏûÑÌè¨Ìä∏ ÏÑ±Í≥µ

[PHASE 2] DataHandler Ï¥àÍ∏∞Ìôî Î∞è ÏúàÎèÑÏö∞ ÏÉùÏÑ±
[DataHandler V2] Success: Loaded 6 tickers.
[DataHandler V2] Available tickers: ['010140' '010620' '329180' '042660' '443060' '009540']
[DataHandler V2] Fitting scalers using data up to 2022-12-31...
[DataHandler V2] Scalers fitted.
[INFO] Ï†ÑÏ≤¥ Ìã∞Ïª§ Ïàò: 6
[INFO] Ïòà: ['010140' '010620' '329180' '042660' '443060']
  - 010140 ÏúàÎèÑÏö∞ ÏÉùÏÑ±: X=(1259, 120, 12), Y=(1259, 10, 12)
  - 010620 ÏúàÎèÑÏö∞ ÏÉùÏÑ±: X=(1259, 120, 12), Y=(1259, 10, 12)
[DataHandler V2] Error: No scaler for 329180
  - 042660 ÏúàÎèÑÏö∞ ÏÉùÏÑ±: X=(1259, 120, 12), Y=(1259, 10, 12)
[DataHandler V2] Error: No scaler for 443060
  - 009540 ÏúàÎèÑÏö∞ ÏÉùÏÑ±: X=(794, 120, 12), Y=(794, 10, 12)

[INFO] ÌÜµÌï© ÏúàÎèÑÏö∞ ÌÅ¨Í∏∞: (4571, 120, 12) (4571, 10, 12)
[SPLIT] train=(3199, 120, 12), val=(457, 120, 12), test=(915, 120, 12)
[LOADER]

In [6]:
# ============================================================
# 4. TimeLLM Î™®Îç∏ import
# ============================================================
try:
    import importlib
    import models.TimeLLM
    importlib.reload(models.TimeLLM)
    from models.TimeLLM import Model as TimeLLM
    print("[INFO] TimeLLM Î™®Îç∏ ÏûÑÌè¨Ìä∏ ÏÑ±Í≥µ")
except Exception as e:
    print("[ERROR] TimeLLM import Ïã§Ìå®:", e)
    raise

  from .autonotebook import tqdm as notebook_tqdm


In [10]:

# ============================================================
# 5. Ïä¨ÎùºÏù¥Îî© ÏúàÎèÑÏö∞ Ìï®Ïàò + Dataset Ï†ïÏùò
# ============================================================
def create_sliding_windows(data, input_seq_len, output_seq_len):
    """
    DataFrame(2D: [time, features]) -> (X, y) 3D numpy Î∞∞Ïó¥Î°ú Î≥ÄÌôò
    X: (N, input_seq_len, C)
    y: (N, output_seq_len, C)
    """
    data_np = data.values
    n_samples = len(data_np)
    X, y = [], []

    total_len = input_seq_len + output_seq_len
    for i in range(n_samples - total_len + 1):
        x_win = data_np[i : i + input_seq_len]
        y_win = data_np[i + input_seq_len : i + total_len]
        X.append(x_win)
        y.append(y_win)

    return np.array(X), np.array(y)


class ShipDataset(Dataset):
    def __init__(self, X, Y):
        self.X = torch.FloatTensor(X)
        self.Y = torch.FloatTensor(Y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]


In [11]:
# ============================================================
# 6. Phase 2: DataHandler ‚Üí Ï†Ñ Ï¢ÖÎ™© ÏúàÎèÑÏö∞ ÏÉùÏÑ± ‚Üí Train/Val/Test Î∂ÑÌï†
# ============================================================
print("\n[PHASE 2] DataHandler Ï¥àÍ∏∞Ìôî Î∞è ÏúàÎèÑÏö∞ ÏÉùÏÑ±")

# 6-1) DataHandler Ï¥àÍ∏∞Ìôî
data_handler = DataHandler(MASTER_TABLE_PATH, train_end_date='2022-12-31')

INPUT_SEQ_LEN  = 120
OUTPUT_SEQ_LEN = 10

X_all_list = []
Y_all_list = []

tickers = data_handler.get_all_tickers()
print("[INFO] Ï†ÑÏ≤¥ Ìã∞Ïª§ Ïàò:", len(tickers))
print("[INFO] Ïòà:", tickers[:5])

for t in tickers:
    df_scaled = data_handler.get_scaled_data_by_ticker(t)
    if df_scaled is None or len(df_scaled) < INPUT_SEQ_LEN + OUTPUT_SEQ_LEN:
        continue

    X_t, Y_t = create_sliding_windows(df_scaled, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN)
    X_all_list.append(X_t)
    Y_all_list.append(Y_t)
    print(f"  - {t} ÏúàÎèÑÏö∞ ÏÉùÏÑ±: X={X_t.shape}, Y={Y_t.shape}")

X_all = np.concatenate(X_all_list, axis=0)
Y_all = np.concatenate(Y_all_list, axis=0)
print("\n[INFO] ÌÜµÌï© ÏúàÎèÑÏö∞ ÌÅ¨Í∏∞:", X_all.shape, Y_all.shape)  # (N, 120, C), (N, 10, C)

# 6-2) ÏãúÍ∞Ñ ÏàúÏÑú Í∑∏ÎåÄÎ°ú 7:1:2 Î∂ÑÌï†
total_samples = len(X_all)
train_size = int(total_samples * 0.7)
val_size   = int(total_samples * 0.1)
test_size  = total_samples - train_size - val_size

X_train = X_all[:train_size]
Y_train = Y_all[:train_size]

X_val   = X_all[train_size:train_size+val_size]
Y_val   = Y_all[train_size:train_size+val_size]

X_test  = X_all[train_size+val_size:]
Y_test  = Y_all[train_size+val_size:]

print(f"[SPLIT] train={X_train.shape}, val={X_val.shape}, test={X_test.shape}")

# 6-3) Dataset / DataLoader ÏÉùÏÑ±
BATCH_SIZE = 8

train_dataset = ShipDataset(X_train, Y_train)
val_dataset   = ShipDataset(X_val,   Y_val)
test_dataset  = ShipDataset(X_test,  Y_test)

train_loader_global = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader_global   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False)
test_loader_global  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)

print(f"[LOADER] train={len(train_loader_global)} batches, val={len(val_loader_global)}, test={len(test_loader_global)}")


In [15]:
class Configs:
    def __init__(self):
        # Í∏∞Î≥∏ ÏÑ∏ÌåÖ
        self.task_name = 'long_term_forecast'
        self.is_training = 1
        self.model_id = 'Stock_Prediction'
        self.model = 'TimeLLM'

        # Îç∞Ïù¥ÌÑ∞ Ï∞®Ïõê
        self.seq_len   = 120
        self.label_len = 60
        self.pred_len  = 10
        self.enc_in = 12
        self.dec_in = 12
        self.c_out = 12

        # [ÌïµÏã¨ Î≥ÄÍ≤Ω 1] LLM Î™®Îç∏ ÌíÄÌååÏõå Í∞ÄÎèô (RTX 5090Ïù¥ÎãàÍπå!)
        self.llm_model       = 'GPT2'
        self.llm_model_path = GPT2_PATH
        self.llm_dim    = 768
        self.llm_layers = 8     # (Í∏∞Ï°¥ 6 -> 12 Î≥µÍµ¨: ÏßÄÎä• 2Î∞∞)

        # [ÌïµÏã¨ Î≥ÄÍ≤Ω 2] ÌòÑÎØ∏Í≤Ω Î™®Îìú (High Resolution)
        # 32ÏùºÏî© ÎåÄÏ∂© Î≥¥Îäî Í≤å ÏïÑÎãàÎùº, 8ÏùºÏî© Ï™ºÍ∞úÏÑú ÎîîÌÖåÏùºÏùÑ Ïû°ÏäµÎãàÎã§.
        self.patch_len = 8       # (Í∏∞Ï°¥ 32 -> 8: Ìï¥ÏÉÅÎèÑ 4Î∞∞)
        self.stride    = 4       # (Í∏∞Ï°¥ 16 -> 4: Îçî Ï¥òÏ¥òÌïòÍ≤å)

        # [ÌïµÏã¨ Î≥ÄÍ≤Ω 3] Î™®Îç∏ Îç©Ïπò ÌÇ§Ïö∞Í∏∞
        self.d_model = 512      # (Í∏∞Ï°¥ 256 -> 768)
        self.d_ff    = 512       # (Í∏∞Ï°¥ 256 -> 768, Ï∞®Ïõê ÏóêÎü¨ Î∞©ÏßÄÏö© ÎèôÍ∏∞Ìôî)
        self.n_heads = 12        # (Í∏∞Ï°¥ 12 Ïú†ÏßÄ)
        self.dropout = 0.05     # (0.02 -> 0.05: Î™®Îç∏Ïù¥ Ïª§Ï†∏ÏÑú Í∑úÏ†ú ÏÇ¥Ïßù Ï∂îÍ∞Ä)

        # Prompt / ÎèÑÎ©îÏù∏ ÏÑ§Î™Ö (Rich Prompt Ïú†ÏßÄ)
        self.prompt_domain = 1
        self.content = (
            "Task: Forecast daily closing prices for Korean shipbuilding companies. "
            "Input Data: 12 channels including OHLC prices, trading volume, "
            "and macro-indicators such as Brent oil price, USD/KRW exchange rate, "
            "interest rate, and BDI (Baltic Dry Index). "
            "Context: Shipbuilding stocks are sensitive to oil prices and BDI. "
            "Analyze the 120-day trend, focusing on volatility and correlations, "
            "and predict the next 10 days."
        )

        # Í∏∞ÌÉÄ ÏÑ§Ï†ï
        self.embed   = 'timeF'
        self.freq    = 'd'
        self.factor  = 1
        self.moving_avg = 25
        self.e_layers = 2
        self.d_layers = 1
        self.top_k    = 5


In [19]:
#============================================================
# 8. Î™®Îç∏ Ï¥àÍ∏∞Ìôî
# ============================================================
configs = Configs()
model = TimeLLM(configs)
model.to(device).float()

n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"\n[MODEL] Trainable params: {n_params/1e6:.2f}M")
print(f"[MODEL] Using patch_len={configs.patch_len}, stride={configs.stride}, llm_layers={configs.llm_layers}")

In [27]:
# ============================================================
# 9. ÌïôÏäµ ÏÑ§Ï†ï
# ============================================================
LEARNING_RATE = 1e-4
EPOCHS        = 30
ACCUM_STEPS   = 8

optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
# criterion = nn.MSELoss()
criterion = DirectionalMSELoss(direction_weight=5.0)

print("\n[TRAIN] Start training...")
print(f"  > LR={LEARNING_RATE}, EPOCHS={EPOCHS}, ACCUM_STEPS={ACCUM_STEPS}")


In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

# --------------------------------------------------
# 0. ÏÜêÏã§Ìï®Ïàò / ÏòµÌã∞ÎßàÏù¥Ï†Ä / Ïä§ÏºÄÏ§ÑÎü¨ ÏÑ∏ÌåÖ (ÏòàÏãú)
# --------------------------------------------------
criterion = nn.MSELoss()

optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# --------------------------------------------------
# 1. Horizon = 1 ÌöåÍ∑Ä ÌïôÏäµ Î£®ÌîÑ
#    - target: batch_y[:, 0, 0]  (ÎÇ¥Ïùº close_log)
#    - pred  : outputs[:, 0, 0]  (Î™®Îç∏Ïù¥ ÏòàÏ∏°Ìïú ÎÇ¥Ïùº close_log)
# --------------------------------------------------
print("[TRAIN] Start training (Horizon=1 Regression)...")
print(f"  > LR={LEARNING_RATE}, EPOCHS={EPOCHS}, ACCUM_STEPS={ACCUM_STEPS}")

for epoch in range(1, EPOCHS + 1):
    model.train()
    total_loss = 0.0
    optimizer.zero_grad()

    progress_bar = tqdm(train_loader_global, desc=f"Epoch {epoch}/{EPOCHS}")

    for i, (batch_x, batch_y) in enumerate(progress_bar):
        batch_x = batch_x.to(device).float()   # (B, 120, C)
        batch_y = batch_y.to(device).float()   # (B, 10,  C)

        B, Seq, C = batch_x.shape
        Pred = batch_y.shape[1]                # ÏõêÎûò 10

        # ÎçîÎØ∏ time feature (Ïö∞Î¶¨Îäî Ïïà Ïç®ÏÑú 0ÏúºÎ°ú Îë†)
        dummy_mark_enc = torch.zeros(B, Seq, 4,  device=device)
        dummy_mark_dec = torch.zeros(B, Pred, 4, device=device)
        dummy_dec_in   = torch.zeros(B, Pred, C, device=device)

        # ---- Î™®Îç∏ forward ----
        outputs = model(batch_x, dummy_mark_enc, dummy_dec_in, dummy_mark_dec)
        if isinstance(outputs, tuple):
            outputs = outputs[0]               # TimeLLM ÏùºÎ∂Ä Î≤ÑÏ†ÑÏùÄ (out, attn) ÌäúÌîåÏùÑ Î∞òÌôòÌï®

        # outputs: (B, pred_len, C)  Ïó¨Í∏∞ÏÑú pred_len=10
        # Ïö∞Î¶¨Îäî +1Ïùº(h=0), 0Î≤à Ï±ÑÎÑê(close_log)Îßå ÏÇ¨Ïö©
        pred_next = outputs[:, 0, 0]           # (B,)
        true_next = batch_y[:, 0, 0]           # (B,)

        # ---- Loss Í≥ÑÏÇ∞ (Horizon=1 ÌöåÍ∑Ä) ----
        loss = criterion(pred_next, true_next)

        # ---- Gradient Accumulation ----
        loss = loss / ACCUM_STEPS
        loss.backward()

        if (i + 1) % ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()

        current_loss = loss.item() * ACCUM_STEPS
        total_loss  += current_loss

        current_lr = optimizer.param_groups[0]['lr']
        progress_bar.set_postfix(
            {'loss': f"{current_loss:.5f}", 'lr': f"{current_lr:.6f}"}
        )

    scheduler.step()
    avg_loss = total_loss / len(train_loader_global)
    print(f"[Epoch {epoch}] Avg Loss (h=1 only): {avg_loss:.5f}")


Epoch 1/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.01it/s, loss=0.03388, lr=0.000100]
Epoch 2/20: 100%|‚ñà‚ñà| 400/400 [01:38<00:00,  4.05it/s, loss=0.03825, lr=0.000099]
Epoch 3/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.04it/s, loss=0.01083, lr=0.000098]
Epoch 4/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.01015, lr=0.000095]
Epoch 5/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.01132, lr=0.000090]
Epoch 6/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.17693, lr=0.000085]
Epoch 7/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.02291, lr=0.000079]
Epoch 8/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.02it/s, loss=0.01686, lr=0.000073]
Epoch 9/20: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.02it/s, loss=0.01583, lr=0.000065]
Epoch 10/20: 100%|‚ñà| 400/400 [01:39<00:00,  4.02it/s, loss=0.02338, lr=0.000058]
Epoch 11/20: 100%|‚ñà| 400/400 [01:39<00:00,  4.02it/s, loss=0.03365, lr=0.000050]
Epoch 12/20: 100%|‚ñà| 400/400 [01:39<00:00,  4.02it/s, loss=0.00639,

In [28]:
# ============================================================
# 10. ÌïôÏäµ Î£®ÌîÑ (DirectionalMSELoss Î≤ÑÏ†Ñ)
# ============================================================

for epoch in range(1, EPOCHS+1):
    model.train()
    total_loss = 0.0
    optimizer.zero_grad()

    progress_bar = tqdm(train_loader_global, desc=f"Epoch {epoch}/{EPOCHS}")

    for i, (batch_x, batch_y) in enumerate(progress_bar):
        batch_x = batch_x.to(device).float()  # (B, 120, C)
        batch_y = batch_y.to(device).float()  # (B, 10,  C)

        B, Seq, C = batch_x.shape
        Pred = batch_y.shape[1]

        dummy_mark_enc = torch.zeros(B, Seq, 4,  device=device)
        dummy_mark_dec = torch.zeros(B, Pred, 4, device=device)
        dummy_dec_in   = torch.zeros(B, Pred, C, device=device)

        # --- Î™®Îç∏ forward ---
        outputs = model(batch_x, dummy_mark_enc, dummy_dec_in, dummy_mark_dec)
        if isinstance(outputs, tuple):
            outputs = outputs[0]

        # outputs: (B, pred_len, C)
        # Ïö∞Î¶¨Îäî 0Î≤à Ï±ÑÎÑê(close_log), +1Ïùº(h=0)Îßå ÏÇ¨Ïö©
        preds_full = outputs[:, -configs.pred_len:, :]   # (B, Pred, C)
        pred_next  = preds_full[:, 0, 0]                 # (B,) ÎÇ¥Ïùº ÏòàÏ∏° close_log
        true_next  = batch_y[:, 0, 0]                    # (B,) ÎÇ¥Ïùº Ïã§Ï†ú close_log

        last_val   = batch_x[:, -1, 0]                   # (B,) Ïò§Îäò close_log

        # --- DirectionalMSE loss ---
        loss = criterion(pred_next, true_next, last_val)
        # ----------------------------

        loss = loss / ACCUM_STEPS
        loss.backward()

        if (i + 1) % ACCUM_STEPS == 0:
            optimizer.step()
            optimizer.zero_grad()

        current_loss = loss.item() * ACCUM_STEPS
        total_loss  += current_loss
        current_lr   = optimizer.param_groups[0]['lr']
        progress_bar.set_postfix({'loss': f"{current_loss:.5f}", 'lr': f"{current_lr:.6f}"})

    scheduler.step()
    avg_loss = total_loss / len(train_loader_global)
    print(f"[Epoch {epoch}] Avg Loss: {avg_loss:.5f}")


Epoch 1/30: 100%|‚ñà‚ñà| 400/400 [01:40<00:00,  3.99it/s, loss=0.18332, lr=0.000100]
Epoch 2/30: 100%|‚ñà‚ñà| 400/400 [01:38<00:00,  4.05it/s, loss=0.07885, lr=0.000100]
Epoch 3/30: 100%|‚ñà‚ñà| 400/400 [01:38<00:00,  4.04it/s, loss=0.00916, lr=0.000099]
Epoch 4/30: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.04it/s, loss=0.18374, lr=0.000098]
Epoch 5/30: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.75705, lr=0.000096]
Epoch 6/30: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.05006, lr=0.000093]
Epoch 7/30: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.06173, lr=0.000090]
Epoch 8/30: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.02755, lr=0.000087]
Epoch 9/30: 100%|‚ñà‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.03544, lr=0.000083]
Epoch 10/30: 100%|‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.27663, lr=0.000079]
Epoch 11/30: 100%|‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.04803, lr=0.000075]
Epoch 12/30: 100%|‚ñà| 400/400 [01:39<00:00,  4.03it/s, loss=0.08766,

In [22]:
# ============================================================
# 11. Î™®Îç∏ Ï†ÄÏû•
# ============================================================
SAVE_DIR = os.path.join(PROJECT_ROOT, "models")
os.makedirs(SAVE_DIR, exist_ok=True)
SAVE_PATH = os.path.join(SAVE_DIR, "ship_time_llm_tmp6.pth")
torch.save(model.state_dict(), SAVE_PATH)
print(f"\n[SAVE] Model saved to: {SAVE_PATH}")


In [32]:
import torch
import numpy as np

# ============================================================
# Direction Classification Evaluation (DirectionalMSE Î≤ÑÏ†ÑÏö©)
#  - True: ÎÇ¥Ïùº Ï¢ÖÍ∞ÄÍ∞Ä Ïò§ÎäòÎ≥¥Îã§ ÌÅ¨Î©¥ 1, ÏïÑÎãàÎ©¥ 0
#  - Model: pred_next - last_val Ïùò Î∂ÄÌò∏Î°ú ÏÉÅÏäπ/ÌïòÎùΩ ÌåêÎã®
#  - Naive: Ïò§Îäò ÏàòÏùµÎ•†(Ïò§Îäò-Ïñ¥Ï†ú)Ïùò Î∂ÄÌò∏Î°ú ÎÇ¥Ïùº Î∞©Ìñ• Í∞ÄÏ†ï
# ============================================================
def eval_direction(loader, name="train"):
    model.to(device)
    model.eval()

    total = 0

    # Î™®Îç∏/naive Í∞ÅÍ∞Å TP/FP/TN/FN
    TP_m = FP_m = TN_m = FN_m = 0
    TP_n = FP_n = TN_n = FN_n = 0

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device).float()  # (B, 120, C)
            batch_y = batch_y.to(device).float()  # (B, 10,  C)

            B, Seq, C = batch_x.shape
            Pred = batch_y.shape[1]

            # ---------- 1) True Label (y_bin) ----------
            close_today    = batch_x[:, -1, 0]   # (B,)
            close_tomorrow = batch_y[:, 0, 0]    # (B,)

            diff = close_tomorrow - close_today
            y_bin = (diff > 0).float()          # ÏÉÅÏäπ=1, ÌïòÎùΩ/Ìö°Î≥¥=0

            # ---------- 2) Î™®Îç∏ ÏòàÏ∏° ----------
            dummy_mark_enc = torch.zeros(B, Seq, 4, device=device)
            dummy_mark_dec = torch.zeros(B, Pred, 4, device=device)
            dummy_dec_in   = torch.zeros(B, Pred, C, device=device)

            outputs = model(batch_x, dummy_mark_enc, dummy_dec_in, dummy_mark_dec)
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            # outputs: (B, pred_len, C)
            preds_full = outputs[:, -configs.pred_len:, :]   # (B, Pred, C)
            pred_next  = preds_full[:, 0, 0]                 # (B,) ÎÇ¥Ïùº ÏòàÏ∏° close_log

            # Î™®Îç∏Ïùò Î∞©Ìñ•: pred_next - close_today
            dir_m = pred_next - close_today                  # (B,)
            y_hat_m = (dir_m > 0).float()                    # ÏÉÅÏäπ=1, ÌïòÎùΩ/Ìö°Î≥¥=0

            # ---------- 3) Naive ÏòàÏ∏° ----------
            # Ïñ¥Ï†ú‚ÜíÏò§Îäò ÏàòÏùµÎ•†Ïùò Î∂ÄÌò∏Î•º ÎÇ¥ÏùºÏóêÎèÑ Í∑∏ÎåÄÎ°ú Í∞ÑÎã§Í≥† Í∞ÄÏ†ï
            last_ret = batch_x[:, -1, 0] - batch_x[:, -2, 0] # (B,)
            y_hat_n  = (last_ret > 0).float()                # (B,)

            # ---------- 4) TP/FP/TN/FN Ïπ¥Ïö¥Ìä∏ ----------
            total += B

            # Î™®Îç∏
            TP_m += ((y_hat_m == 1) & (y_bin == 1)).sum().item()
            FP_m += ((y_hat_m == 1) & (y_bin == 0)).sum().item()
            TN_m += ((y_hat_m == 0) & (y_bin == 0)).sum().item()
            FN_m += ((y_hat_m == 0) & (y_bin == 1)).sum().item()

            # naive
            TP_n += ((y_hat_n == 1) & (y_bin == 1)).sum().item()
            FP_n += ((y_hat_n == 1) & (y_bin == 0)).sum().item()
            TN_n += ((y_hat_n == 0) & (y_bin == 0)).sum().item()
            FN_n += ((y_hat_n == 0) & (y_bin == 1)).sum().item()

    def metrics(TP, FP, TN, FN):
        denom = max(TP + FP + TN + FN, 1)
        acc   = (TP + TN) / denom
        prec  = TP / max(TP + FP, 1)
        rec   = TP / max(TP + FN, 1)
        return acc, prec, rec

    acc_m, prec_m, rec_m = metrics(TP_m, FP_m, TN_m, FN_m)
    acc_n, prec_n, rec_n = metrics(TP_n, FP_n, TN_n, FN_n)

    print(f"\n=== [{name}] Direction Evaluation ===")
    print(f"Ï¥ù ÏÉòÌîå Ïàò: {total}")
    print(f"[Model]  Acc={acc_m*100:5.2f}%  Prec={prec_m*100:5.2f}%  Rec={rec_m*100:5.2f}%")
    print(f"[Naive]  Acc={acc_n*100:5.2f}%  Prec={prec_n*100:5.2f}%  Rec={rec_n*100:5.2f}%")
    print(f"[Model]  TP={TP_m}, FP={FP_m}, TN={TN_m}, FN={FN_m}")
    print(f"[Naive]  TP={TP_n}, FP={FP_n}, TN={TN_n}, FN={FN_n}")
    print("=====================================\n")


In [34]:
# ============================================================
# 13. ÌèâÍ∞Ä Ïã§Ìñâ ÏòàÏãú
# ============================================================
print("\n[Eval] Global ÏÑ±Îä• ÌèâÍ∞Ä ÏãúÏûë")
eval_direction(train_loader_global, "train")
eval_direction(val_loader_global, "val")

In [23]:
# ============================================================
# 12. ÌèâÍ∞Ä Ìï®Ïàò (MSE / DIR%) + Horizon Î∂ÑÏÑù
# ============================================================
import numpy as np

def eval_loader(loader, name="train"):
    model.to(device)
    model.eval()

    mse_model_list = []
    mse_naive_list = []
    dir_model_list = []
    dir_naive_list = []

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device).float()
            batch_y = batch_y.to(device).float()

            B, Seq, C = batch_x.shape
            Pred = batch_y.shape[1]

            dummy_mark_enc = torch.zeros(B, Seq, 4, device=device)
            dummy_mark_dec = torch.zeros(B, Pred, 4, device=device)
            dummy_dec_in   = torch.zeros(B, Pred, C, device=device)

            outputs = model(batch_x, dummy_mark_enc, dummy_dec_in, dummy_mark_dec)
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            f_dim = -1 if configs.c_out == 1 else 0
            preds = outputs[:, -configs.pred_len:, f_dim:]  # (B, Pred, C)

            true = batch_y[:, :, 0]   # (B, Pred)
            pred = preds[:, :, 0]     # (B, Pred)

            # 1) MSE
            mse_model = torch.mean((pred - true)**2).item()
            naive = batch_x[:, -1, 0].unsqueeze(1).repeat(1, Pred)
            mse_naive = torch.mean((naive - true)**2).item()

            mse_model_list.append(mse_model)
            mse_naive_list.append(mse_naive)

            # 2) Î∞©Ìñ• Ï†ïÌôïÎèÑ
            true_ret = true[:, 1:] - true[:, :-1]
            pred_ret = pred[:, 1:] - pred[:, :-1]

            true_sign = torch.sign(true_ret)
            pred_sign = torch.sign(pred_ret)

            last_hist_ret = batch_x[:, -1, 0] - batch_x[:, -2, 0]
            naive_sign = torch.sign(last_hist_ret).unsqueeze(1).repeat(1, Pred-1)

            mask = true_sign != 0
            if mask.sum() == 0:
                continue

            acc_m = (pred_sign[mask] == true_sign[mask]).float().mean().item()
            acc_n = (naive_sign[mask] == true_sign[mask]).float().mean().item()

            dir_model_list.append(acc_m)
            dir_naive_list.append(acc_n)

    avg_mse_model = np.mean(mse_model_list)
    avg_mse_naive = np.mean(mse_naive_list)
    avg_dir_model = np.mean(dir_model_list) * 100
    avg_dir_naive = np.mean(dir_naive_list) * 100

    print(f"[{name}] MSE   model={avg_mse_model:.4f}, naive={avg_mse_naive:.4f}")
    print(f"[{name}] DIR%  model={avg_dir_model:.2f}%, naive={avg_dir_naive:.2f}%")
    print("-" * 60)


def eval_horizon(loader, name="val"):
    model.to(device)
    model.eval()

    Pred = configs.pred_len

    mse_model_h = [[] for _ in range(Pred)]
    mse_naive_h = [[] for _ in range(Pred)]
    dir_model_h = [[] for _ in range(Pred-1)]
    dir_naive_h = [[] for _ in range(Pred-1)]

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device).float()
            batch_y = batch_y.to(device).float()

            B, Seq, C = batch_x.shape
            Pred = batch_y.shape[1]

            dummy_mark_enc = torch.zeros(B, Seq, 4, device=device)
            dummy_mark_dec = torch.zeros(B, Pred, 4, device=device)
            dummy_dec_in   = torch.zeros(B, Pred, C, device=device)

            outputs = model(batch_x, dummy_mark_enc, dummy_dec_in, dummy_mark_dec)
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            f_dim = -1 if configs.c_out == 1 else 0
            preds = outputs[:, -configs.pred_len:, f_dim:]

            true = batch_y[:, :, 0]
            pred = preds[:, :, 0]
            naive = batch_x[:, -1, 0].unsqueeze(1).repeat(1, Pred)

            # HorizonÎ≥Ñ MSE
            for h in range(Pred):
                mse_m = torch.mean((pred[:, h] - true[:, h])**2).item()
                mse_n = torch.mean((naive[:, h] - true[:, h])**2).item()
                mse_model_h[h].append(mse_m)
                mse_naive_h[h].append(mse_n)

            # HorizonÎ≥Ñ Î∞©Ìñ• Ï†ïÌôïÎèÑ
            true_ret  = true[:, 1:] - true[:, :-1]
            pred_ret  = pred[:, 1:] - pred[:, :-1]
            true_sign = torch.sign(true_ret)
            pred_sign = torch.sign(pred_ret)

            last_hist_ret = batch_x[:, -1, 0] - batch_x[:, -2, 0]
            naive_sign = torch.sign(last_hist_ret).unsqueeze(1).repeat(1, Pred-1)

            for h in range(Pred-1):
                ts = true_sign[:, h]
                ps = pred_sign[:, h]
                ns = naive_sign[:, h]

                mask = ts != 0
                if mask.sum() == 0:
                    continue

                acc_m = (ps[mask] == ts[mask]).float().mean().item()
                acc_n = (ns[mask] == ts[mask]).float().mean().item()

                dir_model_h[h].append(acc_m)
                dir_naive_h[h].append(acc_n)

    mse_model_h = [np.mean(v) if len(v) > 0 else np.nan for v in mse_model_h]
    mse_naive_h = [np.mean(v) if len(v) > 0 else np.nan for v in mse_naive_h]
    dir_model_h = [np.mean(v)*100 if len(v) > 0 else np.nan for v in dir_model_h]
    dir_naive_h = [np.mean(v)*100 if len(v) > 0 else np.nan for v in dir_naive_h]

    print(f"== [{name}] HorizonÎ≥Ñ MSE (h=0ÏùÄ +1ÏùºÏ∞®) ===")
    for h in range(Pred):
        print(f"h+{h+1}: MSE_model={mse_model_h[h]:.4f}, MSE_naive={mse_naive_h[h]:.4f}")

    print(f"\n=== [{name}] HorizonÎ≥Ñ Î∞©Ìñ• Ï†ïÌôïÎèÑ (1~9Ïùº Íµ¨Í∞Ñ) ===")
    for h in range(Pred-1):
        print(f"Íµ¨Í∞Ñ {h+1}->{h+2}: DIR_model={dir_model_h[h]:.2f}%, DIR_naive={dir_naive_h[h]:.2f}%")

    return mse_model_h, mse_naive_h, dir_model_h, dir_naive_h

In [24]:
# ============================================================
# 13. ÌèâÍ∞Ä Ïã§Ìñâ ÏòàÏãú
# ============================================================
print("\n[Eval] Global ÏÑ±Îä• ÌèâÍ∞Ä ÏãúÏûë")
eval_loader(train_loader_global, "train")
eval_loader(val_loader_global,   "val")
eval_horizon(val_loader_global,  "val")


([np.float64(0.036556711821848976),
  np.float64(0.9095412556338927),
  np.float64(1.3724864285182337),
  np.float64(0.9585807690204218),
  np.float64(1.0115850683109),
  np.float64(1.5007414740574514),
  np.float64(1.463560911698331),
  np.float64(1.566891599607108),
  np.float64(1.2905951568632033),
  np.float64(1.3434308393183967)],
 [np.float64(0.030715870161155432),
  np.float64(0.06035709488486614),
  np.float64(0.09029256207627598),
  np.float64(0.12113065682833307),
  np.float64(0.1470897889862656),
  np.float64(0.17020487708264384),
  np.float64(0.19079851161624337),
  np.float64(0.21216568321888818),
  np.float64(0.2335972248672925),
  np.float64(0.2584585758279367)],
 [np.float64(50.205255299806595),
  np.float64(53.427751079715534),
  np.float64(48.111659057181456),
  np.float64(49.50738991128987),
  np.float64(53.56116662251538),
  np.float64(46.35673311763796),
  np.float64(51.754926787368184),
  np.float64(49.32266065272792),
  np.float64(47.72167565493748)],
 [np.float6

In [1]:
def eval_h1(loader, name="train"):
    model.to(device)
    model.eval()

    mse_model_list = []
    mse_naive_list = []
    dir_model_list = []
    dir_naive_list = []

    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device).float()
            batch_y = batch_y.to(device).float()

            B, Seq, C = batch_x.shape
            Pred = batch_y.shape[1]

            dummy_mark_enc = torch.zeros(B, Seq, 4, device=device)
            dummy_mark_dec = torch.zeros(B, Pred, 4, device=device)
            dummy_dec_in   = torch.zeros(B, Pred, C, device=device)

            outputs = model(batch_x, dummy_mark_enc, dummy_dec_in, dummy_mark_dec)
            if isinstance(outputs, tuple):
                outputs = outputs[0]

            # +1Ïùº / close_logÎßå ÏÇ¨Ïö©
            pred_next = outputs[:, 0, 0]   # (B,)
            true_next = batch_y[:, 0, 0]   # (B,)

            # MSE
            mse_model = torch.mean((pred_next - true_next)**2).item()
            naive_next = batch_x[:, -1, 0]  # "ÎÇ¥ÏùºÎèÑ Ïò§ÎäòÏù¥Îûë Í∞ôÎã§Í≥† Í∞ÄÏ†ï"
            mse_naive = torch.mean((naive_next - true_next)**2).item()

            mse_model_list.append(mse_model)
            mse_naive_list.append(mse_naive)

            # Î∞©Ìñ• Ï†ïÌôïÎèÑ (ÏÉÅÏäπ/ÌïòÎùΩ)
            true_ret = true_next - batch_x[:, -1, 0]
            pred_ret = pred_next - batch_x[:, -1, 0]

            true_sign = torch.sign(true_ret)
            pred_sign = torch.sign(pred_ret)

            last_ret  = batch_x[:, -1, 0] - batch_x[:, -2, 0]
            naive_sign = torch.sign(last_ret)

            mask = true_sign != 0
            if mask.sum() == 0:
                continue

            acc_m = (pred_sign[mask] == true_sign[mask]).float().mean().item()
            acc_n = (naive_sign[mask] == true_sign[mask]).float().mean().item()

            dir_model_list.append(acc_m)
            dir_naive_list.append(acc_n)

    avg_mse_model = np.mean(mse_model_list)
    avg_mse_naive = np.mean(mse_naive_list)
    avg_dir_model = np.mean(dir_model_list) * 100
    avg_dir_naive = np.mean(dir_naive_list) * 100

    print(f"[{name} h=1] MSE   model={avg_mse_model:.4f}, naive={avg_mse_naive:.4f}")
    print(f"[{name} h=1] DIR%  model={avg_dir_model:.2f}%, naive={avg_dir_naive:.2f}%")
    print("-" * 60)


In [2]:
print("[Eval] Global ÏÑ±Îä• ÌèâÍ∞Ä (h=1 Ï†ÑÏö©)")
eval_h1(train_loader_global, name="train")
eval_h1(val_loader_global,   name="val")


[Eval] Global ÏÑ±Îä• ÌèâÍ∞Ä (h=1 Ï†ÑÏö©)


NameError: name 'train_loader_global' is not defined