In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import time

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


### <code>stockprice5_pytorch.npz</code> 파일을 GitHub에서 다운로드하여 사용

In [6]:
import json
import urllib.request
import os

# Download each data file once; files already on disk are reused as-is.
data_urls = {
    'stockprice5_pytorch.npz': 'https://github.com/nongaussian/class-2026-lginnotek-llm/raw/refs/heads/main/stockprice5_pytorch.npz'
}

for filename, url in data_urls.items():
    if os.path.exists(filename):
        print(f'{filename} already exists.')
        continue

    print(f'Downloading {filename}...')
    # Download under a temporary name and rename only after the transfer
    # finished. An interrupted download therefore never leaves a truncated
    # file under the final name, which the existence check above would
    # otherwise accept on every later run.
    partial_path = filename + '.part'
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, filename)
    print(f'{filename} downloaded.')

Downloading stockprice5_pytorch.npz...
stockprice5_pytorch.npz downloaded.


In [7]:
# Read the four arrays inside a context manager so the underlying .npz
# archive (a file handle) is closed as soon as they are in memory.
with np.load('stockprice5_pytorch.npz') as npz:
    x_train = npz['x_train']
    y_train = npz['y_train']
    x_test = npz['x_test']
    y_test = npz['y_test']

In [8]:
# Show the shapes of the train/test inputs and targets on a single line.
print(*(split.shape for split in (x_train, y_train, x_test, y_test)))

(13760, 60, 5) (13760, 1) (4065, 60, 5) (4065, 1)


### Test LSTM & TimeDistributed

In [9]:
# PyTorch LSTM test: run a random batch through a single-layer LSTM and
# inspect the shapes it returns.
# Input shape: (batch, seq_len, input_size)
inputs = torch.randn(32, 10, 8)
lstm = nn.LSTM(input_size=8, hidden_size=4, batch_first=True)
output, (h_n, c_n) = lstm(inputs)
print(output.shape)  # (batch, seq_len, hidden_size) - outputs for every timestep
print(h_n.shape)     # (num_layers, batch, hidden_size) - final hidden state

torch.Size([32, 10, 4])
torch.Size([1, 32, 4])


In [None]:
# PyTorch LSTM returns both sequence outputs and states by default
# (`inputs` is the random batch created in the preceding cell).
lstm = nn.LSTM(input_size=8, hidden_size=4, batch_first=True)
whole_seq_output, (final_state_h, final_state_c) = lstm(inputs)
print(whole_seq_output.shape)  # (32, 10, 4) - output for the whole sequence
print(final_state_h.shape)     # (1, 32, 4) - final hidden state
print(final_state_c.shape)     # (1, 32, 4) - final cell state

In [3]:
# PyTorch TimeDistributed equivalent
# To apply a Linear (dense) layer to every timestep, we reshape
# (batch, seq_len, input_size) -> (batch*seq_len, input_size)
inputs = torch.randn(4, 128, 10)  # (batch, seq_len, input_size)
batch, seq_len, input_size = inputs.shape
print(inputs.shape)  # (4, 128, 10)

dense_layer = nn.Linear(input_size, 16) # output size = 16
# Flatten batch and time into one axis, apply the linear layer, then restore
# the (batch, seq_len, features) layout
inputs_reshaped = inputs.view(batch * seq_len, input_size)
outputs = dense_layer(inputs_reshaped)
outputs = outputs.view(batch, seq_len, 16)
print(outputs.shape)  # (4, 128, 16)

torch.Size([4, 128, 10])
torch.Size([4, 128, 16])


In [4]:
# PyTorch RepeatVector equivalent
# Dense(32) -> RepeatVector(3) : (batch, 32) -> (batch, 3, 32)
x = torch.randn(4, 32)
dense = nn.Linear(32, 32)
x = dense(x)
# RepeatVector: insert a length-1 time axis with unsqueeze, then tile it
# 3 times along that axis with repeat
x = x.unsqueeze(1).repeat(1, 3, 1)
print(x.shape)  # (4, 3, 32)

torch.Size([4, 3, 32])


### Define Encoder

In [10]:
# Encoder hyperparameters used by the cells below.
input_dim, n_rnn_layers = 5, 3
# latent_dims is assumed to be the same as x_dims, so both get one value.
x_dims = latent_dims = 8

In [11]:
class Encoder(nn.Module):
    """Sequence regressor: per-timestep projection, stacked LSTM, scalar head.

    Args:
        input_dim: Number of features in each timestep of the input.
        x_dims: Width of the per-timestep projection and of every LSTM layer.
        n_rnn_layers: Number of stacked LSTM layers.
        dropout: Dropout rate between LSTM layers; replaced by 0 when only a
            single layer is requested.
    """

    def __init__(self, input_dim, x_dims, n_rnn_layers, dropout=0.2):
        super().__init__()

        # Dropout is applied between LSTM layers, so it is disabled when the
        # stack has just one layer.
        inter_layer_dropout = dropout if n_rnn_layers > 1 else 0

        # nn.Linear acts on the last axis, so on a (batch, seq_len, input_dim)
        # tensor it transforms every timestep independently (the
        # TimeDistributed(Dense) pattern).
        self.fc_input = nn.Linear(input_dim, x_dims)

        # Multi-layer LSTM stack provided by a single nn.LSTM module.
        self.lstm = nn.LSTM(
            x_dims,
            x_dims,
            num_layers=n_rnn_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
        )

        # Regression head mapping the final hidden state to one value.
        self.fc_output = nn.Linear(x_dims, 1)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, 1).
        """
        # (batch, seq_len, input_dim) -> (batch, seq_len, x_dims)
        projected = self.relu(self.fc_input(x))

        # Only the final hidden states are needed; they have shape
        # (n_layers, batch, x_dims). The per-timestep outputs and the cell
        # states are discarded.
        _, (hidden_states, _) = self.lstm(projected)

        # Final hidden state of the top LSTM layer: (batch, x_dims).
        top_layer_hidden = hidden_states[-1]

        return self.fc_output(top_layer_hidden)  # (batch, 1)

def build_encoder():
    """Create an Encoder from the notebook-level hyperparameters.

    Reads the globals ``input_dim``, ``x_dims`` and ``n_rnn_layers`` and uses
    a fixed dropout of 0.2.
    """
    return Encoder(input_dim, x_dims, n_rnn_layers, dropout=0.2)

In [12]:
# Build the encoder, move it to the selected device and show its layers.
model = build_encoder().to(device)
print(model)

Encoder(
  (fc_input): Linear(in_features=5, out_features=8, bias=True)
  (lstm): LSTM(8, 8, num_layers=3, batch_first=True, dropout=0.2)
  (fc_output): Linear(in_features=8, out_features=1, bias=True)
  (relu): ReLU()
)


In [13]:
# Smoke test: push one random sequence through the model and check that the
# forward pass yields a single value per sequence.
tmp_x_batch = torch.randn(1, 32, input_dim).to(device)  # (batch=1, seq_len=32, input_dim)
output = model(tmp_x_batch)
print(output.shape)  # (1, 1)
print(output)

torch.Size([1, 1])
tensor([[-0.0389]], grad_fn=<AddmmBackward0>)


In [8]:
# Training configuration: mini-batch size and number of passes over the data.
batch_size, epochs = 16, 50

In [None]:
# Convert the NumPy arrays to float32 tensors. torch.tensor(..., dtype=...)
# replaces the legacy torch.FloatTensor constructor; it always copies, so the
# tensors never alias the original arrays.
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Pair inputs with targets so the loaders yield (x, y) mini-batches.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

# Shuffle only the training data; the test set keeps its original order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Loss function and optimizer
criterion = nn.L1Loss()  # Mean Absolute Error
optimizer = optim.Adam(model.parameters())  # Adam with its default settings

In [None]:
def train_epoch(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Module to train; it is switched to training mode.
        train_loader: Iterable yielding ``(batch_x, batch_y)`` tensor pairs.
        criterion: Loss callable ``criterion(outputs, targets)``. The
            per-batch value is weighted by the batch size, which assumes the
            criterion returns a batch mean (as ``nn.L1Loss()`` does by default).
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The loss averaged over the samples actually processed, or ``nan`` if
        the loader yielded no batches.
    """
    model.train()
    loss_sum = 0.0
    n_samples = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        n_in_batch = batch_x.size(0)
        loss_sum += loss.item() * n_in_batch
        n_samples += n_in_batch

    # Divide by the number of samples seen rather than len(loader.dataset):
    # the two differ when the loader drops the last batch or samples a subset.
    if n_samples == 0:
        return float("nan")
    return loss_sum / n_samples

def evaluate(model, test_loader, criterion, device):
    """Compute the average loss of ``model`` over ``test_loader``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        test_loader: Iterable yielding ``(batch_x, batch_y)`` tensor pairs.
        criterion: Loss callable ``criterion(outputs, targets)``. The
            per-batch value is weighted by the batch size, which assumes the
            criterion returns a batch mean (as ``nn.L1Loss()`` does by default).
        device: Device the batches are moved to before the forward pass.

    Returns:
        The loss averaged over the samples actually processed, or ``nan`` if
        the loader yielded no batches.
    """
    model.eval()
    loss_sum = 0.0
    n_samples = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)

            n_in_batch = batch_x.size(0)
            loss_sum += loss.item() * n_in_batch
            n_samples += n_in_batch

    # Divide by the number of samples seen rather than len(loader.dataset):
    # the two differ when the loader drops the last batch or samples a subset.
    if n_samples == 0:
        return float("nan")
    return loss_sum / n_samples

# Training loop: one optimisation pass and one evaluation pass per epoch.
for epoch in range(epochs):
    # perf_counter is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    # The loss reported as "val_loss" is measured on the test loader.
    val_loss = evaluate(model, test_loader, criterion, device)

    elapsed = time.perf_counter() - start_time
    print(f"Epoch {epoch+1}/{epochs} - {elapsed:.1f}s - "
          f"loss: {train_loss:.4f} - val_loss: {val_loss:.4f}")

In [None]:
# Plot ten randomly chosen test windows together with the true target and
# the model's prediction (evaluation mode, no gradient tracking).
model.eval()
with torch.no_grad():
    for _ in range(10):
        idx = np.random.randint(x_test.shape[0])
        # Add a batch axis: (seq_len, features) -> (1, seq_len, features).
        x_sample = torch.tensor(x_test[idx], dtype=torch.float32).unsqueeze(0).to(device)
        y_pred = model(x_sample).cpu().numpy()[0, 0]
        # y_test has shape (N, 1); take the scalar so axhline receives a
        # plain number instead of a length-1 array.
        y_true = y_test[idx, 0]

        plt.figure(figsize=(10, 5))
        # Only the first input feature of the window is drawn.
        plt.plot(x_test[idx, :, 0])
        plt.axhline(y_true, color='r', label='True')
        plt.axhline(y_pred, color='g', label='Predicted')
        plt.title(f'Test sample {idx}')
        plt.legend()
        plt.show()