In [1]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split  

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
# 데이터 로드
train_data = pd.read_csv('C:/Users/user/Desktop/deep-learning-programing-project/train_data.csv')
test_data = pd.read_csv('C:/Users/user/Desktop/deep-learning-programing-project/test_data.csv')

In [3]:
# 데이터 전처리
X_train = train_data.iloc[:, 1:3197].values  # FLUX1~FLUX3196
y_train = train_data.iloc[:, 3197].values  # FLUX3197

X_test = test_data.iloc[:, 1:3197].values  # FLUX1~FLUX3196
y_test = test_data.iloc[:, 3197].values  # FLUX3197

scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))

X_test_scaled = scaler_X.transform(X_test)
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))

X_train.shape

(5087, 3196)

In [4]:
def create_sequences(data, labels, seq_length):
    sequences = []
    seq_labels = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i+seq_length])
        seq_labels.append(labels[i+seq_length])
    return np.array(sequences), np.array(seq_labels)

In [5]:
seq_length = 25  # 적절한 시퀀스 길이 설정

X_train_sequences, y_train_sequences = create_sequences(X_train_scaled, y_train_scaled, seq_length)
X_test_sequences, y_test_sequences = create_sequences(X_test_scaled, y_test_scaled, seq_length)

# 텐서로 변환
X_train_tensor = torch.tensor(X_train_sequences, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train_sequences, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_sequences, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test_sequences, dtype=torch.float32).to(device)

X_train_tensor.shape

torch.Size([5062, 25, 3196])

In [13]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [14]:
input_size = X_train_tensor.shape[2]
hidden_size = 64
num_layers = 2
output_size = 1

In [15]:
model = LSTM(input_size, hidden_size, num_layers, output_size, seq_length).to(device)

TypeError: LSTM.__init__() takes 5 positional arguments but 6 were given

In [9]:
# 손실 함수 및 옵티마이저
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [12]:
# 모델 학습
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test_tensor)
        val_loss = criterion(val_outputs, y_test_tensor)
    
    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}')

RuntimeError: input.size(-1) must be equal to input_size. Expected 64, got 3196

In [None]:
# 결과 시각화
model.eval()
all_data = np.concatenate((X_train, X_val), axis=0)
all_labels = np.concatenate((y_train, y_val), axis=0)
all_tensor = torch.tensor(all_data, dtype=torch.float32).to(device)

predicted_labels = model(all_tensor).cpu().detach().numpy()
actual_labels = all_labels

plt.figure(figsize=(10,6))
plt.plot(actual_labels, label='Actual Data')
plt.plot(predicted_labels, label='Predicted Data')
plt.title('Time-Series Prediction')
plt.legend()
plt.show()