In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.optim import Adam
import random
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
import math
from torch.utils.data import TensorDataset, DataLoader

# 시드 고정
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(42)

In [2]:
data = pd.read_csv('/home/aibig25/hong_sj/trb/num.csv')
data = data.fillna(0)

unique_ids = data['sequence_ID'].unique()
train_ids, test_ids = train_test_split(unique_ids, test_size=41, random_state=42)
train_data = data[data['sequence_ID'].isin(train_ids)]
test_data = data[data['sequence_ID'].isin(test_ids)]

independent_vars = data.columns.difference(['center_x', 'center_y','center_x_ma','center_y_ma', 'ID', 'LC'])
dependent_vars = ['center_y_ma']

scaler = MinMaxScaler()

train_data[independent_vars] = scaler.fit_transform(train_data[independent_vars])
test_data[independent_vars] = scaler.transform(test_data[independent_vars])

X_train = train_data[independent_vars]
y_train = train_data[dependent_vars]

X_test = test_data[independent_vars]
y_test = test_data[dependent_vars]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data[independent_vars] = scaler.fit_transform(train_data[independent_vars])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[independent_vars] = scaler.transform(test_data[independent_vars])


In [3]:
# 입력 및 예측 시퀀스 길이 정의
input_sequence_length = 60
output_sequence_length = 60

def create_sequences(data, input_sequence_length, output_sequence_length):
    X = []
    y = []

    for i in range(len(data) - input_sequence_length - output_sequence_length + 1):
        X.append(data.iloc[i:(i + input_sequence_length)][independent_vars].values)
        y.append(data.iloc[(i + input_sequence_length):(i + input_sequence_length + output_sequence_length)][dependent_vars].values)
    
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_data, input_sequence_length, output_sequence_length)
X_test, y_test = create_sequences(test_data, input_sequence_length, output_sequence_length)

In [4]:
# 데이터셋을 텐서로 변환
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))

# 데이터 로더 생성
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

In [5]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size * output_sequence_length)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)  # GPU를 위한 설정
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size, device=x.device)  # GPU를 위한 설정
        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # 마지막 시간 단계의 출력만 사용
        out = out.view(-1, output_sequence_length, output_size)  # 출력 형태 조정
        return out

In [6]:
input_size = len(independent_vars)  # 입력 특성 수
hidden_size = 128  # LSTM 은닉 상태의 크기
num_layers = 2  # LSTM 층 수
output_size = output_sequence_length  # 출력 시퀀스 길이

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 모델 인스턴스화
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)
model = model.float()  # 데이터 타입을 float32로 맞추기

# 손실 함수와 옵티마이저
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [7]:
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)  # 데이터를 GPU로 전송
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

def evaluate_model(model, test_loader):
    model.eval()
    total_rmse = 0
    total_mape = 0
    count = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)  # 데이터를 GPU로 전송
            outputs = model(inputs)
            rmse = torch.sqrt(criterion(outputs, targets))
            mape = torch.mean(torch.abs((targets - outputs) / (targets + 1e-8))) * 100
            total_rmse += rmse.item()
            total_mape += mape.item()
            count += 1
    print(f'Mean RMSE: {total_rmse/count}, Mean MAPE: {total_mape/count}')

In [8]:
train_model(model, train_loader, criterion, optimizer, num_epochs=50)

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/50, Loss: 4540.169921875
Epoch 2/50, Loss: 162.42750549316406
Epoch 3/50, Loss: 4999.19384765625
Epoch 4/50, Loss: 1224.67041015625
Epoch 5/50, Loss: 144.51171875
Epoch 6/50, Loss: 31.801719665527344
Epoch 7/50, Loss: 150.4801483154297
Epoch 8/50, Loss: 103.3359375
Epoch 9/50, Loss: 5807.41796875
Epoch 10/50, Loss: 129.0937042236328
Epoch 11/50, Loss: 803.3323974609375
Epoch 12/50, Loss: 53.496551513671875
Epoch 13/50, Loss: 5514.310546875
Epoch 14/50, Loss: 832.9743041992188
Epoch 15/50, Loss: 72.28206634521484
Epoch 16/50, Loss: 1402.0093994140625
Epoch 17/50, Loss: 131.2351531982422
Epoch 18/50, Loss: 899.9906005859375
Epoch 19/50, Loss: 49.965213775634766
Epoch 20/50, Loss: 8.756131172180176
Epoch 21/50, Loss: 154.79444885253906
Epoch 22/50, Loss: 8.9902982711792
Epoch 23/50, Loss: 1783.6033935546875
Epoch 24/50, Loss: 94.68706512451172
Epoch 25/50, Loss: 898.9409790039062
Epoch 26/50, Loss: 4097.642578125
Epoch 27/50, Loss: 1964.6998291015625
Epoch 28/50, Loss: 368.4723815

In [9]:
evaluate_model(model, test_loader)

Mean RMSE: 28.363592924402774, Mean MAPE: 5.550024182730013
