In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.optim import Adam
import random
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import tensorflow as tf
import math
from torch.utils.data import TensorDataset, DataLoader

# 시드 고정
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if you are using multi-GPU.
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(42)

In [42]:
data = pd.read_csv('/home/aibig25/hong_sj/trb/num.csv')
data = data.fillna(0)

unique_ids = data['sequence_ID'].unique()
train_ids, test_ids = train_test_split(unique_ids, test_size=41, random_state=42)
train_data = data[data['sequence_ID'].isin(train_ids)]
test_data = data[data['sequence_ID'].isin(test_ids)]

independent_vars = data.columns.difference(['center_x', 'center_y','center_x_ma','center_y_ma', 'ID', 'LC'])
dependent_vars = ['center_y_ma']

scaler = MinMaxScaler()

train_data[independent_vars] = scaler.fit_transform(train_data[independent_vars])
test_data[independent_vars] = scaler.transform(test_data[independent_vars])

X_train = train_data[independent_vars]
y_train = train_data[dependent_vars]

X_test = test_data[independent_vars]
y_test = test_data[dependent_vars]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data[independent_vars] = scaler.fit_transform(train_data[independent_vars])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[independent_vars] = scaler.transform(test_data[independent_vars])


In [43]:
# 입력 및 예측 시퀀스 길이 정의
input_sequence_length = 30
output_sequence_length = 30

def create_sequences(data, input_sequence_length, output_sequence_length):
    X = []
    y = []

    for i in range(len(data) - input_sequence_length - output_sequence_length + 1):
        X.append(data.iloc[i:(i + input_sequence_length)][independent_vars].values)
        y.append(data.iloc[(i + input_sequence_length):(i + input_sequence_length + output_sequence_length)][dependent_vars].values)
    
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_data, input_sequence_length, output_sequence_length)
X_test, y_test = create_sequences(test_data, input_sequence_length, output_sequence_length)

In [44]:
# 데이터셋을 텐서로 변환
train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
test_dataset = TensorDataset(torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32))

# 데이터 로더 생성
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)

In [59]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, output_sequence_length, dropout_prob):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.dropout = nn.Dropout(dropout_prob)  # Dropout 레이어 추가
        self.fc1 = nn.Linear(hidden_size, hidden_size)  # 중간 FC 레이어 추가
        # self.relu = nn.ReLU()  # ReLU 활성화 함수 추가
        self.fc2 = nn.Linear(hidden_size, output_size * output_sequence_length)  # 최종 FC 레이어

    def forward(self, src, tgt=None):
        h0 = torch.zeros(self.num_layers, src.size(0), self.hidden_size, device=src.device)
        c0 = torch.zeros(self.num_layers, src.size(0), self.hidden_size, device=src.device)

        out, (hn, cn) = self.lstm(src, (h0, c0))
        out = self.dropout(out[:, -1, :])
        out = self.fc1(out)
        # out = self.relu(out)
        out = self.fc2(out)
        out = out.view(-1, output_sequence_length, output_size)
        return out

In [60]:
input_size = len(independent_vars)  # 입력 특성 수
hidden_size = 512  # LSTM 은닉 상태의 크기
num_layers = 3  # LSTM 층 수
output_size = 1 
dropout_prob = 0.1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 모델 인스턴스화
model = LSTMModel(input_size, hidden_size, num_layers, output_size, output_sequence_length, dropout_prob).to(device)
model = model.float()  # 데이터 타입을 float32로 맞추기

# 손실 함수와 옵티마이저
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.001)

In [61]:
model.eval()

LSTMModel(
  (lstm): LSTM(26, 512, num_layers=3, batch_first=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (fc1): Linear(in_features=512, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=30, bias=True)
)

In [62]:
device = torch.device("cuda")
model.to(device)

def train_model(model, train_loader, optimizer, criterion, epochs):
    model.train()  # 모델을 훈련 모드로 설정
    for epoch in range(epochs):
        total_loss = 0
        for src, tgt in train_loader:
            src = src.to(device)
            tgt = tgt.to(device)
            optimizer.zero_grad()

            # 모델 출력
            output = model(src, tgt)

            # 손실 계산을 위한 타겟 데이터 조정
            min_length = min(output.size(1), tgt.size(1) - 1)
            adjusted_tgt = tgt[:, 1:min_length+1, :]  # output 길이에 맞게 tgt 조정

            # 손실 계산
            output = output[:, :min_length, :]  # output도 동일한 길이로 조정
            loss = criterion(output, adjusted_tgt)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        average_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {average_loss:.4f}')


def evaluate_model(model, test_loader):
    model.eval()  # 모델을 평가 모드로 설정
    total_rmse = 0
    total_mape = 0
    total_count = 0
    
    with torch.no_grad():  # 기울기 계산을 중지하여 메모리 사용량과 계산 속도를 개선
        for src, tgt in test_loader:
            src = src.to(device)
            tgt = tgt.to(device)
            output = model(src, tgt[:, :-1, :])  # 마지막 타임 스텝을 제외하고 입력
            
            # 실제 값과 예측 값을 정렬
            tgt_actual = tgt[:, 1:, :]  # 첫 타임 스텝을 제외한 실제 값
            min_length = min(output.size(1), tgt_actual.size(1))
            output = output[:, :min_length, :]
            tgt_actual = tgt_actual[:, :min_length, :]
            
            # RMSE 계산
            rmse = torch.sqrt(torch.mean((output - tgt_actual) ** 2))
            total_rmse += rmse * output.size(0)  # 배치별 가중치를 더하기
            
            # MAPE 계산
            mape = torch.mean(torch.abs((tgt_actual - output) / tgt_actual)) * 100
            total_mape += mape * output.size(0)  # 배치별 가중치를 더하기
            
            total_count += output.size(0)
    
    average_rmse = total_rmse / total_count
    average_mape = total_mape / total_count
    print(f'Final Test RMSE: {average_rmse:.4f}')
    print(f'Final Test MAPE: {average_mape:.4f}')

In [63]:
train_model(model, train_loader, optimizer, criterion, epochs = 50)

Epoch 1/50, Loss: 2731.2527
Epoch 2/50, Loss: 1365.7049
Epoch 3/50, Loss: 1379.1712
Epoch 4/50, Loss: 1370.3059
Epoch 5/50, Loss: 1365.7967
Epoch 6/50, Loss: 1361.9006
Epoch 7/50, Loss: 1343.0354
Epoch 8/50, Loss: 1337.9465
Epoch 9/50, Loss: 1333.0394
Epoch 10/50, Loss: 1334.7480
Epoch 11/50, Loss: 1327.4986
Epoch 12/50, Loss: 1329.5402
Epoch 13/50, Loss: 1322.7567
Epoch 14/50, Loss: 1325.6450
Epoch 15/50, Loss: 1322.2001
Epoch 16/50, Loss: 1320.1116
Epoch 17/50, Loss: 1316.4087
Epoch 18/50, Loss: 1318.4296
Epoch 19/50, Loss: 1307.7125
Epoch 20/50, Loss: 1315.0563
Epoch 21/50, Loss: 1319.5478
Epoch 22/50, Loss: 1317.2959
Epoch 23/50, Loss: 1309.5585
Epoch 24/50, Loss: 1305.8117
Epoch 25/50, Loss: 1307.0466
Epoch 26/50, Loss: 1306.2000
Epoch 27/50, Loss: 1306.3902
Epoch 28/50, Loss: 1311.0769
Epoch 29/50, Loss: 1307.6081
Epoch 30/50, Loss: 1307.7204
Epoch 31/50, Loss: 1314.7675
Epoch 32/50, Loss: 1310.4591
Epoch 33/50, Loss: 1308.3166
Epoch 34/50, Loss: 1306.4211
Epoch 35/50, Loss: 1304

In [64]:
evaluate_model(model, test_loader)

Final Test RMSE: 26.9232
Final Test MAPE: 5.3703


In [65]:
torch.save(model.state_dict(), 'lstm_1_1.pth')