In [5]:
%pip install pandas numpy scikit-learn torch matplotlib

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.7.0-cp313-cp313-win_amd64.whl.metadata (29 kB)
Collecting filelock (from torch)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.7.0-cp313-cp313-win_amd64.whl (212.5 MB)
   ---------------------------------------- 0.0/212.5 MB ? eta -:--:--
    --------------------------------------- 5.2/212.5 MB 25.7 MB/s eta 0:00:09
   -- ------------------------------------- 10.7/212.5 MB 25.8 MB/s eta 0:00:08
   -- --------------------------------


[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: C:\Users\Admin\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [6]:
import pandas as pd
import numpy as np

# 파일명은 파일탐색기에서 확인 (예: train_heat.csv)
df = pd.read_csv("train_heat.csv", on_bad_lines='skip', encoding='utf-8', engine='python')
df = df[df['train_heat.tm'].astype(str).str.len() == 10]
df['datetime'] = pd.to_datetime(df['train_heat.tm'].astype(str), format='%Y%m%d%H')
df['train_heat.ta_chi'] = df['train_heat.ta_chi'].replace(-99.0, 0)
df['train_heat.heat_demand'] = df['train_heat.heat_demand'].fillna(0)

features = [
    'train_heat.ta', 'train_heat.wd', 'train_heat.ws', 'train_heat.rn_day', 'train_heat.rn_hr1',
    'train_heat.hm', 'train_heat.si', 'train_heat.ta_chi', 'train_heat.heat_demand'
]
data = df[features].dropna().reset_index(drop=True)


In [7]:
from sklearn.preprocessing import StandardScaler

SEQ_LEN = 24   # 24시간 입력
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

X, y = [], []
for i in range(len(data_scaled) - SEQ_LEN):
    X.append(data_scaled[i:i+SEQ_LEN, :-1])
    y.append(data_scaled[i+SEQ_LEN, -1])

X = np.array(X)
y = np.array(y)
print(X.shape, y.shape)  # (샘플 수, 시퀀스 길이, 변수 수), (샘플 수,)


(499277, 24, 8) (499277,)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

# PyTorch LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=1):
        super(LSTMMoㅎㅎdel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 32)
        self.fc2 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        # 마지막 시점의 출력만 사용
        last_output = lstm_out[:, -1, :]
        x = self.relu(self.fc1(last_output))
        x = self.fc2(x)
        return x

# 데이터를 PyTorch 텐서로 변환
X_tensor = torch.FloatTensor(X)
y_tensor = torch.FloatTensor(y).unsqueeze(1)

# 훈련/검증 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# 데이터 로더 생성
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128)

# 모델 초기화
model = LSTMModel(input_size=X.shape[2])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters())

print(f"모델 구조:")
print(model)
print(f"훈련 데이터: {X_train.shape}, 검증 데이터: {X_val.shape}")

# 모델 훈련
model.train()
for epoch in range(10):
    total_loss = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # 검증 손실 계산
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X)
            val_loss += criterion(outputs, batch_y).item()
    
    print(f'Epoch [{epoch+1}/10], Train Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}')
    model.train()


모델 구조:
LSTMModel(
  (lstm): LSTM(8, 64, batch_first=True)
  (fc1): Linear(in_features=64, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=1, bias=True)
  (relu): ReLU()
)
훈련 데이터: torch.Size([399421, 24, 8]), 검증 데이터: torch.Size([99856, 24, 8])
Epoch [1/10], Train Loss: 0.5357, Val Loss: 0.4959
Epoch [2/10], Train Loss: 0.4629, Val Loss: 0.4490
Epoch [3/10], Train Loss: 0.4128, Val Loss: 0.4207
Epoch [4/10], Train Loss: 0.3747, Val Loss: 0.3579
Epoch [5/10], Train Loss: 0.3430, Val Loss: 0.3413
Epoch [6/10], Train Loss: 0.3183, Val Loss: 0.3121
Epoch [7/10], Train Loss: 0.2987, Val Loss: 0.2917
Epoch [8/10], Train Loss: 0.2791, Val Loss: 0.2726
Epoch [9/10], Train Loss: 0.2644, Val Loss: 0.2785
Epoch [10/10], Train Loss: 0.2513, Val Loss: 0.2528


In [11]:
import torch
import numpy as np
from sklearn.metrics import mean_squared_error

# (예시) y_true, y_pred가 numpy array 또는 torch tensor일 때 모두 지원
# 만약 모델이 torch tensor로 출력했다면 .detach().cpu().numpy()로 변환

# 예시 코드 (예측)
model.eval()
with torch.no_grad():
    y_pred = model(torch.from_numpy(X).float()).squeeze().cpu().numpy()

# 실제값(y)도 numpy array여야 함
# y는 이미 numpy array라고 가정

# RMSE 계산
rmse = np.sqrt(mean_squared_error(y, y_pred))
print("최종 RMSE:", rmse)


최종 RMSE: 0.4907942584773125
