In [2]:
# ✅ 0. CSV 로딩 및 전처리 (하루 총 발전량 예측용: scalar target)
import pandas as pd
import numpy as np

# Load the full hourly dataset (all columns).
df = pd.read_csv("df_solar_final_sorted.csv")

# Hourly feature columns handed to the model, in this exact order.
DYNAMIC_COLUMNS = [
    '일사(MJ/m2)', '태양고도', '기온(°C)', '풍속(m/s)', '풍향(16방위)',
    '하늘상태', '습도(%)', '강수량(mm)', '적설(cm)',
]
# Per-day features, read from the first (lowest-hour) row of each day.
STATIC_COLUMNS = ['설비용량(MW)', '연식(년)', 'month', 'weekday']

# Accumulators: one entry per (plant, day) that passes the filters below.
dynamic_x_list, static_x_list, labels_list = [], [], []

for plant, group in df.groupby('발전구분'):
    group = group.sort_values(by=['year', 'month', 'day', 'hour'])
    for (y, m, d), day_data in group.groupby(['year', 'month', 'day']):
        # Keep only days with exactly 24 rows whose summed irradiance
        # is non-zero; everything else is dropped.
        if len(day_data) != 24 or day_data['일사(MJ/m2)'].sum() == 0:
            continue
        day_data = day_data.sort_values('hour')
        first_row = day_data.iloc[0]

        dynamic_x_list.append(day_data[DYNAMIC_COLUMNS].values)
        static_x_list.append([first_row[col] for col in STATIC_COLUMNS])
        # Scalar target: log1p of the day's summed generation.
        labels_list.append(np.log1p(day_data['발전량(kWh)'].sum()))

# Convert to numpy arrays, then drop every sample containing a NaN.
dynamic_x = np.stack(dynamic_x_list)
static_x = np.stack(static_x_list)
labels = np.array(labels_list).reshape(-1, 1)  # shape: (N, 1)

# A sample is rejected if any of its three parts holds a NaN.
has_nan = (
    np.isnan(dynamic_x).any(axis=(1, 2))
    | np.isnan(static_x).any(axis=1)
    | np.isnan(labels).any(axis=1)
)
mask = ~has_nan

dynamic_x, static_x, labels = dynamic_x[mask], static_x[mask], labels[mask]

In [3]:
# ✅ 1. 라이브러리
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import matplotlib.pyplot as plt
import random

def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: Integer seed passed to every generator (default 42).
    """
    # Same seeding order as before: random, numpy, torch CPU, torch CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_seed()

# ✅ 2. Transformer
class SolarForecastTransformer(nn.Module):
    """Transformer encoder over a dynamic sequence, fused with static features.

    The dynamic sequence is projected to ``model_dim``, passed through a
    ``nn.TransformerEncoder``, and mean-pooled over the sequence axis. The
    static vector is projected to the same width and added to the pooled
    representation before the final linear output layer.

    Args:
        input_dim: Number of features per time step in ``dynamic_x``.
        model_dim: Width of the encoder and of the fused representation.
        num_heads: Attention heads per encoder layer (passed as ``nhead``).
        num_layers: Number of stacked encoder layers.
        dropout: Dropout rate passed to each encoder layer.
        output_dim: Size of the prediction vector per sample.
        static_dim: Number of features in ``static_x``. Defaults to 4, the
            value that was previously hard-coded.
    """

    def __init__(self, input_dim, model_dim=128, num_heads=4, num_layers=2,
                 dropout=0.1, output_dim=1, static_dim=4):
        super().__init__()
        self.input_projection = nn.Linear(input_dim, model_dim)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        # Static feature width is configurable instead of a fixed 4.
        self.static_projection = nn.Linear(static_dim, model_dim)
        self.output_layer = nn.Linear(model_dim, output_dim)

    def forward(self, dynamic_x, static_x):
        """Return predictions of shape ``(batch, output_dim)``.

        Args:
            dynamic_x: Tensor whose last dim is ``input_dim``; dim 1 is the
                axis that gets mean-pooled (batch_first layout).
            static_x: Tensor whose last dim is ``static_dim``.
        """
        x = self.input_projection(dynamic_x)
        x = self.transformer_encoder(x)
        # NOTE(review): no positional encoding is added, so after mean
        # pooling the result does not depend on the order of the time
        # steps — confirm this is intended.
        x = x.mean(dim=1)
        static_emb = self.static_projection(static_x)
        fused = x + static_emb
        return self.output_layer(fused)

# ✅ 3. Dataset
class SolarDataset(Dataset):
    """Dataset of (dynamic features, static features, target) triples.

    All three inputs are copied into float32 tensors at construction time
    and indexed along their first axis.
    """

    @staticmethod
    def _to_float32(values):
        """Copy ``values`` into a new float32 tensor."""
        return torch.tensor(values, dtype=torch.float32)

    def __init__(self, dynamic_x, static_x, y):
        self.dynamic_x, self.static_x, self.y = (
            self._to_float32(part) for part in (dynamic_x, static_x, y)
        )

    def __len__(self):
        # The number of samples is taken from the target tensor.
        return len(self.y)

    def __getitem__(self, idx):
        return tuple(
            tensor[idx] for tensor in (self.dynamic_x, self.static_x, self.y)
        )

# ✅ 4. 학습 및 평가

def train_model(model, dataloader, optimizer, criterion, epoch, total_epochs, repeat, total_repeats):
    """Train ``model`` for one epoch and report the mean batch loss.

    Args:
        model: Module called as ``model(dynamic_x, static_x)``.
        dataloader: Iterable yielding ``(dynamic_x, static_x, y)`` batches.
        optimizer: Optimizer stepping the model's parameters.
        criterion: Loss called as ``criterion(outputs, y)``.
        epoch: Current epoch number (used only in the log line).
        total_epochs: Total number of epochs (used only in the log line).
        repeat: Current repeat number (used only in the log line).
        total_repeats: Total number of repeats (used only in the log line).

    Returns:
        float: Mean of the per-batch losses, or NaN when the dataloader
        yielded no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for dynamic_x, static_x, y in dataloader:
        optimizer.zero_grad()
        outputs = model(dynamic_x, static_x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    # Batches are counted here rather than via len(dataloader), so an empty
    # or length-less iterable no longer crashes the epoch.
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"[Repeat {repeat}/{total_repeats}] Epoch {epoch}/{total_epochs} - Loss: {avg_loss:.4f}")
    return avg_loss


def evaluate_model(model, dataloader):
    """Evaluate ``model`` and report metrics on the original (expm1) scale.

    Predictions and targets are collected batch by batch, transformed back
    with ``np.expm1``, and scored with MAE, RMSE and R². The RMSE as a
    percentage of the mean target is printed.

    Args:
        model: Module called as ``model(dynamic_x, static_x)``.
        dataloader: Iterable yielding ``(dynamic_x, static_x, y)`` batches.

    Returns:
        tuple: ``(mae, rmse, r2, preds, trues)`` where ``preds`` and
        ``trues`` are the back-transformed arrays.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for dynamic_x, static_x, y in dataloader:
            outputs = model(dynamic_x, static_x)
            # .cpu() is a no-op on CPU tensors and keeps .numpy() working
            # when the tensors live on another device.
            preds.append(outputs.cpu().numpy())
            trues.append(y.cpu().numpy())
    if not preds:
        raise ValueError("evaluate_model received an empty dataloader")
    preds = np.expm1(np.vstack(preds))  # undo the log1p transform
    trues = np.expm1(np.vstack(trues))
    mae = mean_absolute_error(trues, preds)
    # The `squared=False` keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6. Taking the root per output and then averaging gives
    # the same value on every version.
    rmse = float(np.mean(np.sqrt(mean_squared_error(trues, preds, multioutput='raw_values'))))
    r2 = r2_score(trues, preds)
    mean_true = np.mean(trues)
    rmse_ratio = (rmse / mean_true) * 100
    print(f"RMSE Ratio: {rmse_ratio:.2f}%")
    return mae, rmse, r2, preds, trues

# ✅ 5. 실행

def run_experiment(dynamic_x, static_x, y, *, test_size=0.2, batch_size=32,
                   n_repeat=3, model_dims=(64, 128), lrs=(1e-3, 5e-4),
                   total_epochs=30, random_state=42):
    """Train a grid of transformer models and return the best one.

    The data is split once into train and test parts. For every repeat and
    every ``(model_dim, lr)`` combination a fresh model is trained for
    ``total_epochs`` epochs and scored on the test loader; the model with
    the lowest RMSE is kept.

    Args:
        dynamic_x: Array of dynamic features; ``shape[2]`` is used as the
            model's ``input_dim``.
        static_x: Array of static features, one row per sample.
        y: Array of targets, one row per sample.
        test_size: Fraction of samples held out (default 0.2).
        batch_size: Batch size of both loaders (default 32).
        n_repeat: Number of times the whole grid is repeated (default 3).
        model_dims: Model widths to try (default ``(64, 128)``).
        lrs: Adam learning rates to try (default ``(1e-3, 5e-4)``).
        total_epochs: Epochs per configuration (default 30).
        random_state: Seed of the train/test split (default 42).

    Returns:
        tuple: ``(best_model, test_loader)``. ``best_model`` is ``None`` if
        no configuration produced an RMSE below infinity.
    """
    X_train, X_test, s_train, s_test, y_train, y_test = train_test_split(
        dynamic_x, static_x, y, test_size=test_size, random_state=random_state
    )
    train_dataset = SolarDataset(X_train, s_train, y_train)
    test_dataset = SolarDataset(X_test, s_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    best_rmse = float('inf')
    best_model = None

    for repeat in range(1, n_repeat + 1):
        for model_dim in model_dims:
            for lr in lrs:
                print(f"\n▶ Repeat {repeat}/{n_repeat} | model_dim={model_dim}, lr={lr}")
                model = SolarForecastTransformer(input_dim=X_train.shape[2], model_dim=model_dim)
                optimizer = optim.Adam(model.parameters(), lr=lr)
                criterion = nn.MSELoss()
                for epoch in range(1, total_epochs + 1):
                    train_model(model, train_loader, optimizer, criterion, epoch, total_epochs, repeat, n_repeat)
                mae, rmse, r2, _, _ = evaluate_model(model, test_loader)
                print(f"Evaluation → MAE: {mae:.3f}, RMSE: {rmse:.3f}, R²: {r2:.3f}")
                # NOTE(review): the winner is chosen on the test split, so
                # its reported test RMSE is optimistically biased. Consider
                # selecting on a separate validation split.
                if rmse < best_rmse:
                    best_rmse = rmse
                    best_model = model

    return best_model, test_loader

# ✅ 6. 시각화

def plot_sample_prediction(model, test_loader, index=0):
    """Plot actual vs. predicted daily generation for one test sample.

    The sample is read directly from ``test_loader.dataset`` so exactly one
    actual bar and one predicted bar are drawn. Previously ``index``
    selected a whole batch, which drew every sample of that batch on top of
    each other and materialised the entire loader first.

    Args:
        model: Module called as ``model(dynamic_x, static_x)``.
        test_loader: DataLoader whose ``dataset`` yields
            ``(dynamic_x, static_x, y)`` per sample.
        index: Position of the sample inside ``test_loader.dataset``.
    """
    model.eval()
    dx, sx, y_true = test_loader.dataset[index]
    dx, sx, y_true = torch.as_tensor(dx), torch.as_tensor(sx), torch.as_tensor(y_true)
    with torch.no_grad():
        # Add the batch axis the model expects.
        pred = model(dx.unsqueeze(0), sx.unsqueeze(0)).cpu().numpy()
    true = y_true.cpu().numpy()
    # Undo the log1p transform applied to the targets.
    pred = np.expm1(pred.flatten())
    true = np.expm1(true.flatten())
    plt.figure(figsize=(8, 4))
    plt.bar(["Actual"], true, color='skyblue')
    plt.bar(["Predicted"], pred, color='orange')
    plt.title(f"Sample {index} - 하루 총 발전량 예측 (kWh)")
    plt.ylabel("kWh")
    plt.grid(True, axis='y')
    plt.show()

In [None]:
# ✅ 7. Run
# Run the experiment on the preprocessed arrays; keep the selected model and
# the test loader it returns.
final_model, test_loader = run_experiment(dynamic_x, static_x, labels)
# Plot actual vs. predicted output for index 0 of the test loader.
plot_sample_prediction(final_model, test_loader, index=0)


▶ Repeat 1/3 | model_dim=64, lr=0.001
[Repeat 1/3] Epoch 1/30 - Loss: 7.3184
[Repeat 1/3] Epoch 2/30 - Loss: 2.5218
[Repeat 1/3] Epoch 3/30 - Loss: 2.2121
[Repeat 1/3] Epoch 4/30 - Loss: 2.0282
[Repeat 1/3] Epoch 5/30 - Loss: 1.7966
[Repeat 1/3] Epoch 6/30 - Loss: 1.7072
[Repeat 1/3] Epoch 7/30 - Loss: 1.7301
[Repeat 1/3] Epoch 8/30 - Loss: 1.6764
[Repeat 1/3] Epoch 9/30 - Loss: 1.6851
[Repeat 1/3] Epoch 10/30 - Loss: 1.6810
[Repeat 1/3] Epoch 11/30 - Loss: 1.6639
[Repeat 1/3] Epoch 12/30 - Loss: 1.6284
[Repeat 1/3] Epoch 13/30 - Loss: 1.6472
[Repeat 1/3] Epoch 14/30 - Loss: 1.6395
[Repeat 1/3] Epoch 15/30 - Loss: 1.6393
[Repeat 1/3] Epoch 16/30 - Loss: 1.6500
[Repeat 1/3] Epoch 17/30 - Loss: 1.6278
[Repeat 1/3] Epoch 18/30 - Loss: 1.6196
[Repeat 1/3] Epoch 19/30 - Loss: 1.6149
[Repeat 1/3] Epoch 20/30 - Loss: 1.6126
[Repeat 1/3] Epoch 21/30 - Loss: 1.5885
[Repeat 1/3] Epoch 22/30 - Loss: 1.6204
[Repeat 1/3] Epoch 23/30 - Loss: 1.5847
[Repeat 1/3] Epoch 24/30 - Loss: 1.5969
[Repeat 1/



RMSE Ratio: 43.43%
Evaluation → MAE: 3855.048, RMSE: 7542.766, R²: 0.891

▶ Repeat 1/3 | model_dim=64, lr=0.0005
[Repeat 1/3] Epoch 1/30 - Loss: 4.8331
[Repeat 1/3] Epoch 2/30 - Loss: 2.6191
[Repeat 1/3] Epoch 3/30 - Loss: 2.3040
[Repeat 1/3] Epoch 4/30 - Loss: 2.0467
[Repeat 1/3] Epoch 5/30 - Loss: 1.8704
[Repeat 1/3] Epoch 6/30 - Loss: 1.7722
[Repeat 1/3] Epoch 7/30 - Loss: 1.7398
[Repeat 1/3] Epoch 8/30 - Loss: 1.6830
[Repeat 1/3] Epoch 9/30 - Loss: 1.6695
[Repeat 1/3] Epoch 10/30 - Loss: 1.6827
[Repeat 1/3] Epoch 11/30 - Loss: 1.6668
[Repeat 1/3] Epoch 12/30 - Loss: 1.6479
[Repeat 1/3] Epoch 13/30 - Loss: 1.6496
[Repeat 1/3] Epoch 14/30 - Loss: 1.6721
[Repeat 1/3] Epoch 15/30 - Loss: 1.6152
[Repeat 1/3] Epoch 16/30 - Loss: 1.6081
[Repeat 1/3] Epoch 17/30 - Loss: 1.6054
[Repeat 1/3] Epoch 18/30 - Loss: 1.6377
[Repeat 1/3] Epoch 19/30 - Loss: 1.6166
[Repeat 1/3] Epoch 20/30 - Loss: 1.6108
