In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# 加载数据
data = pd.read_csv('pivot_50_time.csv')

# 数据预处理
# 提取特征列和目标列
numerical_features = ['temperature_2m (°C)', 'apparent_temperature (°C)', 'rain (mm)', 'wind_speed_100m (km/h)']
categorical_features = ['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos', 'month_sin', 'month_cos']
stations = [col for col in data.columns if col.isdigit()]  # 假设车站列是数字

# 构造输入X和目标y
X = data[categorical_features + numerical_features].values
y = data[stations].values

# 定义时间窗口
time_window = 24

# 构造时间窗口的数据集
def create_time_window(X, y, time_window):
    X_window, y_window = [], []
    for i in range(len(X) - time_window):
        X_window.append(X[i:i+time_window])
        y_window.append(y[i+time_window])  # 预测当前时间步的目标
    return np.array(X_window), np.array(y_window)

X_window, y_window = create_time_window(X, y, time_window)

# 使用 KFold 进行交叉验证
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 初始化存储结果的列表
maes = []
rmses = []

# 交叉验证过程
for fold, (train_index, val_index) in enumerate(kf.split(X_window)):
    print(f"\nFold {fold + 1}")

    # 分割数据
    X_train, X_val = X_window[train_index], X_window[val_index]
    y_train, y_val = y_window[train_index], y_window[val_index]

    # 转换为Tensor
    X_train, X_val = map(torch.tensor, (X_train, X_val))
    y_train, y_val = map(torch.tensor, (y_train, y_val))

    # 数据集类
    class TimeSeriesDataset(Dataset):
        def __init__(self, X, y):
            self.X = X
            self.y = y

        def __len__(self):
            return len(self.X)

        def __getitem__(self, idx):
            return self.X[idx], self.y[idx]

    # 数据加载器
    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # GRU 模型定义
    class GRUModel(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
            super(GRUModel, self).__init__()
            self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
            self.fc = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            out, _ = self.gru(x)
            out = self.fc(out[:, -1, :])  # 只取最后一个时间步的输出
            return out

    # 模型初始化
    input_dim = X_train.shape[2]
    hidden_dim = 64
    output_dim = y_train.shape[1]
    num_layers = 2
    dropout = 0.3
    model = GRUModel(input_dim, hidden_dim, output_dim, num_layers, dropout)

    # 使用 Huber Loss 替代 MSELoss，并设置 delta 参数
    from torch.nn import HuberLoss
    delta = 1.0  # 可以根据数据分布或验证集表现调整该值
    criterion = HuberLoss(delta=delta)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # 训练模型
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_val_loss = float('inf')
    patience = 5
    trigger_times = 0

    for epoch in range(100):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.float().to(device), y_batch.float().to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)  # 使用 Huber Loss
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        val_loss = 0
        model.eval()
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.float().to(device), y_batch.float().to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)  # 使用 Huber Loss
                val_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # 早停机制
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
            torch.save(model.state_dict(), f'best_gru_model_fold{fold+1}.pth')  # 保存最佳模型
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print("Early stopping!")
                break

    # 加载最佳模型
    model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))

    # 测试模型
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.float().to(device)
            outputs = model(X_batch)
            y_pred.append(outputs.cpu().numpy())
            y_true.append(y_batch.numpy())

    y_pred = np.vstack(y_pred)
    y_true = np.vstack(y_true)

    # 评估指标
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    maes.append(mae)
    rmses.append(rmse)

    print(f"Fold {fold+1} - MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# 输出所有折的平均性能
print(f"\nAverage MAE: {np.mean(maes):.4f}")
print(f"Average RMSE: {np.mean(rmses):.4f}")




Fold 1
Epoch 1, Train Loss: 0.1844, Val Loss: 0.1807
Epoch 2, Train Loss: 0.1793, Val Loss: 0.1801
Epoch 3, Train Loss: 0.1784, Val Loss: 0.1789
Epoch 4, Train Loss: 0.1777, Val Loss: 0.1789
Epoch 5, Train Loss: 0.1772, Val Loss: 0.1780
Epoch 6, Train Loss: 0.1767, Val Loss: 0.1778
Epoch 7, Train Loss: 0.1763, Val Loss: 0.1773
Epoch 8, Train Loss: 0.1755, Val Loss: 0.1770
Epoch 9, Train Loss: 0.1751, Val Loss: 0.1763
Epoch 10, Train Loss: 0.1744, Val Loss: 0.1766
Epoch 11, Train Loss: 0.1741, Val Loss: 0.1758
Epoch 12, Train Loss: 0.1734, Val Loss: 0.1750
Epoch 13, Train Loss: 0.1730, Val Loss: 0.1760
Epoch 14, Train Loss: 0.1725, Val Loss: 0.1744
Epoch 15, Train Loss: 0.1723, Val Loss: 0.1746
Epoch 16, Train Loss: 0.1718, Val Loss: 0.1744
Epoch 17, Train Loss: 0.1715, Val Loss: 0.1740
Epoch 18, Train Loss: 0.1711, Val Loss: 0.1735
Epoch 19, Train Loss: 0.1709, Val Loss: 0.1733
Epoch 20, Train Loss: 0.1704, Val Loss: 0.1735
Epoch 21, Train Loss: 0.1705, Val Loss: 0.1731
Epoch 22, Trai

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 1 - MAE: 0.4002, RMSE: 0.6759

Fold 2
Epoch 1, Train Loss: 0.1845, Val Loss: 0.1802
Epoch 2, Train Loss: 0.1795, Val Loss: 0.1796
Epoch 3, Train Loss: 0.1784, Val Loss: 0.1790
Epoch 4, Train Loss: 0.1776, Val Loss: 0.1788
Epoch 5, Train Loss: 0.1773, Val Loss: 0.1780
Epoch 6, Train Loss: 0.1769, Val Loss: 0.1777
Epoch 7, Train Loss: 0.1763, Val Loss: 0.1776
Epoch 8, Train Loss: 0.1758, Val Loss: 0.1769
Epoch 9, Train Loss: 0.1754, Val Loss: 0.1765
Epoch 10, Train Loss: 0.1750, Val Loss: 0.1758
Epoch 11, Train Loss: 0.1744, Val Loss: 0.1755
Epoch 12, Train Loss: 0.1739, Val Loss: 0.1750
Epoch 13, Train Loss: 0.1736, Val Loss: 0.1746
Epoch 14, Train Loss: 0.1728, Val Loss: 0.1748
Epoch 15, Train Loss: 0.1726, Val Loss: 0.1742
Epoch 16, Train Loss: 0.1723, Val Loss: 0.1741
Epoch 17, Train Loss: 0.1718, Val Loss: 0.1732
Epoch 18, Train Loss: 0.1714, Val Loss: 0.1731
Epoch 19, Train Loss: 0.1710, Val Loss: 0.1727
Epoch 20, Train Loss: 0.1706, Val Loss: 0.1729
Epoch 21, Train Loss: 0.17

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 2 - MAE: 0.3961, RMSE: 0.6917

Fold 3
Epoch 1, Train Loss: 0.1838, Val Loss: 0.1826
Epoch 2, Train Loss: 0.1790, Val Loss: 0.1826
Epoch 3, Train Loss: 0.1779, Val Loss: 0.1810
Epoch 4, Train Loss: 0.1774, Val Loss: 0.1804
Epoch 5, Train Loss: 0.1768, Val Loss: 0.1800
Epoch 6, Train Loss: 0.1765, Val Loss: 0.1800
Epoch 7, Train Loss: 0.1760, Val Loss: 0.1794
Epoch 8, Train Loss: 0.1758, Val Loss: 0.1788
Epoch 9, Train Loss: 0.1752, Val Loss: 0.1795
Epoch 10, Train Loss: 0.1744, Val Loss: 0.1785
Epoch 11, Train Loss: 0.1739, Val Loss: 0.1777
Epoch 12, Train Loss: 0.1736, Val Loss: 0.1769
Epoch 13, Train Loss: 0.1729, Val Loss: 0.1763
Epoch 14, Train Loss: 0.1723, Val Loss: 0.1764
Epoch 15, Train Loss: 0.1724, Val Loss: 0.1764
Epoch 16, Train Loss: 0.1716, Val Loss: 0.1756
Epoch 17, Train Loss: 0.1713, Val Loss: 0.1753
Epoch 18, Train Loss: 0.1710, Val Loss: 0.1749
Epoch 19, Train Loss: 0.1705, Val Loss: 0.1747
Epoch 20, Train Loss: 0.1701, Val Loss: 0.1744
Epoch 21, Train Loss: 0.16

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 3 - MAE: 0.4025, RMSE: 0.6768

Fold 4
Epoch 1, Train Loss: 0.1860, Val Loss: 0.1778
Epoch 2, Train Loss: 0.1805, Val Loss: 0.1757
Epoch 3, Train Loss: 0.1795, Val Loss: 0.1751
Epoch 4, Train Loss: 0.1789, Val Loss: 0.1750
Epoch 5, Train Loss: 0.1783, Val Loss: 0.1749
Epoch 6, Train Loss: 0.1780, Val Loss: 0.1743
Epoch 7, Train Loss: 0.1776, Val Loss: 0.1741
Epoch 8, Train Loss: 0.1772, Val Loss: 0.1731
Epoch 9, Train Loss: 0.1767, Val Loss: 0.1728
Epoch 10, Train Loss: 0.1762, Val Loss: 0.1720
Epoch 11, Train Loss: 0.1754, Val Loss: 0.1724
Epoch 12, Train Loss: 0.1750, Val Loss: 0.1716
Epoch 13, Train Loss: 0.1745, Val Loss: 0.1709
Epoch 14, Train Loss: 0.1739, Val Loss: 0.1704
Epoch 15, Train Loss: 0.1733, Val Loss: 0.1702
Epoch 16, Train Loss: 0.1731, Val Loss: 0.1703
Epoch 17, Train Loss: 0.1727, Val Loss: 0.1712
Epoch 18, Train Loss: 0.1722, Val Loss: 0.1703
Epoch 19, Train Loss: 0.1720, Val Loss: 0.1693
Epoch 20, Train Loss: 0.1716, Val Loss: 0.1690
Epoch 21, Train Loss: 0.17

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 4 - MAE: 0.3906, RMSE: 0.6748

Fold 5
Epoch 1, Train Loss: 0.1858, Val Loss: 0.1789
Epoch 2, Train Loss: 0.1801, Val Loss: 0.1777
Epoch 3, Train Loss: 0.1788, Val Loss: 0.1771
Epoch 4, Train Loss: 0.1782, Val Loss: 0.1766
Epoch 5, Train Loss: 0.1777, Val Loss: 0.1760
Epoch 6, Train Loss: 0.1771, Val Loss: 0.1755
Epoch 7, Train Loss: 0.1764, Val Loss: 0.1752
Epoch 8, Train Loss: 0.1759, Val Loss: 0.1754
Epoch 9, Train Loss: 0.1756, Val Loss: 0.1748
Epoch 10, Train Loss: 0.1751, Val Loss: 0.1744
Epoch 11, Train Loss: 0.1745, Val Loss: 0.1737
Epoch 12, Train Loss: 0.1742, Val Loss: 0.1739
Epoch 13, Train Loss: 0.1736, Val Loss: 0.1737
Epoch 14, Train Loss: 0.1732, Val Loss: 0.1733
Epoch 15, Train Loss: 0.1729, Val Loss: 0.1732
Epoch 16, Train Loss: 0.1724, Val Loss: 0.1727
Epoch 17, Train Loss: 0.1721, Val Loss: 0.1724
Epoch 18, Train Loss: 0.1718, Val Loss: 0.1723
Epoch 19, Train Loss: 0.1713, Val Loss: 0.1719
Epoch 20, Train Loss: 0.1713, Val Loss: 0.1719
Epoch 21, Train Loss: 0.17

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 5 - MAE: 0.4000, RMSE: 0.6721

Average MAE: 0.3979
Average RMSE: 0.6783
