In [3]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from torch.nn import HuberLoss  # 导入 Huber Loss
import pandas as pd

# 加载数据
data = pd.read_csv('pivot_50_time.csv')

# 数据预处理
# 提取特征列和目标列
numerical_features = ['temperature_2m (°C)', 'apparent_temperature (°C)', 'rain (mm)', 'wind_speed_100m (km/h)']
categorical_features = ['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos', 'month_sin', 'month_cos']
stations = [col for col in data.columns if col.isdigit()]  # 假设车站列是数字

# 构造输入X和目标y
X = data[categorical_features + numerical_features].values
y = data[stations].values

# 定义时间窗口
time_window = 48

# 构造时间窗口的数据集
def create_time_window(X, y, time_window):
    X_window, y_window = [], []
    for i in range(len(X) - time_window):
        X_window.append(X[i:i+time_window])
        y_window.append(y[i+time_window])  # 预测当前时间步的目标
    return np.array(X_window), np.array(y_window)

X_window, y_window = create_time_window(X, y, time_window)

# 使用 KFold 进行交叉验证
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 初始化存储结果的列表
maes = []
rmses = []

# 交叉验证过程
for fold, (train_index, val_index) in enumerate(kf.split(X_window)):
    print(f"\nFold {fold + 1}")

    # 分割数据
    X_train, X_val = X_window[train_index], X_window[val_index]
    y_train, y_val = y_window[train_index], y_window[val_index]

    # 转换为Tensor
    X_train, X_val = map(torch.tensor, (X_train, X_val))
    y_train, y_val = map(torch.tensor, (y_train, y_val))

    # 数据集类
    class TimeSeriesDataset(Dataset):
        def __init__(self, X, y):
            self.X = X
            self.y = y

        def __len__(self):
            return len(self.X)

        def __getitem__(self, idx):
            return self.X[idx], self.y[idx]

    # 数据加载器
    train_dataset = TimeSeriesDataset(X_train, y_train)
    val_dataset = TimeSeriesDataset(X_val, y_val)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # GRU 和 CNN 模型定义
    class GRUModelWithCNN(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout, filters, kernel_size):
            super(GRUModelWithCNN, self).__init__()
            
            # 第一层 CNN
            self.conv1d_1 = nn.Conv1d(
                in_channels=input_dim,       # 输入通道数
                out_channels=filters,        # 可调整的 filter 数
                kernel_size=kernel_size,     # 卷积核大小
                padding=kernel_size // 2     # 保持输出的时间维度不变
            )
            self.relu = nn.ReLU()
            self.dropout = nn.Dropout(dropout)
            
            # GRU 层
            self.gru = nn.GRU(
                input_size=filters ,      # GRU 输入维度为第二层 CNN 的输出通道数
                hidden_size=hidden_dim,      # GRU 隐藏单元数
                num_layers=num_layers,       # GRU 层数
                batch_first=True,
                dropout=dropout
            )
            
            # 全连接层
            self.fc = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            # CNN expects input in shape [batch_size, channels, time_steps]
            x = x.transpose(1, 2)  # [batch_size, time_steps, input_dim] -> [batch_size, input_dim, time_steps]
            
            # 第一层 CNN
            x = self.conv1d_1(x)
            x = self.relu(x)
            x = self.dropout(x)  # Apply dropout after activation
            
            # 调整回 GRU 输入格式
            x = x.transpose(1, 2)  # [batch_size, filters, time_steps] -> [batch_size, time_steps, filters]
            
            # GRU 层
            out, _ = self.gru(x)
            
            # 取 GRU 最后一个时间步的输出
            out = self.fc(out[:, -1, :])
            return out

    # 模型初始化
    input_dim = X_train.shape[2]  # 输入特征维度
    hidden_dim = 64               # GRU 隐藏单元数
    output_dim = y_train.shape[1] # 输出维度
    num_layers = 2                # GRU 层数
    dropout = 0.25                # Dropout 概率

    # 调整 filter 和卷积核大小
    filters = 64               # CNN 的 filter 数量
    kernel_size = 3              # 卷积核大小

    model = GRUModelWithCNN(
        input_dim=input_dim,
        hidden_dim=hidden_dim,
        output_dim=output_dim,
        num_layers=num_layers,
        dropout=dropout,
        filters=filters,
        kernel_size=kernel_size
    )

    # 打印模型结构（可选）
    print(model)

    # 使用 Huber Loss 替代 MSELoss，并设置 delta 参数
    criterion = HuberLoss(delta=1.0)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # 训练模型
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    best_val_loss = float('inf')
    patience = 5
    trigger_times = 0

    for epoch in range(100):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.float().to(device), y_batch.float().to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)  # 使用 Huber Loss
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        val_loss = 0
        model.eval()
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.float().to(device), y_batch.float().to(device)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)  # 使用 Huber Loss
                val_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # 早停机制
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            trigger_times = 0
            torch.save(model.state_dict(), f'best_gru_model_fold{fold+1}.pth')  # 保存最佳模型
        else:
            trigger_times += 1
            if trigger_times >= patience:
                print("Early stopping!")
                break

    # 加载最佳模型
    model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))

    # 测试模型
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch = X_batch.float().to(device)
            outputs = model(X_batch)
            y_pred.append(outputs.cpu().numpy())
            y_true.append(y_batch.numpy())

    y_pred = np.vstack(y_pred)
    y_true = np.vstack(y_true)

    # 评估指标
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    maes.append(mae)
    rmses.append(rmse)

    print(f"Fold {fold+1} - MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# 输出所有折的平均性能
print(f"\nAverage MAE: {np.mean(maes):.4f}")
print(f"Average RMSE: {np.mean(rmses):.4f}")



Fold 1
GRUModelWithCNN(
  (conv1d_1): Conv1d(10, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (relu): ReLU()
  (dropout): Dropout(p=0.25, inplace=False)
  (gru): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=64, out_features=50, bias=True)
)
Epoch 1, Train Loss: 0.1870, Val Loss: 0.1763
Epoch 2, Train Loss: 0.1813, Val Loss: 0.1753
Epoch 3, Train Loss: 0.1799, Val Loss: 0.1744
Epoch 4, Train Loss: 0.1792, Val Loss: 0.1733
Epoch 5, Train Loss: 0.1785, Val Loss: 0.1732
Epoch 6, Train Loss: 0.1778, Val Loss: 0.1721
Epoch 7, Train Loss: 0.1771, Val Loss: 0.1724
Epoch 8, Train Loss: 0.1765, Val Loss: 0.1715
Epoch 9, Train Loss: 0.1759, Val Loss: 0.1708
Epoch 10, Train Loss: 0.1752, Val Loss: 0.1718
Epoch 11, Train Loss: 0.1750, Val Loss: 0.1704
Epoch 12, Train Loss: 0.1744, Val Loss: 0.1706
Epoch 13, Train Loss: 0.1740, Val Loss: 0.1692
Epoch 14, Train Loss: 0.1736, Val Loss: 0.1693
Epoch 15, Train Loss: 0.1732, Val Loss: 0.1704
Epoch 16, Train 

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 1 - MAE: 0.3976, RMSE: 0.6613

Fold 2
GRUModelWithCNN(
  (conv1d_1): Conv1d(10, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (relu): ReLU()
  (dropout): Dropout(p=0.25, inplace=False)
  (gru): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=64, out_features=50, bias=True)
)
Epoch 1, Train Loss: 0.1852, Val Loss: 0.1831
Epoch 2, Train Loss: 0.1794, Val Loss: 0.1804
Epoch 3, Train Loss: 0.1786, Val Loss: 0.1795
Epoch 4, Train Loss: 0.1775, Val Loss: 0.1797
Epoch 5, Train Loss: 0.1767, Val Loss: 0.1788
Epoch 6, Train Loss: 0.1762, Val Loss: 0.1779
Epoch 7, Train Loss: 0.1755, Val Loss: 0.1773
Epoch 8, Train Loss: 0.1749, Val Loss: 0.1770
Epoch 9, Train Loss: 0.1742, Val Loss: 0.1766
Epoch 10, Train Loss: 0.1738, Val Loss: 0.1765
Epoch 11, Train Loss: 0.1734, Val Loss: 0.1762
Epoch 12, Train Loss: 0.1729, Val Loss: 0.1753
Epoch 13, Train Loss: 0.1725, Val Loss: 0.1749
Epoch 14, Train Loss: 0.1721, Val Loss: 0.1758
Epoch 15, Train Loss: 0.1717

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 2 - MAE: 0.4008, RMSE: 0.6768

Fold 3
GRUModelWithCNN(
  (conv1d_1): Conv1d(10, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (relu): ReLU()
  (dropout): Dropout(p=0.25, inplace=False)
  (gru): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=64, out_features=50, bias=True)
)
Epoch 1, Train Loss: 0.1867, Val Loss: 0.1789
Epoch 2, Train Loss: 0.1803, Val Loss: 0.1779
Epoch 3, Train Loss: 0.1791, Val Loss: 0.1767
Epoch 4, Train Loss: 0.1783, Val Loss: 0.1761
Epoch 5, Train Loss: 0.1778, Val Loss: 0.1755
Epoch 6, Train Loss: 0.1770, Val Loss: 0.1745
Epoch 7, Train Loss: 0.1766, Val Loss: 0.1753
Epoch 8, Train Loss: 0.1759, Val Loss: 0.1743
Epoch 9, Train Loss: 0.1753, Val Loss: 0.1738
Epoch 10, Train Loss: 0.1750, Val Loss: 0.1732
Epoch 11, Train Loss: 0.1744, Val Loss: 0.1729
Epoch 12, Train Loss: 0.1739, Val Loss: 0.1731
Epoch 13, Train Loss: 0.1736, Val Loss: 0.1719
Epoch 14, Train Loss: 0.1733, Val Loss: 0.1717
Epoch 15, Train Loss: 0.1730

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 3 - MAE: 0.3985, RMSE: 0.6691

Fold 4
GRUModelWithCNN(
  (conv1d_1): Conv1d(10, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (relu): ReLU()
  (dropout): Dropout(p=0.25, inplace=False)
  (gru): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=64, out_features=50, bias=True)
)
Epoch 1, Train Loss: 0.1861, Val Loss: 0.1802
Epoch 2, Train Loss: 0.1800, Val Loss: 0.1799
Epoch 3, Train Loss: 0.1789, Val Loss: 0.1779
Epoch 4, Train Loss: 0.1781, Val Loss: 0.1768
Epoch 5, Train Loss: 0.1775, Val Loss: 0.1767
Epoch 6, Train Loss: 0.1767, Val Loss: 0.1759
Epoch 7, Train Loss: 0.1761, Val Loss: 0.1753
Epoch 8, Train Loss: 0.1756, Val Loss: 0.1750
Epoch 9, Train Loss: 0.1751, Val Loss: 0.1744
Epoch 10, Train Loss: 0.1745, Val Loss: 0.1752
Epoch 11, Train Loss: 0.1741, Val Loss: 0.1731
Epoch 12, Train Loss: 0.1736, Val Loss: 0.1731
Epoch 13, Train Loss: 0.1734, Val Loss: 0.1727
Epoch 14, Train Loss: 0.1728, Val Loss: 0.1727
Epoch 15, Train Loss: 0.1725

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 4 - MAE: 0.3920, RMSE: 0.6742

Fold 5
GRUModelWithCNN(
  (conv1d_1): Conv1d(10, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (relu): ReLU()
  (dropout): Dropout(p=0.25, inplace=False)
  (gru): GRU(64, 64, num_layers=2, batch_first=True, dropout=0.25)
  (fc): Linear(in_features=64, out_features=50, bias=True)
)
Epoch 1, Train Loss: 0.1844, Val Loss: 0.1841
Epoch 2, Train Loss: 0.1792, Val Loss: 0.1827
Epoch 3, Train Loss: 0.1778, Val Loss: 0.1828
Epoch 4, Train Loss: 0.1771, Val Loss: 0.1811
Epoch 5, Train Loss: 0.1764, Val Loss: 0.1803
Epoch 6, Train Loss: 0.1757, Val Loss: 0.1798
Epoch 7, Train Loss: 0.1749, Val Loss: 0.1795
Epoch 8, Train Loss: 0.1743, Val Loss: 0.1785
Epoch 9, Train Loss: 0.1738, Val Loss: 0.1784
Epoch 10, Train Loss: 0.1733, Val Loss: 0.1780
Epoch 11, Train Loss: 0.1727, Val Loss: 0.1773
Epoch 12, Train Loss: 0.1725, Val Loss: 0.1771
Epoch 13, Train Loss: 0.1720, Val Loss: 0.1764
Epoch 14, Train Loss: 0.1716, Val Loss: 0.1760
Epoch 15, Train Loss: 0.1712

  model.load_state_dict(torch.load(f'best_gru_model_fold{fold+1}.pth'))


Fold 5 - MAE: 0.4009, RMSE: 0.7048

Average MAE: 0.3979
Average RMSE: 0.6773
