In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, mean_squared_error
import random

# Seed every random number generator the script relies on
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and configure cuDNN.

    Args:
        seed: Integer seed passed unchanged to ``random.seed``,
            ``np.random.seed``, ``torch.manual_seed``,
            ``torch.cuda.manual_seed`` and ``torch.cuda.manual_seed_all``.

    Side effects:
        Sets ``torch.backends.cudnn.deterministic`` to ``True`` and
        ``torch.backends.cudnn.benchmark`` to ``False``.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed all RNGs before any data or model work
set_seed(42)

# Load the raw dataset
data = pd.read_csv('final_50_all.csv')

num_features = [
    'temperature_2m (°C)',
    'apparent_temperature (°C)',
    'rain (mm)',
    'wind_speed_100m (km/h)',
]

# Feature engineering: parse the 'hour' column as datetimes and expand it
# into calendar parts (same column creation order as before).
data['hour'] = pd.to_datetime(data['hour'])
hour_ts = data['hour']
for part_column, dt_attribute in (
    ('year', 'year'),
    ('month', 'month'),
    ('day', 'day'),
    ('hour_of_day', 'hour'),
    ('weekday', 'weekday'),
):
    data[part_column] = getattr(hour_ts.dt, dt_attribute)

# Cyclic (sin/cos) encodings so that the end of a period sits next to its start
for source_column, period, sin_column, cos_column in (
    ('hour_of_day', 24, 'hour_sin', 'hour_cos'),
    ('weekday', 7, 'weekday_sin', 'weekday_cos'),
    ('month', 12, 'month_sin', 'month_cos'),
):
    angle = 2 * np.pi * data[source_column] / period
    data[sin_column] = np.sin(angle)
    data[cos_column] = np.cos(angle)

# Scale the numeric weather features to [0, 1]
# NOTE(review): the scaler is fit on every row, i.e. before the K-fold split
# further down, so validation rows influence the scaling statistics.
scaler = MinMaxScaler()
scaler.fit(data[num_features])
data[num_features] = scaler.transform(data[num_features])

# The raw calendar columns are no longer needed once the encodings exist.
# NOTE(review): 'year' is created above but is neither dropped nor used as a feature.
data = data.drop(columns=['month', 'day', 'hour', 'hour_of_day', 'weekday'])

# Final feature list and regression target
features = [
    *num_features,
    'hour_sin', 'hour_cos',
    'weekday_sin', 'weekday_cos',
    'month_sin', 'month_cos',
]
target = 'ride_count'

X = data[features].to_numpy()
y = data[target].to_numpy()

def create_sequences(data, target, sequence_length):
    """Turn a time series into (input window, next-step target) pairs.

    For every start index ``i`` in ``range(len(data) - sequence_length)`` the
    input is ``data[i:i + sequence_length]`` and the label is
    ``target[i + sequence_length]``, i.e. the target value immediately after
    the window.

    Args:
        data: Indexable, sliceable sequence of observations.
        target: Indexable sequence of target values aligned with ``data``.
        sequence_length: Number of consecutive observations per window.

    Returns:
        A tuple ``(windows, labels)`` of NumPy arrays. Both are empty when
        ``data`` has no more than ``sequence_length`` entries.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data[start:start + sequence_length] for start in window_starts]
    labels = [target[start + sequence_length] for start in window_starts]
    return np.array(windows), np.array(labels)

# Window length: each sample is 24 consecutive rows, used to predict the
# target of the row that follows the window.
sequence_length = 24
X, y = create_sequences(X, y, sequence_length)


class GRUModel(nn.Module):
    """Conv1d -> ReLU -> GRU -> Dropout -> Linear regressor over a sequence.

    Defined once at module level (it used to be re-declared inside the fold
    loop); a fresh instance is still created for every fold.

    Args:
        input_size: Features per time step (``in_channels`` of the Conv1d).
        hidden_size: Hidden state size of the GRU.
        output_size: Width of the final linear layer.
        dropout: Dropout probability applied to the last GRU output.
        cnn_channels: Output channels of the Conv1d / input size of the GRU.
        kernel_size: Conv1d kernel size; with ``padding=kernel_size // 2`` an
            odd kernel keeps the sequence length unchanged.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout, cnn_channels, kernel_size):
        super().__init__()
        # Convolution over the time axis
        self.conv1d = nn.Conv1d(
            in_channels=input_size,
            out_channels=cnn_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
        )
        self.relu = nn.ReLU()

        # Recurrent layer
        self.gru = nn.GRU(cnn_channels, hidden_size, batch_first=True)

        # Regression head
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map ``[batch, seq_len, input_size]`` to ``[batch, output_size]``."""
        x = x.permute(0, 2, 1)  # Conv1d expects [batch, channels, seq_len]
        x = self.relu(self.conv1d(x))
        x = x.permute(0, 2, 1)  # back to [batch, seq_len, cnn_channels]

        out, _ = self.gru(x)  # [batch, seq_len, hidden_size]

        # Only the last time step feeds the regression head
        out = self.dropout(out[:, -1, :])
        return self.fc(out)


# Model hyperparameters (identical for every fold)
input_size = len(features)  # number of input features
hidden_size = 64            # GRU hidden size
output_size = 1             # single regression output
dropout = 0.2               # dropout probability
cnn_channels = 32           # Conv1d output channels
kernel_size = 3             # Conv1d kernel size

# Training configuration
patience = 10
epochs = 100

# 5-fold cross-validation.
# NOTE(review): the windows built above overlap (stride 1), so with
# shuffle=True validation windows share time steps with training windows.
# Consider a chronological split (e.g. TimeSeriesSplit) if this score is
# meant to estimate forecasting performance - confirm intent.
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

train_losses = []
val_losses = []
maes = []
rmses = []

for fold, (train_index, val_index) in enumerate(kf.split(X), start=1):
    print(f"Fold {fold}/{n_splits}")

    # Split into training and validation sets
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Convert to tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

    # Data loaders
    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val_tensor, y_val_tensor), batch_size=32)

    # Fresh model, loss and optimizer for this fold
    model = GRUModel(
        input_size,
        hidden_size,
        output_size,
        dropout=dropout,
        cnn_channels=cnn_channels,
        kernel_size=kernel_size,
    )
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    checkpoint_path = f'best_model_fold{fold}.pth'
    best_val_loss = float('inf')
    early_stop_counter = 0

    # Training loop with early stopping
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            # squeeze(-1) removes only the output dimension. A bare squeeze()
            # would also drop the batch dimension when the last batch holds a
            # single sample, giving a 0-d prediction.
            y_pred = model(X_batch).squeeze(-1)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        # Validation pass
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch).squeeze(-1)
                val_loss += criterion(y_pred, y_batch).item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping: checkpoint on improvement, otherwise count the miss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), checkpoint_path)  # keep the best weights
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    # Record the losses of the last epoch that ran. These are not the losses
    # of the best checkpoint that is restored below.
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Evaluate the best checkpoint on this fold's validation split.
    # weights_only=True limits unpickling to tensors and plain containers,
    # which is all a state_dict contains.
    model.load_state_dict(torch.load(checkpoint_path, weights_only=True))
    model.eval()
    y_preds = []
    y_trues = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            # squeeze(-1) keeps a 1-d result even for a batch of one, so
            # tolist() always yields a list that extend() can consume.
            y_preds.extend(model(X_batch).squeeze(-1).tolist())
            y_trues.extend(y_batch.tolist())

    mae = mean_absolute_error(y_trues, y_preds)
    # RMSE via an explicit square root: the `squared=False` argument was
    # deprecated and later removed from scikit-learn's mean_squared_error.
    rmse = float(np.sqrt(mean_squared_error(y_trues, y_preds)))
    maes.append(mae)
    rmses.append(rmse)

    print(f"Fold {fold} - MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# Average the per-fold results
avg_train_loss = np.mean(train_losses)
avg_val_loss = np.mean(val_losses)
avg_mae = np.mean(maes)
avg_rmse = np.mean(rmses)

print(f"Average Train Loss: {avg_train_loss:.4f}")
print(f"Average Val Loss: {avg_val_loss:.4f}")
print(f"Average MAE: {avg_mae:.4f}")
print(f"Average RMSE: {avg_rmse:.4f}")


Fold 1/5
Epoch 1/100, Train Loss: 208.5801, Val Loss: 94.0954
Epoch 2/100, Train Loss: 87.4629, Val Loss: 76.6271
Epoch 3/100, Train Loss: 80.4032, Val Loss: 72.1713
Epoch 4/100, Train Loss: 77.6992, Val Loss: 70.3980
Epoch 5/100, Train Loss: 74.8989, Val Loss: 69.4222
Epoch 6/100, Train Loss: 73.1783, Val Loss: 67.1117
Epoch 7/100, Train Loss: 71.9564, Val Loss: 66.8905
Epoch 8/100, Train Loss: 71.2526, Val Loss: 68.0847
Epoch 9/100, Train Loss: 70.2125, Val Loss: 66.2398
Epoch 10/100, Train Loss: 68.6244, Val Loss: 66.3271
Epoch 11/100, Train Loss: 67.5389, Val Loss: 64.9186
Epoch 12/100, Train Loss: 67.0646, Val Loss: 62.9630
Epoch 13/100, Train Loss: 65.7044, Val Loss: 66.4893
Epoch 14/100, Train Loss: 64.9073, Val Loss: 61.8055
Epoch 15/100, Train Loss: 63.9947, Val Loss: 61.3962
Epoch 16/100, Train Loss: 62.8527, Val Loss: 62.0814
Epoch 17/100, Train Loss: 62.0812, Val Loss: 59.1969
Epoch 18/100, Train Loss: 60.3634, Val Loss: 59.4344
Epoch 19/100, Train Loss: 59.9557, Val Loss: 

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 1 - MAE: 4.0742, RMSE: 5.7106
Fold 2/5
Epoch 1/100, Train Loss: 206.5955, Val Loss: 99.7624
Epoch 2/100, Train Loss: 85.1305, Val Loss: 82.9306
Epoch 3/100, Train Loss: 78.1954, Val Loss: 79.1361
Epoch 4/100, Train Loss: 75.5339, Val Loss: 76.0428
Epoch 5/100, Train Loss: 73.7041, Val Loss: 74.1541
Epoch 6/100, Train Loss: 72.2009, Val Loss: 73.3696
Epoch 7/100, Train Loss: 70.6865, Val Loss: 71.5918
Epoch 8/100, Train Loss: 69.7771, Val Loss: 70.1584
Epoch 9/100, Train Loss: 69.0219, Val Loss: 68.8362
Epoch 10/100, Train Loss: 68.2005, Val Loss: 69.4584
Epoch 11/100, Train Loss: 67.2638, Val Loss: 69.0474
Epoch 12/100, Train Loss: 66.1681, Val Loss: 66.6549
Epoch 13/100, Train Loss: 64.8201, Val Loss: 65.7309
Epoch 14/100, Train Loss: 65.1512, Val Loss: 67.3866
Epoch 15/100, Train Loss: 64.2776, Val Loss: 64.7903
Epoch 16/100, Train Loss: 62.4661, Val Loss: 62.9349
Epoch 17/100, Train Loss: 61.7621, Val Loss: 62.9392
Epoch 18/100, Train Loss: 61.4323, Val Loss: 60.6760
Epoch 19/1

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 2 - MAE: 4.1088, RMSE: 5.8390
Fold 3/5
Epoch 1/100, Train Loss: 208.5118, Val Loss: 94.9608
Epoch 2/100, Train Loss: 86.3270, Val Loss: 77.5573
Epoch 3/100, Train Loss: 79.8005, Val Loss: 74.0864
Epoch 4/100, Train Loss: 76.1881, Val Loss: 72.3546
Epoch 5/100, Train Loss: 74.3981, Val Loss: 68.0801
Epoch 6/100, Train Loss: 72.6422, Val Loss: 67.9173
Epoch 7/100, Train Loss: 70.4719, Val Loss: 68.2046
Epoch 8/100, Train Loss: 69.8750, Val Loss: 66.1362
Epoch 9/100, Train Loss: 68.6691, Val Loss: 65.8954
Epoch 10/100, Train Loss: 68.0454, Val Loss: 64.3695
Epoch 11/100, Train Loss: 67.0619, Val Loss: 62.1955
Epoch 12/100, Train Loss: 65.9232, Val Loss: 61.8646
Epoch 13/100, Train Loss: 65.4004, Val Loss: 63.7337
Epoch 14/100, Train Loss: 64.3127, Val Loss: 60.7713
Epoch 15/100, Train Loss: 63.7261, Val Loss: 60.5168
Epoch 16/100, Train Loss: 61.9694, Val Loss: 58.3113
Epoch 17/100, Train Loss: 61.8421, Val Loss: 55.8532
Epoch 18/100, Train Loss: 60.1742, Val Loss: 55.4431
Epoch 19/1

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 3 - MAE: 4.1326, RMSE: 5.7352
Fold 4/5
Epoch 1/100, Train Loss: 213.6399, Val Loss: 97.5329
Epoch 2/100, Train Loss: 86.3934, Val Loss: 83.4256
Epoch 3/100, Train Loss: 79.4871, Val Loss: 79.1585
Epoch 4/100, Train Loss: 75.4591, Val Loss: 76.4334
Epoch 5/100, Train Loss: 73.2496, Val Loss: 74.6247
Epoch 6/100, Train Loss: 71.4845, Val Loss: 72.8434
Epoch 7/100, Train Loss: 70.2884, Val Loss: 71.2589
Epoch 8/100, Train Loss: 68.5123, Val Loss: 70.5311
Epoch 9/100, Train Loss: 67.9852, Val Loss: 69.6697
Epoch 10/100, Train Loss: 67.1305, Val Loss: 68.8110
Epoch 11/100, Train Loss: 66.5886, Val Loss: 68.3395
Epoch 12/100, Train Loss: 65.1384, Val Loss: 72.7443
Epoch 13/100, Train Loss: 64.0943, Val Loss: 67.5077
Epoch 14/100, Train Loss: 63.6671, Val Loss: 64.6188
Epoch 15/100, Train Loss: 63.0686, Val Loss: 64.3293
Epoch 16/100, Train Loss: 61.6194, Val Loss: 64.6258
Epoch 17/100, Train Loss: 61.3892, Val Loss: 64.0613
Epoch 18/100, Train Loss: 60.3416, Val Loss: 63.3575
Epoch 19/1

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 4 - MAE: 4.2302, RMSE: 6.0783
Fold 5/5
Epoch 1/100, Train Loss: 208.4260, Val Loss: 93.3030
Epoch 2/100, Train Loss: 86.2124, Val Loss: 76.3103
Epoch 3/100, Train Loss: 79.0825, Val Loss: 79.8392
Epoch 4/100, Train Loss: 74.6233, Val Loss: 68.9157
Epoch 5/100, Train Loss: 72.8044, Val Loss: 67.8636
Epoch 6/100, Train Loss: 71.7495, Val Loss: 66.8940
Epoch 7/100, Train Loss: 70.2491, Val Loss: 65.3763
Epoch 8/100, Train Loss: 69.2111, Val Loss: 65.3427
Epoch 9/100, Train Loss: 67.8949, Val Loss: 66.4354
Epoch 10/100, Train Loss: 66.8587, Val Loss: 63.8238
Epoch 11/100, Train Loss: 66.6459, Val Loss: 63.1176
Epoch 12/100, Train Loss: 64.5584, Val Loss: 61.1291
Epoch 13/100, Train Loss: 64.5377, Val Loss: 59.7301
Epoch 14/100, Train Loss: 62.7974, Val Loss: 60.8087
Epoch 15/100, Train Loss: 62.5977, Val Loss: 66.8538
Epoch 16/100, Train Loss: 61.7071, Val Loss: 59.4470
Epoch 17/100, Train Loss: 60.9928, Val Loss: 57.3023
Epoch 18/100, Train Loss: 60.1095, Val Loss: 57.8217
Epoch 19/1

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 5 - MAE: 4.0758, RMSE: 5.7200
Average Train Loss: 28.4976
Average Val Loss: 34.3894
Average MAE: 4.1243
Average RMSE: 5.8166


