In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, mean_squared_error
import random

# 设置随机种子
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: Value handed to every seeding function below (default 42).
    """
    # The same seed goes to each library's generator, CPU first, then CUDA.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Turn off cuDNN autotuning and request deterministic kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed every RNG up front, before any data or model work.
set_seed(42)

# Load the raw dataset.
data = pd.read_csv('final_50_all.csv')

# Feature engineering: parse the timestamp column, then expand it into
# calendar parts (year, month, day, hour of day, weekday).
data['hour'] = pd.to_datetime(data['hour'])
timestamps = data['hour'].dt
for column_name, calendar_attr in (
    ('year', 'year'),
    ('month', 'month'),
    ('day', 'day'),
    ('hour_of_day', 'hour'),
    ('weekday', 'weekday'),
):
    data[column_name] = getattr(timestamps, calendar_attr)

# Cyclic (sin/cos) encodings so that the ends of each period sit next to
# each other: (output prefix, source column, period length).
for prefix, source_column, period in (
    ('hour', 'hour_of_day', 24),
    ('weekday', 'weekday', 7),
    ('month', 'month', 12),
):
    angle = 2 * np.pi * data[source_column] / period
    data[f'{prefix}_sin'] = np.sin(angle)
    data[f'{prefix}_cos'] = np.cos(angle)

# Min-max scale the numeric weather columns in place.
# NOTE(review): this pass looks redundant, because X (which contains these
# columns) is min-max scaled again further down.
num_features = [
    'temperature_2m (°C)',
    'apparent_temperature (°C)',
    'rain (mm)',
    'wind_speed_100m (km/h)',
]
scaler = MinMaxScaler()
data[num_features] = scaler.fit_transform(data[num_features])

# The raw calendar columns are no longer needed once encoded ('year' is kept).
data.drop(columns=['month', 'day', 'hour', 'hour_of_day', 'weekday'], inplace=True)

# Final model inputs and the regression target.
cyclic_features = ['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos', 'month_sin', 'month_cos']
features = num_features + cyclic_features
target = 'ride_count'

X = data[features].values
y = data[target].values

# Rescale every feature column (including the sin/cos ones) with min-max.
# NOTE(review): the scaler is fit on the full dataset before the
# cross-validation split, so validation rows influence the scaling.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

def create_sequences(data, target, sequence_length):
    """Build sliding windows over ``data`` paired with the next-step target.

    Window ``i`` is ``data[i:i + sequence_length]`` and its label is
    ``target[i + sequence_length]``, i.e. the target value of the row that
    immediately follows the window.

    Args:
        data: Sequence or array of feature rows, indexed along axis 0.
        target: Sequence or array of target values aligned with ``data``.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, sequence_length, *row_shape)`` and ``y`` has
        ``n_windows`` entries, where
        ``n_windows = len(data) - sequence_length``. When there are not
        enough rows for a single window, both arrays are empty but keep the
        trailing dimensions, so downstream shape logic still works.

    Raises:
        IndexError: If ``target`` is shorter than ``data``.
    """
    n_windows = len(data) - sequence_length
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,) and lose the window and
        # feature axes; build the empty result with explicit shapes instead.
        data_arr = np.asarray(data)
        target_arr = np.asarray(target)
        empty_windows = np.empty(
            (0, sequence_length) + data_arr.shape[1:], dtype=data_arr.dtype
        )
        empty_labels = np.empty((0,) + target_arr.shape[1:], dtype=target_arr.dtype)
        return empty_windows, empty_labels

    windows = [data[start:start + sequence_length] for start in range(n_windows)]
    labels = [target[start + sequence_length] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

# Window length, e.g. use the past 24 hours of data to predict the next value.
sequence_length = 24
X, y = create_sequences(X, y, sequence_length)

# 5-fold cross-validation splitter with shuffling and a fixed seed.
# NOTE(review): the windows built above overlap (stride 1), so a shuffled
# split puts near-identical windows in both the train and validation folds;
# an ordered split such as sklearn's TimeSeriesSplit would avoid that.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold result accumulators.
test_losses, maes, rmses = [], [], []

# GRU regression model. Declared once here instead of inside the fold loop,
# where the class object used to be re-created on every fold.
class GRUModel(nn.Module):
    """Single GRU layer followed by dropout and a linear output layer."""

    def __init__(self, input_size, hidden_size, output_size, dropout=0.2):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map a (batch, seq_len, input_size) tensor to (batch, output_size)."""
        out, _ = self.gru(x)
        # Only the GRU output at the last timestep feeds the linear head.
        out = self.dropout(out[:, -1, :])
        return self.fc(out)


# Cross-validation loop
for fold, (train_index, val_index) in enumerate(kf.split(X)):
    print(f"\nFold {fold + 1}")
    checkpoint_path = f'best_model_fold{fold+1}.pth'

    # Split the windows and labels for this fold.
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Convert to float32 tensors.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

    # Data loaders: only the training loader shuffles.
    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val_tensor, y_val_tensor), batch_size=32)

    # A fresh model per fold so that no weights carry over between folds.
    input_size = len(features)
    hidden_size = 64
    output_size = 1
    model = GRUModel(input_size, hidden_size, output_size)

    # Loss function and optimizer.
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training with early stopping on the validation loss.
    patience = 10
    best_val_loss = float('inf')
    early_stop_counter = 0
    epochs = 100

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            # squeeze(-1) drops only the output axis. A bare squeeze() would
            # also drop the batch axis when the final batch has one sample.
            y_pred = model(X_batch).squeeze(-1)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Mean of the per-batch losses.
        train_loss /= len(train_loader)

        # Validation phase.
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch).squeeze(-1)
                loss = criterion(y_pred, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping: checkpoint on improvement, otherwise count the
        # epochs without improvement.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), checkpoint_path)  # keep the best weights
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    # Final evaluation with the best checkpoint of this fold.
    # NOTE(review): this reuses the validation fold that also drove early
    # stopping, so the reported metrics are not from an untouched test set.
    # weights_only=True restricts unpickling to tensor data and avoids the
    # torch.load FutureWarning.
    model.load_state_dict(torch.load(checkpoint_path, weights_only=True))
    model.eval()
    val_loss = 0
    y_preds = []
    y_trues = []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            # Keeping the batch axis means tolist() always returns a list,
            # even for a single-sample batch, so extend() cannot fail.
            y_pred = model(X_batch).squeeze(-1)
            y_preds.extend(y_pred.tolist())
            y_trues.extend(y_batch.tolist())
            loss = criterion(y_pred, y_batch)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    mae = mean_absolute_error(y_trues, y_preds)
    # The `squared=False` argument was deprecated and later removed from
    # scikit-learn; taking the square root works on every version.
    rmse = float(np.sqrt(mean_squared_error(y_trues, y_preds)))

    test_losses.append(val_loss)
    maes.append(mae)
    rmses.append(rmse)

    print(f"Fold {fold+1} - Val Loss: {val_loss:.4f}, MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# Report the average performance across all folds.
print(f"\nAverage Val Loss: {np.mean(test_losses):.4f}")
print(f"Average MAE: {np.mean(maes):.4f}")
print(f"Average RMSE: {np.mean(rmses):.4f}")



Fold 1
Epoch 1/100, Train Loss: 241.6660, Val Loss: 204.0920
Epoch 2/100, Train Loss: 178.1141, Val Loss: 96.8517
Epoch 3/100, Train Loss: 91.8842, Val Loss: 81.8310
Epoch 4/100, Train Loss: 85.1428, Val Loss: 78.5957
Epoch 5/100, Train Loss: 81.1591, Val Loss: 74.5196
Epoch 6/100, Train Loss: 77.0691, Val Loss: 76.9524
Epoch 7/100, Train Loss: 75.0562, Val Loss: 68.8411
Epoch 8/100, Train Loss: 73.6238, Val Loss: 67.9628
Epoch 9/100, Train Loss: 72.1286, Val Loss: 67.7098
Epoch 10/100, Train Loss: 71.4930, Val Loss: 69.6343
Epoch 11/100, Train Loss: 71.0846, Val Loss: 72.0692
Epoch 12/100, Train Loss: 71.0206, Val Loss: 66.2789
Epoch 13/100, Train Loss: 69.6638, Val Loss: 69.2332
Epoch 14/100, Train Loss: 69.3480, Val Loss: 65.3721
Epoch 15/100, Train Loss: 69.1656, Val Loss: 67.7241
Epoch 16/100, Train Loss: 68.3702, Val Loss: 66.1866
Epoch 17/100, Train Loss: 68.1723, Val Loss: 64.1679
Epoch 18/100, Train Loss: 68.2322, Val Loss: 64.2855
Epoch 19/100, Train Loss: 67.3804, Val Loss:

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Epoch 1/100, Train Loss: 239.9326, Val Loss: 211.1173
Epoch 2/100, Train Loss: 128.4493, Val Loss: 96.8173
Epoch 3/100, Train Loss: 89.3811, Val Loss: 89.6416
Epoch 4/100, Train Loss: 83.0593, Val Loss: 83.0211
Epoch 5/100, Train Loss: 80.5126, Val Loss: 80.5255
Epoch 6/100, Train Loss: 77.9772, Val Loss: 80.4410
Epoch 7/100, Train Loss: 76.5757, Val Loss: 84.4639
Epoch 8/100, Train Loss: 74.9072, Val Loss: 76.2546
Epoch 9/100, Train Loss: 74.4914, Val Loss: 76.7661
Epoch 10/100, Train Loss: 72.9230, Val Loss: 73.2208
Epoch 11/100, Train Loss: 72.3732, Val Loss: 72.1361
Epoch 12/100, Train Loss: 71.2725, Val Loss: 73.3580
Epoch 13/100, Train Loss: 70.5695, Val Loss: 75.6754
Epoch 14/100, Train Loss: 70.4493, Val Loss: 73.1658
Epoch 15/100, Train Loss: 70.0729, Val Loss: 70.6344
Epoch 16/100, Train Loss: 68.6346, Val Loss: 69.6905
Epoch 17/100, Train Loss: 68.5712, Val Loss: 71.4723
Epoch 18/100, Train Loss: 68.8897, Val Loss: 70.8749
Epoch 19/100, Train Loss: 67.7622, Val Loss: 69.2489

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Epoch 1/100, Train Loss: 242.2700, Val Loss: 200.5922
Epoch 2/100, Train Loss: 136.9376, Val Loss: 84.9218
Epoch 3/100, Train Loss: 85.1881, Val Loss: 77.5543
Epoch 4/100, Train Loss: 81.0161, Val Loss: 74.3012
Epoch 5/100, Train Loss: 78.3925, Val Loss: 75.2803
Epoch 6/100, Train Loss: 76.4969, Val Loss: 71.3549
Epoch 7/100, Train Loss: 75.4421, Val Loss: 67.9247
Epoch 8/100, Train Loss: 74.0869, Val Loss: 68.8100
Epoch 9/100, Train Loss: 72.7561, Val Loss: 67.2865
Epoch 10/100, Train Loss: 71.9577, Val Loss: 66.1494
Epoch 11/100, Train Loss: 71.7784, Val Loss: 66.0147
Epoch 12/100, Train Loss: 70.6677, Val Loss: 65.0590
Epoch 13/100, Train Loss: 70.6242, Val Loss: 66.7215
Epoch 14/100, Train Loss: 70.1775, Val Loss: 67.8436
Epoch 15/100, Train Loss: 69.0113, Val Loss: 65.7122
Epoch 16/100, Train Loss: 68.6837, Val Loss: 65.3643
Epoch 17/100, Train Loss: 68.7277, Val Loss: 63.5798
Epoch 18/100, Train Loss: 67.6006, Val Loss: 66.0958
Epoch 19/100, Train Loss: 67.5526, Val Loss: 62.3520

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Epoch 1/100, Train Loss: 240.5817, Val Loss: 201.1976
Epoch 2/100, Train Loss: 130.4706, Val Loss: 93.9749
Epoch 3/100, Train Loss: 86.8378, Val Loss: 84.3181
Epoch 4/100, Train Loss: 80.8047, Val Loss: 84.0227
Epoch 5/100, Train Loss: 77.9175, Val Loss: 82.4150
Epoch 6/100, Train Loss: 76.7853, Val Loss: 75.9607
Epoch 7/100, Train Loss: 75.1560, Val Loss: 77.4741
Epoch 8/100, Train Loss: 74.5724, Val Loss: 77.3363
Epoch 9/100, Train Loss: 73.6077, Val Loss: 76.7569
Epoch 10/100, Train Loss: 73.1731, Val Loss: 73.7442
Epoch 11/100, Train Loss: 72.4510, Val Loss: 74.9341
Epoch 12/100, Train Loss: 72.2585, Val Loss: 74.3125
Epoch 13/100, Train Loss: 71.4886, Val Loss: 73.2112
Epoch 14/100, Train Loss: 70.6598, Val Loss: 72.3335
Epoch 15/100, Train Loss: 70.4359, Val Loss: 72.1697
Epoch 16/100, Train Loss: 69.5200, Val Loss: 72.9805
Epoch 17/100, Train Loss: 69.5759, Val Loss: 70.7308
Epoch 18/100, Train Loss: 69.1551, Val Loss: 72.0843
Epoch 19/100, Train Loss: 68.8840, Val Loss: 71.7505

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Epoch 1/100, Train Loss: 243.1522, Val Loss: 199.8112
Epoch 2/100, Train Loss: 145.2670, Val Loss: 87.3651
Epoch 3/100, Train Loss: 87.8873, Val Loss: 79.5735
Epoch 4/100, Train Loss: 83.6562, Val Loss: 75.4123
Epoch 5/100, Train Loss: 80.9946, Val Loss: 73.6728
Epoch 6/100, Train Loss: 78.2872, Val Loss: 75.5868
Epoch 7/100, Train Loss: 77.3523, Val Loss: 70.1239
Epoch 8/100, Train Loss: 75.7364, Val Loss: 69.5849
Epoch 9/100, Train Loss: 75.5707, Val Loss: 69.2263
Epoch 10/100, Train Loss: 74.0288, Val Loss: 67.5589
Epoch 11/100, Train Loss: 72.6737, Val Loss: 68.4378
Epoch 12/100, Train Loss: 72.3557, Val Loss: 68.4907
Epoch 13/100, Train Loss: 71.6940, Val Loss: 65.1470
Epoch 14/100, Train Loss: 71.3085, Val Loss: 69.2056
Epoch 15/100, Train Loss: 70.7565, Val Loss: 65.7655
Epoch 16/100, Train Loss: 70.6000, Val Loss: 67.5651
Epoch 17/100, Train Loss: 70.3679, Val Loss: 65.4910
Epoch 18/100, Train Loss: 70.6347, Val Loss: 63.7418
Epoch 19/100, Train Loss: 69.5310, Val Loss: 67.4108

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))
