In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_absolute_error, mean_squared_error
import random

# 设置随机种子
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# 在代码的开头设置随机种子
set_seed(42)

# 读取数据
data = pd.read_csv('final_50_all.csv')

# 特征工程
data['hour'] = pd.to_datetime(data['hour'])
data['year'] = data['hour'].dt.year
data['month'] = data['hour'].dt.month
data['day'] = data['hour'].dt.day
data['hour_of_day'] = data['hour'].dt.hour
data['weekday'] = data['hour'].dt.weekday

data['hour_sin'] = np.sin(2 * np.pi * data['hour_of_day'] / 24)
data['hour_cos'] = np.cos(2 * np.pi * data['hour_of_day'] / 24)
data['weekday_sin'] = np.sin(2 * np.pi * data['weekday'] / 7)
data['weekday_cos'] = np.cos(2 * np.pi * data['weekday'] / 7)
data['month_sin'] = np.sin(2 * np.pi * data['month'] / 12)
data['month_cos'] = np.cos(2 * np.pi * data['month'] / 12)

# 数值特征缩放
num_features = ['temperature_2m (°C)', 'apparent_temperature (°C)', 'rain (mm)', 'wind_speed_100m (km/h)']
scaler = MinMaxScaler()
data[num_features] = scaler.fit_transform(data[num_features])

data.drop(['month','day','hour', 'hour_of_day', 'weekday'], axis=1, inplace=True)

# 提取最终特征和目标
features = num_features + ['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos','month_sin','month_cos'] 
target = 'ride_count'

X = data[features].values
y = data[target].values

# 数据标准化
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

def create_sequences(data, target, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length])
        y.append(target[i + sequence_length])
    return np.array(X), np.array(y)

# 设置序列长度
sequence_length = 48  # 例如使用过去24小时的数据预测
X, y = create_sequences(X, y, sequence_length)

# 使用 KFold 进行交叉验证
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# 初始化存储结果的列表
test_losses = []
maes = []
rmses = []

# 交叉验证过程
for fold, (train_index, val_index) in enumerate(kf.split(X)):
    print(f"\nFold {fold + 1}")

    # 分割数据
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # 转换为张量
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    y_val_tensor = torch.tensor(y_val, dtype=torch.float32)

    # 数据加载器
    train_loader = DataLoader(TensorDataset(X_train_tensor, y_train_tensor), batch_size=32, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val_tensor, y_val_tensor), batch_size=32)

    # GRU 模型定义
    class GRUModel(nn.Module):
        def __init__(self, input_size, hidden_size, output_size, dropout=0.2):
            super(GRUModel, self).__init__()
            self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
            self.fc = nn.Linear(hidden_size, output_size)
            self.dropout = nn.Dropout(dropout)
        
        def forward(self, x):
            out, _ = self.gru(x)
            out = self.dropout(out[:, -1, :])
            out = self.fc(out)
            return out

    # 模型实例化
    input_size = len(features)
    hidden_size = 64
    output_size = 1
    model = GRUModel(input_size, hidden_size, output_size)

    # 损失函数和优化器
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # 训练与验证
    patience = 10
    best_val_loss = float('inf')
    early_stop_counter = 0
    epochs = 100

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch).squeeze()
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)

        # 验证阶段
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch).squeeze()
                loss = criterion(y_pred, y_batch)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # 早停机制
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), f'best_model_fold{fold+1}.pth')  # 保存最佳模型
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    # 测试集评估
    model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))
    model.eval()
    val_loss = 0
    y_preds = []
    y_trues = []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            y_pred = model(X_batch).squeeze()
            y_preds.extend(y_pred.squeeze().tolist())
            y_trues.extend(y_batch.tolist())
            loss = criterion(y_pred.squeeze(), y_batch)
            val_loss += loss.item()

    val_loss /= len(val_loader)
    mae = mean_absolute_error(y_trues, y_preds)
    rmse = mean_squared_error(y_trues, y_preds, squared=False)

    test_losses.append(val_loss)
    maes.append(mae)
    rmses.append(rmse)

    print(f"Fold {fold+1} - Val Loss: {val_loss:.4f}, MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# 输出所有折的平均性能
print(f"\nAverage Val Loss: {np.mean(test_losses):.4f}")
print(f"Average MAE: {np.mean(maes):.4f}")
print(f"Average RMSE: {np.mean(rmses):.4f}")



Fold 1
Epoch 1/100, Train Loss: 246.1560, Val Loss: 197.8238
Epoch 2/100, Train Loss: 175.6584, Val Loss: 94.2963
Epoch 3/100, Train Loss: 94.1037, Val Loss: 73.2805
Epoch 4/100, Train Loss: 84.5418, Val Loss: 71.5398
Epoch 5/100, Train Loss: 81.6871, Val Loss: 66.2769
Epoch 6/100, Train Loss: 78.5094, Val Loss: 66.4085
Epoch 7/100, Train Loss: 76.0935, Val Loss: 60.9171
Epoch 8/100, Train Loss: 74.1399, Val Loss: 58.2668
Epoch 9/100, Train Loss: 72.0687, Val Loss: 59.8770
Epoch 10/100, Train Loss: 71.4499, Val Loss: 56.2443
Epoch 11/100, Train Loss: 70.0213, Val Loss: 59.7523
Epoch 12/100, Train Loss: 68.9396, Val Loss: 56.6070
Epoch 13/100, Train Loss: 69.1519, Val Loss: 55.9724
Epoch 14/100, Train Loss: 68.9551, Val Loss: 54.9604
Epoch 15/100, Train Loss: 68.1389, Val Loss: 55.6181
Epoch 16/100, Train Loss: 66.6021, Val Loss: 53.9006
Epoch 17/100, Train Loss: 66.2443, Val Loss: 53.9918
Epoch 18/100, Train Loss: 66.5832, Val Loss: 53.3561
Epoch 19/100, Train Loss: 65.3172, Val Loss:

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Fold 1 - Val Loss: 31.2253, MAE: 4.0611, RMSE: 5.5804

Fold 2
Epoch 1/100, Train Loss: 242.0155, Val Loss: 201.6127
Epoch 2/100, Train Loss: 154.3249, Val Loss: 85.6566
Epoch 3/100, Train Loss: 86.5878, Val Loss: 80.0892
Epoch 4/100, Train Loss: 84.9729, Val Loss: 82.4470
Epoch 5/100, Train Loss: 83.6757, Val Loss: 80.0780
Epoch 6/100, Train Loss: 81.5831, Val Loss: 76.3372
Epoch 7/100, Train Loss: 78.3228, Val Loss: 70.9617
Epoch 8/100, Train Loss: 75.6154, Val Loss: 69.3608
Epoch 9/100, Train Loss: 73.9364, Val Loss: 69.5810
Epoch 10/100, Train Loss: 72.2543, Val Loss: 68.2846
Epoch 11/100, Train Loss: 71.2992, Val Loss: 66.3261
Epoch 12/100, Train Loss: 70.2745, Val Loss: 69.2637
Epoch 13/100, Train Loss: 69.0673, Val Loss: 66.3946
Epoch 14/100, Train Loss: 69.2494, Val Loss: 64.6332
Epoch 15/100, Train Loss: 68.1706, Val Loss: 63.8091
Epoch 16/100, Train Loss: 67.3103, Val Loss: 62.9145
Epoch 17/100, Train Loss: 66.2335, Val Loss: 62.6126
Epoch 18/100, Train Loss: 65.7499, Val Loss

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Fold 2 - Val Loss: 37.3877, MAE: 4.2895, RMSE: 6.1140

Fold 3
Epoch 1/100, Train Loss: 242.1657, Val Loss: 204.7490
Epoch 2/100, Train Loss: 150.9366, Val Loss: 87.1513
Epoch 3/100, Train Loss: 85.8048, Val Loss: 75.7719
Epoch 4/100, Train Loss: 80.1506, Val Loss: 72.5501
Epoch 5/100, Train Loss: 76.9892, Val Loss: 69.6114
Epoch 6/100, Train Loss: 74.9953, Val Loss: 67.8916
Epoch 7/100, Train Loss: 73.1502, Val Loss: 65.0174
Epoch 8/100, Train Loss: 72.3951, Val Loss: 64.4353
Epoch 9/100, Train Loss: 71.1912, Val Loss: 62.5632
Epoch 10/100, Train Loss: 69.0815, Val Loss: 65.5396
Epoch 11/100, Train Loss: 68.9703, Val Loss: 60.7355
Epoch 12/100, Train Loss: 68.2742, Val Loss: 62.6635
Epoch 13/100, Train Loss: 67.8118, Val Loss: 59.0013
Epoch 14/100, Train Loss: 66.9994, Val Loss: 62.1042
Epoch 15/100, Train Loss: 65.7139, Val Loss: 59.5951
Epoch 16/100, Train Loss: 65.9739, Val Loss: 58.6396
Epoch 17/100, Train Loss: 64.7203, Val Loss: 58.9255
Epoch 18/100, Train Loss: 64.3513, Val Loss

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Fold 3 - Val Loss: 33.0156, MAE: 4.1819, RMSE: 5.7369

Fold 4
Epoch 1/100, Train Loss: 240.1270, Val Loss: 206.9565
Epoch 2/100, Train Loss: 150.6391, Val Loss: 91.2153
Epoch 3/100, Train Loss: 87.8868, Val Loss: 79.8240
Epoch 4/100, Train Loss: 82.9068, Val Loss: 77.6733
Epoch 5/100, Train Loss: 79.1357, Val Loss: 74.6143
Epoch 6/100, Train Loss: 75.7776, Val Loss: 69.9183
Epoch 7/100, Train Loss: 72.8138, Val Loss: 66.5926
Epoch 8/100, Train Loss: 71.2353, Val Loss: 65.9540
Epoch 9/100, Train Loss: 70.2132, Val Loss: 66.0484
Epoch 10/100, Train Loss: 69.0064, Val Loss: 63.9124
Epoch 11/100, Train Loss: 67.8764, Val Loss: 63.3142
Epoch 12/100, Train Loss: 67.3152, Val Loss: 63.8738
Epoch 13/100, Train Loss: 67.3606, Val Loss: 62.0653
Epoch 14/100, Train Loss: 65.3636, Val Loss: 61.6085
Epoch 15/100, Train Loss: 65.6345, Val Loss: 63.2202
Epoch 16/100, Train Loss: 65.1723, Val Loss: 64.2068
Epoch 17/100, Train Loss: 65.1791, Val Loss: 60.6860
Epoch 18/100, Train Loss: 63.6719, Val Loss

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Fold 4 - Val Loss: 34.6724, MAE: 4.2435, RMSE: 5.8903

Fold 5
Epoch 1/100, Train Loss: 239.0828, Val Loss: 221.0264
Epoch 2/100, Train Loss: 147.3910, Val Loss: 123.6823
Epoch 3/100, Train Loss: 87.4487, Val Loss: 94.4411
Epoch 4/100, Train Loss: 77.0806, Val Loss: 91.7798
Epoch 5/100, Train Loss: 73.9128, Val Loss: 87.3770
Epoch 6/100, Train Loss: 71.5307, Val Loss: 86.2515
Epoch 7/100, Train Loss: 70.0943, Val Loss: 83.5803
Epoch 8/100, Train Loss: 68.6795, Val Loss: 85.0383
Epoch 9/100, Train Loss: 67.8791, Val Loss: 82.6992
Epoch 10/100, Train Loss: 67.0493, Val Loss: 79.5276
Epoch 11/100, Train Loss: 65.7123, Val Loss: 79.0777
Epoch 12/100, Train Loss: 65.4651, Val Loss: 78.5123
Epoch 13/100, Train Loss: 64.0683, Val Loss: 77.2214
Epoch 14/100, Train Loss: 63.5791, Val Loss: 76.7285
Epoch 15/100, Train Loss: 63.4537, Val Loss: 75.7571
Epoch 16/100, Train Loss: 62.7318, Val Loss: 75.2449
Epoch 17/100, Train Loss: 62.0938, Val Loss: 75.7592
Epoch 18/100, Train Loss: 61.2963, Val Los

  model.load_state_dict(torch.load(f'best_model_fold{fold+1}.pth'))


Fold 5 - Val Loss: 51.2297, MAE: 4.6840, RMSE: 7.1552

Average Val Loss: 37.5061
Average MAE: 4.2920
Average RMSE: 6.0954


