In [1]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# Seed every random number generator used in this notebook
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch RNGs and set the cuDNN flags.

    Args:
        seed: Value passed unchanged to each seeding function (default 42).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # cuDNN flags: deterministic on, benchmark off.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed the RNGs before anything else runs
set_seed(42)

# Load the dataset
data = pd.read_csv('final_50_all.csv')

num_features = [
    'temperature_2m (°C)',
    'apparent_temperature (°C)',
    'rain (mm)',
    'wind_speed_100m (km/h)',
]

# Feature engineering: parse the 'hour' column and derive calendar fields from it
data['hour'] = pd.to_datetime(data['hour'])
stamp = data['hour'].dt
calendar_fields = {
    'year': stamp.year,
    'month': stamp.month,
    'day': stamp.day,
    'hour_of_day': stamp.hour,
    'weekday': stamp.weekday,
}
for column, values in calendar_fields.items():
    data[column] = values

# Cyclical sin/cos encodings: (output prefix, source column, period)
cyclical_specs = [
    ('hour', 'hour_of_day', 24),
    ('weekday', 'weekday', 7),
    ('month', 'month', 12),
]
for prefix, source, period in cyclical_specs:
    angle = 2 * np.pi * data[source] / period
    data[f'{prefix}_sin'] = np.sin(angle)
    data[f'{prefix}_cos'] = np.cos(angle)

# Min-max scale the numeric features
# NOTE(review): the scaler is fit on every row, i.e. before the cross-validation
# split further down, so validation rows influence the min/max. Consider fitting per fold.
scaler = MinMaxScaler()
scaler.fit(data[num_features])
data[num_features] = scaler.transform(data[num_features])

# Drop the raw calendar columns now that the encodings exist ('year' is kept)
data = data.drop(columns=['month', 'day', 'hour', 'hour_of_day', 'weekday'])

# Final feature list and target column
cyclical_features = ['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos', 'month_sin', 'month_cos']
features = num_features + cyclical_features
target = 'ride_count'

X = data[features].to_numpy()
y = data[target].to_numpy()

def create_sequences(data, target, sequence_length):
    """Build sliding input windows and the target value that follows each one.

    Window ``i`` is ``data[i:i + sequence_length]`` and its label is
    ``target[i + sequence_length]``, for every ``i`` in
    ``range(len(data) - sequence_length)``.

    Args:
        data: Sliceable sequence of feature rows.
        target: Indexable sequence of target values aligned with ``data``.
        sequence_length: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the list of windows and
        the list of labels. Both are empty when
        ``len(data) <= sequence_length``.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data[start:start + sequence_length] for start in window_starts]
    labels = [target[start + sequence_length] for start in window_starts]
    return np.array(windows), np.array(labels)

# Sequence length: each sample is a window of 48 consecutive rows
# (presumably hourly rows, i.e. the previous 48 hours — TODO confirm)
sequence_length = 48
X, y = create_sequences(X, y, sequence_length)

# 5-fold cross-validation
# NOTE(review): consecutive windows share all but one row, so shuffle=True puts
# near-identical windows on both sides of each split. Consider contiguous or
# time-ordered folds (e.g. sklearn's TimeSeriesSplit).
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Per-fold metric accumulators
train_losses, val_losses, maes, rmses = [], [], [], []

for fold, (train_index, val_index) in enumerate(kf.split(X), start=1):
    print(f"Fold {fold}/{5}")

    # Split windows and labels into this fold's training and validation parts
    X_train, y_train = X[train_index], y[train_index]
    X_val, y_val = X[val_index], y[val_index]

    # Convert to float32 tensors
    X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
        torch.tensor(array, dtype=torch.float32)
        for array in (X_train, y_train, X_val, y_val)
    )

    # Data loaders: only the training loader shuffles
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32)
    
    # GRU model definition
    class GRUModel(nn.Module):
        """Conv1d -> ReLU -> GRU -> Dropout -> Linear model over a sequence.

        Args:
            input_size: Number of input channels for the Conv1d layer
                (size of the last axis of the input).
            hidden_size: GRU hidden size.
            output_size: Output width of the final linear layer.
            dropout: Probability passed to ``nn.Dropout``.
            cnn_channels: Conv1d output channels, also the GRU input size.
            kernel_size: Conv1d kernel size; padding is ``kernel_size // 2``.
        """

        def __init__(self, input_size, hidden_size, output_size, dropout, cnn_channels, kernel_size):
            super().__init__()
            # 1-D convolution applied along the sequence axis
            self.conv1d = nn.Conv1d(
                in_channels=input_size,
                out_channels=cnn_channels,
                kernel_size=kernel_size,
                padding=kernel_size // 2,
            )
            self.relu = nn.ReLU()

            # Recurrent layer fed with the convolution output
            self.gru = nn.GRU(cnn_channels, hidden_size, batch_first=True)

            # Output head
            self.fc = nn.Linear(hidden_size, output_size)
            self.dropout = nn.Dropout(dropout)

        def forward(self, x):
            """Map ``x`` of shape [batch_size, seq_length, input_size] to [batch_size, output_size]."""
            # Conv1d expects channels on axis 1, so swap the last two axes
            conv_in = x.permute(0, 2, 1)
            conv_out = self.relu(self.conv1d(conv_in))
            # Swap back to [batch_size, seq_length, cnn_channels] for the GRU
            gru_in = conv_out.permute(0, 2, 1)

            gru_out, _ = self.gru(gru_in)  # [batch_size, seq_length, hidden_size]

            # Keep only the last time step, then dropout and the linear head
            last_step = gru_out[:, -1]
            return self.fc(self.dropout(last_step))

    # Model hyperparameters
    input_size = len(features)  # number of input features per time step
    hidden_size = 64            # GRU hidden size
    output_size = 1             # one predicted value per sample
    dropout = 0.2               # dropout probability
    cnn_channels = 32           # Conv1d output channels
    kernel_size = 3             # Conv1d kernel size

    # Fresh model, loss and optimizer for this fold
    model = GRUModel(input_size, hidden_size, output_size, dropout=dropout, cnn_channels=cnn_channels, kernel_size=kernel_size)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Early-stopping state
    patience = 10
    best_val_loss = float('inf')
    early_stop_counter = 0
    epochs = 100
    checkpoint_path = f'best_model_fold{fold}.pth'

    # Training loop
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            # squeeze(-1) removes only the output axis. A bare squeeze() would
            # also collapse a final batch of one sample to a 0-d tensor, which
            # no longer matches y_batch.
            y_pred = model(X_batch).squeeze(-1)  # forward pass
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Mean of the per-batch losses
        train_loss /= len(train_loader)

        # Validation pass
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                y_pred = model(X_batch).squeeze(-1)
                val_loss += criterion(y_pred, y_batch).item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping: checkpoint on improvement, otherwise count the stall
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), checkpoint_path)  # keep the best weights
        else:
            early_stop_counter += 1

        if early_stop_counter >= patience:
            print("Early stopping triggered")
            break

    # Record the losses of the last epoch that ran (not those of the best epoch)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Evaluate the best checkpoint on this fold's validation split.
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs.
    model.load_state_dict(torch.load(checkpoint_path, weights_only=True))
    model.eval()
    y_preds = []
    y_trues = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            # Stays 1-D even for a single-sample batch, so tolist() is always a list
            y_pred = model(X_batch).squeeze(-1)
            y_preds.extend(y_pred.tolist())
            y_trues.extend(y_batch.tolist())

    mae = mean_absolute_error(y_trues, y_preds)
    # The `squared=False` argument is deprecated and removed in newer
    # scikit-learn releases, so take the square root of the MSE explicitly.
    rmse = np.sqrt(mean_squared_error(y_trues, y_preds))
    maes.append(mae)
    rmses.append(rmse)

    print(f"Fold {fold} - MAE: {mae:.4f}, RMSE: {rmse:.4f}")

# Average each metric over the folds
avg_train_loss, avg_val_loss, avg_mae, avg_rmse = (
    np.mean(per_fold) for per_fold in (train_losses, val_losses, maes, rmses)
)

# Report the cross-validation averages
print(f"Average Train Loss: {avg_train_loss:.4f}")
print(f"Average Val Loss: {avg_val_loss:.4f}")
print(f"Average MAE: {avg_mae:.4f}")
print(f"Average RMSE: {avg_rmse:.4f}")

# Plot true vs. predicted values.
# y_trues / y_preds are reset inside every fold, so this shows the last fold only.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_trues, label='True Values')
ax.plot(y_preds, label='Predicted Values')
ax.set_xlabel('Validation sample index')
ax.set_ylabel('Ride count')
ax.legend()
ax.set_title('True vs Predicted Ride Counts')
plt.show()


Fold 1/5
Epoch 1/100, Train Loss: 211.8895, Val Loss: 85.6599
Epoch 2/100, Train Loss: 88.6651, Val Loss: 70.4291
Epoch 3/100, Train Loss: 82.0591, Val Loss: 67.6829
Epoch 4/100, Train Loss: 78.9513, Val Loss: 65.4516
Epoch 5/100, Train Loss: 75.4269, Val Loss: 62.9589
Epoch 6/100, Train Loss: 71.9839, Val Loss: 56.2405
Epoch 7/100, Train Loss: 69.6626, Val Loss: 56.9513
Epoch 8/100, Train Loss: 68.6928, Val Loss: 58.0876
Epoch 9/100, Train Loss: 65.9801, Val Loss: 56.5902
Epoch 10/100, Train Loss: 65.0230, Val Loss: 52.2338
Epoch 11/100, Train Loss: 63.5771, Val Loss: 53.3956
Epoch 12/100, Train Loss: 62.0314, Val Loss: 50.7384
Epoch 13/100, Train Loss: 60.8786, Val Loss: 50.2723
Epoch 14/100, Train Loss: 58.6697, Val Loss: 48.4112
Epoch 15/100, Train Loss: 57.4735, Val Loss: 47.3544
Epoch 16/100, Train Loss: 56.5876, Val Loss: 47.0984
Epoch 17/100, Train Loss: 55.4515, Val Loss: 47.6233
Epoch 18/100, Train Loss: 52.9191, Val Loss: 49.4727
Epoch 19/100, Train Loss: 52.8762, Val Loss: 

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 1 - MAE: 3.9323, RMSE: 5.3646
Fold 2/5
Epoch 1/100, Train Loss: 221.1281, Val Loss: 98.3726
Epoch 2/100, Train Loss: 88.6461, Val Loss: 79.0548
Epoch 3/100, Train Loss: 82.0046, Val Loss: 75.2178
Epoch 4/100, Train Loss: 78.5717, Val Loss: 71.8692
Epoch 5/100, Train Loss: 75.8188, Val Loss: 68.5816
Epoch 6/100, Train Loss: 72.7312, Val Loss: 66.1998
Epoch 7/100, Train Loss: 70.4095, Val Loss: 63.8972
Epoch 8/100, Train Loss: 66.2875, Val Loss: 60.5802
Epoch 9/100, Train Loss: 63.6915, Val Loss: 59.2426
Epoch 10/100, Train Loss: 61.6661, Val Loss: 60.5702
Epoch 11/100, Train Loss: 60.6617, Val Loss: 56.5041
Epoch 12/100, Train Loss: 58.4769, Val Loss: 57.7951
Epoch 13/100, Train Loss: 56.4391, Val Loss: 54.0397
Epoch 14/100, Train Loss: 55.4928, Val Loss: 53.9983
Epoch 15/100, Train Loss: 54.3370, Val Loss: 52.1867
Epoch 16/100, Train Loss: 53.3600, Val Loss: 51.9885
Epoch 17/100, Train Loss: 51.9503, Val Loss: 48.8097
Epoch 18/100, Train Loss: 50.1963, Val Loss: 49.9104
Epoch 19/1

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 2 - MAE: 4.0476, RMSE: 5.7446
Fold 3/5
Epoch 1/100, Train Loss: 225.5532, Val Loss: 106.2064
Epoch 2/100, Train Loss: 88.8285, Val Loss: 76.7293
Epoch 3/100, Train Loss: 81.6490, Val Loss: 72.9416
Epoch 4/100, Train Loss: 77.4852, Val Loss: 69.1592
Epoch 5/100, Train Loss: 74.1404, Val Loss: 64.8570
Epoch 6/100, Train Loss: 70.8504, Val Loss: 64.0993
Epoch 7/100, Train Loss: 69.7482, Val Loss: 63.4515
Epoch 8/100, Train Loss: 68.0769, Val Loss: 64.2161
Epoch 9/100, Train Loss: 66.7258, Val Loss: 60.2579
Epoch 10/100, Train Loss: 65.3575, Val Loss: 59.7694
Epoch 11/100, Train Loss: 63.7579, Val Loss: 58.1671
Epoch 12/100, Train Loss: 62.7250, Val Loss: 58.0074
Epoch 13/100, Train Loss: 62.0285, Val Loss: 55.5864
Epoch 14/100, Train Loss: 60.5873, Val Loss: 53.4243
Epoch 15/100, Train Loss: 59.7533, Val Loss: 54.7077
Epoch 16/100, Train Loss: 58.4859, Val Loss: 54.3627
Epoch 17/100, Train Loss: 56.9678, Val Loss: 53.9278
Epoch 18/100, Train Loss: 56.9008, Val Loss: 51.8304
Epoch 19/

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 3 - MAE: 4.0383, RMSE: 5.5782
Fold 4/5
Epoch 1/100, Train Loss: 231.9961, Val Loss: 117.7923
Epoch 2/100, Train Loss: 90.6509, Val Loss: 79.0523
Epoch 3/100, Train Loss: 80.2593, Val Loss: 76.0482
Epoch 4/100, Train Loss: 76.4276, Val Loss: 71.9402
Epoch 5/100, Train Loss: 74.1681, Val Loss: 72.2468
Epoch 6/100, Train Loss: 70.8315, Val Loss: 64.2921
Epoch 7/100, Train Loss: 68.8386, Val Loss: 63.8791
Epoch 8/100, Train Loss: 67.1818, Val Loss: 63.6683
Epoch 9/100, Train Loss: 65.6823, Val Loss: 61.2453
Epoch 10/100, Train Loss: 64.1107, Val Loss: 63.7638
Epoch 11/100, Train Loss: 63.4970, Val Loss: 57.9138
Epoch 12/100, Train Loss: 62.0396, Val Loss: 56.4935
Epoch 13/100, Train Loss: 60.9565, Val Loss: 57.4387
Epoch 14/100, Train Loss: 59.7563, Val Loss: 61.0478
Epoch 15/100, Train Loss: 58.1763, Val Loss: 58.0753
Epoch 16/100, Train Loss: 56.7030, Val Loss: 51.5756
Epoch 17/100, Train Loss: 56.5956, Val Loss: 54.3311
Epoch 18/100, Train Loss: 55.1169, Val Loss: 49.5531
Epoch 19/

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 4 - MAE: 3.9037, RMSE: 5.4365
Fold 5/5
Epoch 1/100, Train Loss: 209.0782, Val Loss: 113.4483
Epoch 2/100, Train Loss: 82.7210, Val Loss: 94.5620
Epoch 3/100, Train Loss: 75.7973, Val Loss: 89.1939
Epoch 4/100, Train Loss: 71.1389, Val Loss: 87.8052
Epoch 5/100, Train Loss: 67.7931, Val Loss: 81.5930
Epoch 6/100, Train Loss: 65.3996, Val Loss: 78.5629
Epoch 7/100, Train Loss: 63.8311, Val Loss: 77.5699
Epoch 8/100, Train Loss: 62.1811, Val Loss: 77.4427
Epoch 9/100, Train Loss: 60.8041, Val Loss: 75.2093
Epoch 10/100, Train Loss: 60.2775, Val Loss: 74.6121
Epoch 11/100, Train Loss: 59.4127, Val Loss: 73.0144
Epoch 12/100, Train Loss: 58.1067, Val Loss: 72.1044
Epoch 13/100, Train Loss: 56.8237, Val Loss: 69.6289
Epoch 14/100, Train Loss: 55.7573, Val Loss: 68.0548
Epoch 15/100, Train Loss: 54.7421, Val Loss: 67.9063
Epoch 16/100, Train Loss: 54.0421, Val Loss: 67.3088
Epoch 17/100, Train Loss: 53.1734, Val Loss: 65.8806
Epoch 18/100, Train Loss: 52.0426, Val Loss: 65.6389
Epoch 19/

  model.load_state_dict(torch.load(f'best_model_fold{fold}.pth'))


Fold 5 - MAE: 4.1538, RMSE: 6.3286
Average Train Loss: 28.2179
Average Val Loss: 33.8922
Average MAE: 4.0151
Average RMSE: 5.6905


