In [1]:
import torch
from torch import nn
# from model import LSTMModel,BiLSTM_FullOutput
import numpy as np
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# Locations of the train/test CSV files, relative to the notebook's working directory.
train_data_path = "../../train_hourly.csv"
test_data_path = "../../test_hourly.csv"

# Both files are read identically: the 'DateTime' column is parsed as dates and used as index.
train_df, test_df = (
    pd.read_csv(csv_path, index_col='DateTime', parse_dates=True)
    for csv_path in (train_data_path, test_data_path)
)

In [3]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import joblib # 用于保存scaler


# Convert both frames to dense float32 matrices (rows = time steps, columns = features).
# NOTE(review): despite the "_scaled" suffix, no scaler is fitted or applied in this cell;
# the values are only cast to float32. Confirm this is intentional before relying on any
# "scaled" semantics downstream.
train_scaled = train_df.to_numpy(dtype=np.float32, copy=True)
test_scaled = test_df.to_numpy(dtype=np.float32, copy=True)


for _label, _matrix in (("训练数据维度", train_scaled), ("测试数据维度", test_scaled)):
    print(f"{_label}: {_matrix.shape}")

训练数据维度: (17911, 12)
测试数据维度: (16678, 12)


In [4]:
def create_sliding_windows(data, input_seq_len, output_seq_len, target_col_index):
    """Cut a 2-D series into overlapping (input window, target window) pairs.

    Window ``i`` uses rows ``[i, i + input_seq_len)`` of every column as the
    model input and the following ``output_seq_len`` rows of column
    ``target_col_index`` as the target. Consecutive windows are shifted by one row.

    Args:
        data: Array-like of shape ``(n_samples, n_features)``.
        input_seq_len: Number of rows in each input window.
        output_seq_len: Number of rows in each target window.
        target_col_index: Column of ``data`` that holds the target variable.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_windows, input_seq_len, n_features)`` and ``y`` has shape
        ``(n_windows, output_seq_len)``. When ``data`` is too short to form a
        single window, ``n_windows`` is 0 but both arrays keep their trailing
        dimensions and the dtype of ``data``, so downstream shape-dependent
        code still works.
    """
    data = np.asarray(data)
    n_windows = len(data) - input_seq_len - output_seq_len + 1

    # Not enough rows for even one window: return correctly shaped empty arrays
    # rather than shapeless 1-D ones.
    if n_windows <= 0:
        empty_X = np.empty((0, input_seq_len) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0, output_seq_len), dtype=data.dtype)
        return empty_X, empty_y

    # Input windows contain all features.
    X = np.stack([data[start:start + input_seq_len, :] for start in range(n_windows)])

    # Target windows start right after the matching input window and keep only the target column.
    y = np.stack([
        data[start + input_seq_len:start + input_seq_len + output_seq_len, target_col_index]
        for start in range(n_windows)
    ])
    return X, y

# Window configuration. The factor 24 presumably converts days to hourly steps
# (the source files are named "*_hourly.csv") - verify against the data.
INPUT_SEQ_LEN = 90*24
OUTPUT_SEQ_LEN = 365*24
TARGET_COL_IDX = train_df.columns.get_loc('Global_active_power')


# --- Build the long-horizon (90 -> 365) training and test samples ---
_window_args = (INPUT_SEQ_LEN, OUTPUT_SEQ_LEN, TARGET_COL_IDX)
X_train_long, y_train_long = create_sliding_windows(train_scaled, *_window_args)
X_test_long, y_test_long = create_sliding_windows(test_scaled, *_window_args)

print("\n--- 长期预测 (90->365) 样本维度 ---")
for _name, _array in (
    ("X_train_long", X_train_long),
    ("y_train_long", y_train_long),
    ("X_test_long", X_test_long),
    ("y_test_long", y_test_long),
):
    print(f"{_name} shape: {_array.shape}")


--- 长期预测 (90->365) 样本维度 ---
X_train_long shape: (6992, 2160, 12)
y_train_long shape: (6992, 8760)
X_test_long shape: (5759, 2160, 12)
y_test_long shape: (5759, 8760)


In [5]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import joblib

# Select the compute device. The notebook prefers the second GPU ("cuda:1"), but that
# index does not exist on single-GPU machines even though CUDA is available, so fall
# back to "cuda:0" in that case and to the CPU when CUDA is unavailable.
PREFERRED_GPU_INDEX = 1
if torch.cuda.is_available():
    _gpu_index = PREFERRED_GPU_INDEX if torch.cuda.device_count() > PREFERRED_GPU_INDEX else 0
    device = torch.device(f"cuda:{_gpu_index}")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda:1


In [6]:
class TimeSeriesDataset(Dataset):
    """Map-style dataset pairing input windows with their target windows.

    Args:
        X: Array-like of input windows; the first dimension indexes samples.
        y: Array-like of targets with the same number of samples as ``X``
           (kept 2-D, ``[N, output_seq_len]``, by the caller).

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.

    Note:
        ``torch.as_tensor`` is used so that a float32 NumPy array is wrapped
        without being copied (the window arrays can be hundreds of MB). The
        resulting tensors then share memory with the NumPy arrays, so the
        arrays must not be mutated after the dataset is built.
    """

    def __init__(self, X, y):
        # Convert to float32 tensors, reusing the underlying buffer when possible.
        self.X = torch.as_tensor(X, dtype=torch.float32)
        self.y = torch.as_tensor(y, dtype=torch.float32)  # stays 2-D: [N, output_seq_len]

        # Fail early instead of raising an IndexError in the middle of an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input_window, target_window)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap the long-horizon window arrays in Dataset objects.
train_dataset_long = TimeSeriesDataset(X=X_train_long, y=y_train_long)
test_dataset_long = TimeSeriesDataset(X=X_test_long, y=y_test_long)

# Mini-batch loaders: training batches are reshuffled each epoch, test batches keep
# their original order.
BATCH_SIZE = 128
train_loader_long = DataLoader(
    dataset=train_dataset_long,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader_long = DataLoader(
    dataset=test_dataset_long,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [7]:
import torch
import torch.nn as nn
import math

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch-first sequence.

    Even feature indices receive ``sin(pos * w_k)`` and odd indices receive
    ``cos(pos * w_k)`` with ``w_k = 10000 ** (-2k / d_model)``.

    Args:
        d_model: Feature dimension of the sequences (odd values are supported).
        max_len: Longest sequence length that can be encoded.
    """

    def __init__(self, d_model, max_len=10000):
        super().__init__()
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even columns, so trim the
        # frequency vector; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Registered as a buffer: it follows the module across devices and is stored in
        # the state dict, but it is not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        Args:
            x: Tensor of shape ``[batch, seq_len, d_model]``.

        Raises:
            ValueError: If ``seq_len`` exceeds the ``max_len`` given at construction.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional encoding max_len {self.pe.size(0)}"
            )
        return x + self.pe[:seq_len].unsqueeze(0).to(x.device)

class TransformerEncoderDecoder(nn.Module):
    """Batch-first Transformer encoder-decoder that emits one scalar per target step.

    The source sequence is projected to ``d_model``, position-encoded and encoded.
    The decoder input (last dimension 1) is projected and position-encoded the same
    way, decoded against the encoder memory, and mapped back to one value per step.

    Args:
        input_dim: Number of features per source time step.
        d_model: Embedding width used throughout the model.
        nhead: Number of attention heads.
        num_encoder_layers: Number of stacked encoder layers.
        num_decoder_layers: Number of stacked decoder layers.
        input_seq_len: Maximum source length supported by the positional encoding.
        output_seq_len: Maximum target length supported by the positional encoding.
        dropout: Dropout probability inside the encoder/decoder layers.
    """

    def __init__(self, input_dim, d_model, nhead, num_encoder_layers, num_decoder_layers,
                 input_seq_len, output_seq_len, dropout=0.1):
        super().__init__()
        feedforward_width = d_model * 2

        # NOTE: sub-modules are created in a fixed order so that seeded weight
        # initialisation stays reproducible.
        self.input_proj = nn.Linear(in_features=input_dim, out_features=d_model)
        self.target_proj = nn.Linear(in_features=1, out_features=d_model)

        self.pos_enc_input = PositionalEncoding(d_model, max_len=input_seq_len)
        self.pos_enc_target = PositionalEncoding(d_model, max_len=output_seq_len)

        enc_block = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=feedforward_width,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(enc_block, num_layers=num_encoder_layers)

        dec_block = nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=feedforward_width,
            dropout=dropout,
            batch_first=True,
        )
        self.decoder = nn.TransformerDecoder(dec_block, num_layers=num_decoder_layers)

        self.output_layer = nn.Linear(in_features=d_model, out_features=1)

    def forward(self, src, tgt):
        """Run the encoder-decoder.

        Args:
            src: Source tensor ``[B, input_seq_len, input_dim]``.
            tgt: Decoder input tensor ``[B, output_seq_len, 1]``.

        Returns:
            Tensor ``[B, output_seq_len]`` with one prediction per target step.
        """
        # Encode the source: [B, input_seq_len, d_model]
        memory = self.encoder(self.pos_enc_input(self.input_proj(src)))

        # Embed the decoder input: [B, output_seq_len, d_model]
        queries = self.pos_enc_target(self.target_proj(tgt))

        # No target mask is passed, so every target position may attend to all others.
        decoded = self.decoder(queries, memory)

        # Collapse the trailing singleton dimension: [B, output_seq_len]
        return self.output_layer(decoded).squeeze(-1)


# Hyperparameters of the long-horizon Transformer.
INPUT_DIM = 12               # features per time step (matches the 12 columns printed above)
INPUT_SEQ_LEN = 24 * 90      # encoder context length in time steps
OUTPUT_SEQ_LEN = 24 * 365    # forecast horizon in time steps
D_MODEL = 128                # embedding width
NHEAD = 8                    # attention heads
NUM_LAYERS = 2               # used for both the encoder and the decoder stack
DROPOUT = 0.3

# Legacy instantiation kept for reference (refers to a `TransformerModel` not defined here):
# transformer_model_long = TransformerModel(INPUT_DIM,D_MODEL,NHEAD,NUM_LAYERS,OUTPUT_SEQ_LEN,DROPOUT,INPUT_SEQ_LEN).to(device)

In [8]:
def train_model(model, train_loader, criterion, optimizer, device):
    """Train ``model`` for one epoch and return the mean per-sample loss.

    The decoder receives an all-zero input whose length is taken from the target
    batch, so the function does not depend on any module-level sequence-length
    constant.

    Args:
        model: Module called as ``model(src, decoder_input)``.
        train_loader: Iterable yielding ``(X_batch, y_batch)`` with ``y_batch``
            of shape ``[B, output_seq_len]``.
        criterion: Loss function applied to ``(outputs, y_batch)``.
        optimizer: Optimizer updating the parameters of ``model``.
        device: Device on which the batch is processed.

    Returns:
        Loss averaged over all samples seen during the epoch.

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    model.train()
    running_loss = 0.0
    n_seen = 0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        batch_size = X_batch.size(0)

        # Zero decoder input, allocated directly on the target device.
        decoder_input = torch.zeros(batch_size, y_batch.size(1), 1, device=device)

        optimizer.zero_grad()
        outputs = model(X_batch, decoder_input)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # Weight by batch size so that a smaller final batch is averaged correctly.
        running_loss += loss.item() * batch_size
        n_seen += batch_size

    if n_seen == 0:
        raise ValueError("train_loader yielded no samples")

    # Divide by the samples actually processed (also correct when the loader drops
    # the last incomplete batch).
    return running_loss / n_seen

def evaluate_model(model, test_loader, criterion, device):
    """Evaluate ``model`` without gradient tracking and return the mean per-sample loss.

    The decoder receives an all-zero input whose length is taken from the target
    batch, so the function does not depend on any module-level sequence-length
    constant.

    Args:
        model: Module called as ``model(src, decoder_input)``.
        test_loader: Iterable yielding ``(X_batch, y_batch)`` with ``y_batch``
            of shape ``[B, output_seq_len]``.
        criterion: Loss function applied to ``(outputs, y_batch)``.
        device: Device on which the batch is processed.

    Returns:
        Loss averaged over all samples seen.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    running_loss = 0.0
    n_seen = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            batch_size = X_batch.size(0)

            # Zero decoder input, allocated directly on the target device.
            decoder_input = torch.zeros(batch_size, y_batch.size(1), 1, device=device)

            outputs = model(X_batch, decoder_input)
            loss = criterion(outputs, y_batch)

            running_loss += loss.item() * batch_size
            n_seen += batch_size

    if n_seen == 0:
        raise ValueError("test_loader yielded no samples")
    return running_loss / n_seen

def get_predictions(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and collect predictions and targets.

    The decoder receives an all-zero input whose length is taken from the target
    batch, so the function does not depend on any module-level sequence-length
    constant.

    Args:
        model: Module called as ``model(src, decoder_input)``.
        data_loader: Iterable yielding ``(X_batch, y_batch)`` with ``y_batch``
            of shape ``[B, output_seq_len]``.
        device: Device on which inference runs.

    Returns:
        Tuple ``(predictions, actuals)`` of NumPy arrays, each the concatenation
        of all batches along the first axis.
    """
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch = X_batch.to(device)
            # Zero decoder input, allocated directly on the target device.
            decoder_input = torch.zeros(X_batch.size(0), y_batch.size(1), 1, device=device)
            outputs = model(X_batch, decoder_input)

            predictions.append(outputs.detach().cpu().numpy())
            # Targets normally already live on the CPU; `.cpu()` is then a no-op and
            # otherwise prevents `.numpy()` from failing on a device tensor.
            actuals.append(y_batch.detach().cpu().numpy())
    return np.concatenate(predictions), np.concatenate(actuals)

def inverse_transform_data(scaled_data, scaler, target_col_idx):
    """Undo feature scaling for values that belong to a single column.

    ``scaler`` is expected to expose ``n_features_in_`` and
    ``inverse_transform`` (as fitted scikit-learn scalers do) and to operate on
    full feature rows. The values are therefore placed into the target column of
    a zero-filled matrix, inverse-transformed, and read back from that column.

    Args:
        scaled_data: Array-like of scaled target values, typically
            ``(num_samples, seq_len)``.
        scaler: Fitted scaler object as described above.
        target_col_idx: Column index of the target among the scaler's features.

    Returns:
        Array with the same shape as ``scaled_data`` holding the values in
        original units.
    """
    scaled_data = np.asarray(scaled_data)

    # Nothing to transform; also avoids handing a zero-row matrix to the scaler.
    if scaled_data.size == 0:
        return np.zeros(scaled_data.shape, dtype=np.float64)

    # One row per value, with as many columns as the scaler was fitted on.
    # NOTE: this temporary holds scaled_data.size * n_features floats, which can be
    # large for long horizons.
    dummy_array = np.zeros((scaled_data.size, scaler.n_features_in_))
    dummy_array[:, target_col_idx] = scaled_data.ravel()

    # Inverse-transform and keep only the target column.
    inversed_flat = scaler.inverse_transform(dummy_array)[:, target_col_idx]

    # Restore the caller's original layout.
    return inversed_flat.reshape(scaled_data.shape)

In [9]:
# Banner for the long-horizon run (the model trained below is the Transformer).
print("\n" + "="*30 + "\n Transformer Long-term Prediction (90 -> 365) \n" + "="*30)

NUM_EXPERIMENTS = 5
NUM_EPOCHS = 100  # the harder long-horizon task may need more epochs


def build_long_model():
    """Create a fresh, untrained long-horizon Transformer on `device`."""
    return TransformerEncoderDecoder(
        input_dim=INPUT_DIM,
        d_model=D_MODEL,
        nhead=NHEAD,
        num_encoder_layers=NUM_LAYERS,
        num_decoder_layers=NUM_LAYERS,
        input_seq_len=INPUT_SEQ_LEN,
        output_seq_len=OUTPUT_SEQ_LEN,
        dropout=DROPOUT
    ).to(device)


all_mse_scores_long = []
all_mae_scores_long = []
best_long_model_state = None
best_long_model_loss = float('inf')

for i in range(NUM_EXPERIMENTS):
    print(f"\n--- 实验轮次: {i+1}/{NUM_EXPERIMENTS} (Long-term) ---")

    # 1. Build the model; each run uses its own seed so that runs differ reproducibly.
    torch.manual_seed(42 + i)
    model = build_long_model()

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()
    # Halve the learning rate after 10 epochs without improvement of the monitored loss.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10, factor=0.5)

    # 2. Train on the long-horizon loaders.
    # NOTE(review): the learning-rate schedule and the best-model selection below are
    # both driven by the *test* loss, so the reported test metrics are optimistically
    # biased. A separate validation split would avoid this.
    for epoch in range(NUM_EPOCHS):
        train_loss = train_model(model, train_loader_long, criterion, optimizer, device)
        test_loss = evaluate_model(model, test_loader_long, criterion, device)
        scheduler.step(test_loss)
        if (epoch + 1) % 20 == 0:
            print(f'  Epoch [{epoch+1:02d}/{NUM_EPOCHS}], Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}')

    # Keep the weights of the run with the lowest final test loss. `state_dict()` returns
    # references to the live tensors, so take an independent CPU snapshot instead of
    # keeping the whole model alive on the device.
    if test_loss < best_long_model_loss:
        best_long_model_loss = test_loss
        best_long_model_state = {
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        }

    # 3. Evaluate this run (no inverse scaling is applied to predictions or targets).
    predictions_scaled, actuals_scaled = get_predictions(model, test_loader_long, device)
    predictions = predictions_scaled
    actuals = actuals_scaled
    mse = mean_squared_error(actuals.flatten(), predictions.flatten())
    mae = mean_absolute_error(actuals.flatten(), predictions.flatten())
    all_mse_scores_long.append(mse)
    all_mae_scores_long.append(mae)
    print(f"  实验 {i+1} 结束. MSE: {mse:.4f}, MAE: {mae:.4f}")

# Summary statistics over all runs.
mean_mse_long = np.mean(all_mse_scores_long)
std_mse_long = np.std(all_mse_scores_long)
mean_mae_long = np.mean(all_mae_scores_long)
std_mae_long = np.std(all_mae_scores_long)
print(f"\n长期预测最终结果: Avg MSE: {mean_mse_long:.4f} (±{std_mse_long:.4f}), Avg MAE: {mean_mae_long:.4f} (±{std_mae_long:.4f})")


# --- Visualisation inputs: predictions of the best-performing run ---
# No run produced a finite test loss (or no run was executed): fail with a clear message
# instead of an obscure error inside `load_state_dict`.
if best_long_model_state is None:
    raise RuntimeError("No best model state was recorded; check the training runs above.")

# 1. Rebuild the architecture and load the best weights.
vis_model_long = build_long_model()
vis_model_long.load_state_dict(best_long_model_state)
# 2. Collect its predictions on the test set.
predictions_scaled, actuals_scaled = get_predictions(vis_model_long, test_loader_long, device)
predictions = predictions_scaled
actuals = actuals_scaled



 LSTM Long-term Prediction (90 -> 365) 

--- 实验轮次: 1/5 (Long-term) ---


KeyboardInterrupt: 

In [None]:
# Display the repr of `model`, i.e. whatever network object is currently bound to that
# name in the kernel (normally the one from the most recent experiment iteration).
# NOTE(review): this depends on kernel state left by the training cell.
model

In [None]:
# 3. Plot the first test window.
# The windows are built from hourly data (horizon = 365*24 steps), while the title and
# axis labels describe daily sums over 365 days. Plotting the first 366 raw steps would
# therefore show only about 15 days of hourly values under a "days" axis. Aggregate each
# block of 24 hourly steps into one daily sum so the figure matches its labels.
# NOTE(review): assumes a daily value is the sum of its 24 hourly values - confirm.
HOURS_PER_DAY = 24
n_days = actuals.shape[1] // HOURS_PER_DAY


def _daily_sums(hourly_series):
    """Sum consecutive blocks of 24 hourly values into one value per day."""
    usable = hourly_series[: n_days * HOURS_PER_DAY]
    return usable.reshape(n_days, HOURS_PER_DAY).sum(axis=1)


plt.figure(figsize=(15, 6))
plt.plot(_daily_sums(actuals[0]), label='Ground Truth')
plt.plot(_daily_sums(predictions[0]), label='Transformer Prediction')
plt.title('Transformer: 365-Day Power Consumption Prediction')
plt.xlabel('Days into the Future')
plt.ylabel('Daily Global Active Power (Sum)')

plt.xticks(np.arange(0, n_days + 1, 30), rotation=45)
plt.legend()
plt.grid(True)
plt.show()