In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 1. 加载数据
data = pd.read_csv('./data/total_load_actual.csv')

# 2. 检查数据是否包含 NaN
if data.isnull().any().any():
    print("Data contains NaN values. Filling missing values...")
    data = data.ffill()  # 使用前值填充缺失数据
else:
    print("Data does not contain NaN values.")

# 3. 将数据列转换为字典格式
data_dict = {}
for col in data.columns:
    data_dict[col] = data[col].tolist()

total_acual_load_list = data_dict['total load actual']

# 4. 归一化数据
scaler = MinMaxScaler(feature_range=(0, 1))
total_acual_load_list_scaled = scaler.fit_transform(np.array(total_acual_load_list).reshape(-1, 1)).flatten()

# 5. 设定序列长度和样本数
seq_length = 24
window_size = 8  # No overlap

# 6. 初始化输入和输出数据
x_processed = []
y_processed = []

max_i = (len(total_acual_load_list_scaled) - seq_length) // window_size

for i in range(max_i):
    x_sequence = total_acual_load_list_scaled[i * window_size : i * window_size + seq_length]
    y_value = total_acual_load_list_scaled[i * window_size + seq_length]
    x_processed.append(x_sequence)
    y_processed.append(y_value)

# Convert to NumPy arrays
x_processed = np.array(x_processed)
y_processed = np.array(y_processed)

# 7. 转换为NumPy数组 并且打乱数据
x_processed = np.array(x_processed)
y_processed = np.array(y_processed)

# 8. 转换为Tensor
x_processed_tensor = torch.tensor(x_processed, dtype=torch.float32)
y_processed_tensor = torch.tensor(y_processed, dtype=torch.float32).reshape(-1, 1)

# 9. 数据划分: 80% 训练集,20% 测试集 使用 train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_processed_tensor, y_processed_tensor, test_size=0.2, random_state=42, shuffle=False)

# 10. 定义前馈神经网络(BP神经网络)
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)  # 第一层,全连接层
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)  # 第二层,全连接层
        self.fc3 = nn.Linear(hidden_dim, output_dim)  # 输出层

    def forward(self, x):
        x = torch.relu(self.fc1(x))  # ReLU激活函数
        x = torch.relu(self.fc2(x))  # ReLU激活函数
        x = self.fc3(x)  # 输出层
        return x

# 11. 定义模型
input_dim = seq_length  # 输入维度(序列长度)
hidden_dim = 16
output_dim = 1
model = MLP(input_dim, hidden_dim, output_dim)

# 12. 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 13. 训练模型
num_epochs = 10000
train_loss = []

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    output = model(x_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()
    
    train_loss.append(loss.item())
    if (epoch+1) % (num_epochs//10) == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.8f}')

# 14. 评估模型
model.eval()
with torch.no_grad():
    test_output = model(x_test)
    test_loss = criterion(test_output, y_test)
    print(f'Test Loss: {test_loss.item():.8f}')

Data contains NaN values. Filling missing values...
Epoch [1000/10000], Loss: 0.00117244
Epoch [2000/10000], Loss: 0.00083797
Epoch [3000/10000], Loss: 0.00076971
Epoch [4000/10000], Loss: 0.00072742
Epoch [5000/10000], Loss: 0.00069021
Epoch [6000/10000], Loss: 0.00066628
Epoch [7000/10000], Loss: 0.00064779
Epoch [8000/10000], Loss: 0.00063894
Epoch [9000/10000], Loss: 0.00062377
Epoch [10000/10000], Loss: 0.00061024
Test Loss: 0.00037241


In [2]:
from sklearn.metrics import mean_squared_error
import numpy as np

# 计算各类指标的函数
def calculate_metrics(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = np.mean(np.abs(y_true - y_pred))
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    rmse = np.sqrt(mse)
    r2 = 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2))
    smape = np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred)))
    
    return mse, mae, mape, rmse, r2, smape

# 15. 测试模型
model.eval()
with torch.no_grad():
    predicted = model(x_test).cpu().numpy()

# 16. 反归一化预测结果
predicted_original_scale = scaler.inverse_transform(predicted)
y_test_original_scale = scaler.inverse_transform(y_test.cpu().numpy())

# 17. 计算归一化前的指标
mse_original, mae_original, mape_original, rmse_original, r2_original, smape_original = calculate_metrics(y_test_original_scale, predicted_original_scale)

# 18. 计算归一化后的指标
y_test_np = y_test.cpu().numpy().flatten()
predicted_np = predicted.flatten()
mse_normalized, mae_normalized, mape_normalized, rmse_normalized, r2_normalized, smape_normalized = calculate_metrics(y_test_np, predicted_np)

# 19. 打印结果
print('+=============+=============+=============+=============+')
print('|   Metrics (Original Scale)   |   Value   |   Metrics (Normalized Scale)   |   Value   |')
print('+=============+=============+=============+=============+')
metrics = [
    ('MSE', mse_original, mse_normalized),
    ('MAE', mae_original, mae_normalized),
    ('MAPE', mape_original, mape_normalized),
    ('RMSE', rmse_original, rmse_normalized),
    ('R²', r2_original, r2_normalized),
    ('SMAPE', smape_original, smape_normalized)
]

for metric, original_value, normalized_value in metrics:
    print(f'| {metric:<15} | {original_value:>26.6f} | {metric} (Normalized){"":<2} | {normalized_value:>26.6f} |')

print('+=============+=============+=============+=============+')


|   Metrics (Original Scale)   |   Value   |   Metrics (Normalized Scale)   |   Value   |
| MSE             |              196558.031250 | MSE (Normalized)   |                   0.000372 |
| MAE             |                 278.271576 | MAE (Normalized)   |                   0.012112 |
| MAPE            |                   0.998954 | MAPE (Normalized)   |                   3.169153 |
| RMSE            |                 443.348663 | RMSE (Normalized)   |                   0.019298 |
| R²              |                   0.988451 | R² (Normalized)   |                   0.988451 |
| SMAPE           |                   0.009984 | SMAPE (Normalized)   |                   0.031425 |


In [3]:
import plotly.graph_objs as go
import plotly.offline as pyo

#1. 训练损失的交互式可视化
train_loss_fig = go.Figure()

# 添加训练损失曲线
train_loss_fig.add_trace(go.Scatter(
    x=list(range(1, num_epochs + 1)),  # 横坐标是训练的轮数
    y=train_loss,  # 纵坐标是训练过程中的损失
    mode='lines',  # 线形显示
    name='Training Loss',  # 曲线的标签
    line=dict(color='royalblue', width=3, dash='dash'),  # 设置颜色、宽度和虚线样式
    marker=dict(symbol='circle', size=6, color='deepskyblue', line=dict(color='darkblue', width=2))  # 数据点样式
))

# 更新图表的布局
train_loss_fig.update_layout(
    title='BPNN:Training Loss Curve',  # 图表标题
    title_font=dict(size=20, family='Times New Roman', color='black'),  # 设置标题字体
    xaxis=dict(title='Epoch', title_font=dict(size=14, family='Times New Roman', color='black'), gridcolor='lightgray'),  # X轴标签
    yaxis=dict(title='Loss', title_font=dict(size=14, family='Times New Roman', color='black'), gridcolor='lightgray'),  # Y轴标签
    plot_bgcolor='whitesmoke',  # 设置图表背景色
    paper_bgcolor='whitesmoke',  # 设置图表外部背景色
    font=dict(family='Times New Roman', size=12, color='black'),  # 字体设置
    legend=dict(
        x=0.5, y=1.1,  # 设置图例位置
        xanchor='center',
        orientation='h',
        traceorder='normal',  # 图例顺序
        font=dict(family='Times New Roman', size=12, color='black'),
        bgcolor='rgba(255, 255, 255, 0.5)',  # 图例背景颜色
        bordercolor='black', borderwidth=1  # 图例边框颜色和宽度
    ),
    showlegend=True  # 显示图例
)

# 设置图表边缘的阴影效果
train_loss_fig.update_layout(
    xaxis=dict(showgrid=True, zeroline=False, showline=True, linecolor='gray'),  # X轴样式
    yaxis=dict(showgrid=True, zeroline=False, showline=True, linecolor='gray'),  # Y轴样式
    margin=dict(l=50, r=50, t=50, b=50),  # 设置图表的边距
    width = 1200,
    height = 600
)

train_loss_fig.show()

#2. 预测与实际结果的交互式可视化
# 创建实际结果的曲线
actual_trace = go.Scatter(
    x=list(range(100)),  # 横坐标是时间步
    y=y_test_original_scale[:100].flatten(),  # 纵坐标是实际测试数据(前100个数据点)
    mode='lines+markers',  # 线条和数据点显示
    name='Actual',  # 曲线名称
    line=dict(color='forestgreen', width=3, shape='spline'),  # 设置颜色、宽度和曲线类型
    marker=dict(symbol='circle', size=8, color='limegreen', line=dict(color='darkgreen', width=2))  # 数据点样式
)

# 创建预测结果的曲线
predicted_trace = go.Scatter(
    x=list(range(100)),  # 横坐标是时间步
    y=predicted_original_scale[:100].flatten(),  # 纵坐标是模型的预测数据(前100个数据点)
    mode='lines+markers',  # 线条和数据点显示
    name='Predicted',  # 曲线名称
    line=dict(color='firebrick', width=3, dash='dot'),  # 设置颜色、宽度和虚线样式
    marker=dict(symbol='x', size=8, color='tomato', line=dict(color='darkred', width=2))  # 数据点样式
)

# 创建包含实际和预测数据的图表
comparison_fig = go.Figure(data=[actual_trace, predicted_trace])

# 更新布局设置
comparison_fig.update_layout(
    title='BPNN:Actual vs Predicted',  # 图表标题
    title_font=dict(size=20, family='Times New Roman', color='black'),  # 标题字体
    xaxis=dict(title='Time Step', title_font=dict(size=14, family='Times New Roman', color='black'), gridcolor='lightgray'),  # X轴标签
    yaxis=dict(title='Load', title_font=dict(size=14, family='Times New Roman', color='black'), gridcolor='lightgray'),  # Y轴标签
    plot_bgcolor='whitesmoke',  # 图表背景颜色
    paper_bgcolor='whitesmoke',  # 外部背景颜色
    font=dict(family='Times New Roman', size=12, color='black'),  # 字体设置
    legend=dict(
        x=0.5, y=1.1,  # 设置图例位置
        xanchor='center',
        orientation='h',
        traceorder='normal',  # 图例顺序
        font=dict(family='Times New Roman', size=12, color='black'),
        bgcolor='rgba(255, 255, 255, 0.5)',  # 图例背景颜色
        bordercolor='black', borderwidth=1  # 图例边框颜色和宽度
    ),
    showlegend=True  # 显示图例
)

# 为图表添加渐变背景色
comparison_fig.update_layout(
    paper_bgcolor='rgba(240, 240, 240, 0.9)',  # 外部背景渐变色
    plot_bgcolor='rgba(255, 255, 255, 0.9)',  # 图表区域背景渐变色
    width = 1200,
    height = 600
)

# 显示预测与实际结果的图表
comparison_fig.show()   