In [12]:
import pandas as pd
import numpy as np

import os
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pack_sequence, pad_packed_sequence
from torch.utils.data import DataLoader
from torch.utils.data import Dataset 


In [13]:
# Training dataset
class Train_Loader(Dataset):
    """Map-style dataset built from an Excel sheet; also fits the feature scaler.

    Every column except the last three is treated as a feature and min-max
    scaled to [0, 1]. The last three columns are kept unscaled and returned as
    the targets.

    Attributes:
        data_dir: Path of the Excel file that was read.
        data: float32 array holding the scaled features followed by the three
            unscaled target columns, one row per sample.
        scaler: The MinMaxScaler fitted on this file's feature columns, exposed
            so other datasets can apply the same scaling.
    """

    def __init__(
            self,
            data_dir,
    ):
        self.data_dir = data_dir
        scaler = MinMaxScaler(feature_range=(0, 1))
        self.data = self._preprocess(pd.read_excel(self.data_dir, skiprows=0), scaler)
        self.scaler = scaler

    @staticmethod
    def _preprocess(frame, scaler):
        """Clean `frame`, fit `scaler` on its features and return a float32 array.

        Args:
            frame: DataFrame whose last three columns are the targets.
            scaler: Object exposing `fit_transform`; it is fitted in place.

        Returns:
            2-D float32 array with the same column order as `frame`.
        """
        frame = frame.astype('float32')
        # Missing values are replaced by the mean of their own column.
        frame = frame.fillna(frame.mean())
        # Scale the feature columns only (everything except the last three).
        scaled_features = scaler.fit_transform(frame.iloc[:, :-3])
        raw_targets = frame.iloc[:, -3:].to_numpy()
        # The scaler returns float64; cast explicitly so the samples are always
        # float32 instead of depending on how pandas would merge the dtypes.
        return np.hstack([np.asarray(scaled_features), raw_targets]).astype(np.float32)

    def __getitem__(self, i):
        """Return `(features, targets)` for row `i`, each with a leading axis of size 1."""
        features = np.expand_dims(self.data[i, :-3], axis=0)
        targets = np.expand_dims(self.data[i, -3:], axis=0)
        return features, targets

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.data)
    

# Validation / test dataset
class Test_Loader(Dataset):
    """Map-style dataset built from an Excel sheet, scaled with a pre-fitted scaler.

    Every column except the last three is treated as a feature and passed
    through `scaler.transform`. The last three columns are kept unscaled and
    returned as the targets.

    Attributes:
        data_dir: Path of the Excel file that was read.
        data: float32 array holding the scaled features followed by the three
            unscaled target columns, one row per sample.
    """

    def __init__(
            self,
            data_dir,
            scaler,
    ):
        self.data_dir = data_dir
        self.data = self._preprocess(pd.read_excel(self.data_dir, skiprows=0), scaler)

    @staticmethod
    def _preprocess(frame, scaler):
        """Clean `frame`, apply the already-fitted `scaler` and return a float32 array.

        Args:
            frame: DataFrame whose last three columns are the targets.
            scaler: Object exposing `transform`; it is not re-fitted here.

        Returns:
            2-D float32 array with the same column order as `frame`.
        """
        frame = frame.astype('float32')
        # Missing values are replaced by the mean of their own column.
        # NOTE(review): the means come from this file, not from the training data.
        frame = frame.fillna(frame.mean())
        # Scale the feature columns only (everything except the last three).
        scaled_features = scaler.transform(frame.iloc[:, :-3])
        raw_targets = frame.iloc[:, -3:].to_numpy()
        # The scaler returns float64; cast explicitly so the samples are always
        # float32 instead of depending on how pandas would merge the dtypes.
        return np.hstack([np.asarray(scaled_features), raw_targets]).astype(np.float32)

    def __getitem__(self, i):
        """Return `(features, targets)` for row `i`, each with a leading axis of size 1."""
        features = np.expand_dims(self.data[i, :-3], axis=0)
        targets = np.expand_dims(self.data[i, -3:], axis=0)
        return features, targets

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.data)

In [14]:
# Build the training dataset from the training spreadsheet.
train_data_dir = "data/train_data.xlsx"
train_data_loader = Train_Loader(train_data_dir)

In [15]:
# Inspect the first sample: feature shape, target shape, then the dataset length.
features, targets = train_data_loader[0]
print(features.shape, targets.shape, len(train_data_loader), sep='\n')

(1, 65)
(1, 3)
20157


In [16]:
batchsize = 128
# Shuffled mini-batch loader over the training dataset.
train_data = DataLoader(train_data_loader, shuffle=True, batch_size=batchsize)
# Peek at the first batch only, to check the batched tensor shapes.
for features, targets in train_data:
    print(features.shape, targets.shape, sep='\n')
    break
# Number of batches per epoch.
print(len(train_data))

torch.Size([128, 1, 65])
torch.Size([128, 1, 3])
158


In [17]:
# Alternative architecture, kept for reference:
# from nets.CNN_BIGRU_Attention import CNN_BiGRU_Attention
from nets.CNN_Transformer import CNN_Transformer

# Path prefix for checkpoints and the loss log (the file name is appended directly).
# model_dir="model/CNN_BiGRU_Attention"
model_dir = "model/CNN_Transformer"

# Constructor arguments; presumably the input feature width and the number of
# regression targets (65 and 3 match the sample shapes) - confirm against the model.
n_future = 65
n_class = 3
model = CNN_Transformer(n_future, n_class)

# Loss: mean squared error.
criterion = nn.MSELoss()
# Optimizer: Adam over all model parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [18]:
# model=torch.load("model/CNN_Transformer247.41838136212579.pth")


In [19]:
# Validation set for computing the validation MSE, scaled with the training scaler.
valid_data_dir = "data/valid_data.xlsx"
valid_data_loader = Test_Loader(valid_data_dir, scaler=train_data_loader.scaler)
# Fixed order (no shuffling) for validation.
valid_data = DataLoader(valid_data_loader, shuffle=False, batch_size=batchsize)
print(len(valid_data))


40


In [20]:
def valid(valid_model, valid_data, criterion):
    """Return the mean per-batch loss of `valid_model` over `valid_data`.

    The model is evaluated in inference mode (`eval()` plus `torch.no_grad()`),
    so dropout / batch-norm layers behave as they do in `test()` and no
    autograd graph is built. The model's previous train/eval mode is restored
    before returning.

    Args:
        valid_model: The `nn.Module` to evaluate. It is moved to the GPU when
            one is available, otherwise it is evaluated on the CPU.
        valid_data: Sized iterable of `(features, targets)` tensor batches.
        criterion: Callable `criterion(outputs, targets)` returning a scalar
            loss tensor.

    Returns:
        Sum of the batch losses divided by the number of batches.

    Raises:
        ZeroDivisionError: If `valid_data` contains no batches.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Move the model once, not once per batch.
    valid_model = valid_model.to(device)
    was_training = valid_model.training
    valid_model.eval()
    valid_loss = 0.0
    try:
        with torch.no_grad():
            for features, targets in valid_data:
                outputs = valid_model(features.to(device))
                valid_loss += criterion(outputs, targets.to(device)).item()
    finally:
        # Leave the caller's train/eval mode as it was found.
        valid_model.train(was_training)
    return valid_loss / len(valid_data)

In [21]:
# Train the model and save a checkpoint whenever the validation MSE improves.
# When the validation MSE has not improved for more than PATIENCE consecutive
# epochs, the learning rate is multiplied by LR_DECAY; training stops once the
# learning rate falls below MIN_LR or after MAX_EPOCHS epochs.
MAX_EPOCHS = 1000
PATIENCE = 50
LR_DECAY = 0.1
MIN_LR = 1e-6

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move the model once up front instead of on every batch.
model = model.to(device)

# model_dir is used as a path *prefix*; make sure its parent folder exists so
# that saving checkpoints and the loss log cannot fail on a fresh checkout.
checkpoint_folder = os.path.dirname(model_dir)
if checkpoint_folder:
    os.makedirs(checkpoint_folder, exist_ok=True)

min_valid_loss = float('inf')
no_improve = 0
for epoch in range(MAX_EPOCHS):
    model.train()
    train_loss = 0
    for features, targets in train_data:
        features = features.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss = train_loss + loss.item()
    valid_loss = valid(model, valid_data, criterion)
    if valid_loss < min_valid_loss:
        min_valid_loss = valid_loss
        # The whole module is pickled; the file name embeds the validation loss.
        torch.save(model, model_dir + str(min_valid_loss) + '.pth')
        no_improve = 0
    else:
        no_improve += 1
    if no_improve > PATIENCE:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * LR_DECAY
            print('Learning rate has been changed to: {}'.format(param_group['lr']))
        no_improve = 0
    # Build the per-epoch summary once; it is both printed and appended to the log.
    epoch_summary = 'Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}'.format(
        epoch, train_loss / len(train_data), valid_loss)
    print(epoch_summary)
    with open(model_dir + 'loss.txt', 'a') as f:
        f.write(epoch_summary + '\n')
    if optimizer.param_groups[0]['lr'] < MIN_LR:
        break


Epoch: 0, Train Loss: 40860.6054, Valid Loss: 12137.7983
Epoch: 1, Train Loss: 10600.2515, Valid Loss: 1855.5761
Epoch: 2, Train Loss: 1009.4238, Valid Loss: 739.0311
Epoch: 3, Train Loss: 673.0568, Valid Loss: 559.3743
Epoch: 4, Train Loss: 512.6396, Valid Loss: 449.0355
Epoch: 5, Train Loss: 406.1927, Valid Loss: 422.8269
Epoch: 6, Train Loss: 384.4698, Valid Loss: 406.3339
Epoch: 7, Train Loss: 363.9380, Valid Loss: 356.0440
Epoch: 8, Train Loss: 342.8370, Valid Loss: 334.6470
Epoch: 9, Train Loss: 320.9771, Valid Loss: 315.6983
Epoch: 10, Train Loss: 285.1829, Valid Loss: 240.6401
Epoch: 11, Train Loss: 214.0111, Valid Loss: 193.9870
Epoch: 12, Train Loss: 206.3693, Valid Loss: 360.1801
Epoch: 13, Train Loss: 197.7278, Valid Loss: 211.2484
Epoch: 14, Train Loss: 176.5073, Valid Loss: 201.5024
Epoch: 15, Train Loss: 181.6936, Valid Loss: 168.8872
Epoch: 16, Train Loss: 163.0801, Valid Loss: 191.2275
Epoch: 17, Train Loss: 157.7598, Valid Loss: 174.6760
Epoch: 18, Train Loss: 357.414

In [23]:
# Load a checkpoint written by torch.save(model, ...), i.e. a fully pickled module.
# weights_only=False is passed explicitly because recent PyTorch releases default
# to weights_only=True, which refuses to unpickle a whole nn.Module.
# WARNING: unpickling can execute arbitrary code - only load checkpoints you trust.
model = torch.load("model/CNN_Transformer52.048096895217896.pth", weights_only=False)
# model=torch.load("model/CNN_BiGRU_Attention238.75645760832163.pth")
# Test set, scaled with the scaler fitted on the training data.
test_data_dir = "data/test_data.xlsx"
test_data_loader = Test_Loader(test_data_dir, train_data_loader.scaler)
# One sample per batch, in file order.
test_data = DataLoader(test_data_loader, batch_size=1, shuffle=False)
print(len(test_data))

4447


In [None]:
# for features, targets in test_data:
#     features = features.cuda()
#     targets = targets.cuda()
#     test_model = model.cuda()
#     outputs = test_model(features)
#     print(outputs.cpu().detach().numpy()[0][0][0])
#     break


In [24]:
#计算测试集上的MSE,RMSE,MAE,R2
def test(test_model, test_data):
    test_model.eval()

    YS_true = []
    YS_pred = []

    TS_true = []
    TS_pred = []

    EL_true = []
    EL_pred = []

    for features, targets in test_data:
        features = features.cuda()
        targets = targets.cuda()
        test_model = test_model.cuda()
        outputs = test_model(features)
        YS_true.append(targets.cpu().detach().numpy()[0][0][0])
        YS_pred.append(outputs.cpu().detach().numpy()[0][0][0])
        TS_true.append(targets.cpu().detach().numpy()[0][0][1])
        TS_pred.append(outputs.cpu().detach().numpy()[0][0][1])
        EL_true.append(targets.cpu().detach().numpy()[0][0][2])
        EL_pred.append(outputs.cpu().detach().numpy()[0][0][2])
    #计算YS的RMSE,MAPE,R2
    YS_true = np.array(YS_true)
    YS_pred = np.array(YS_pred)
    YS_MSE = mean_squared_error(YS_true, YS_pred)
    YS_RMSE = np.sqrt(YS_MSE)
    YS_MAPE = np.mean(np.abs((YS_pred - YS_true) / YS_true))
    YS_R2 = 1 - YS_MSE / np.var(YS_true)
    print(f'YS_RMSE: {YS_RMSE:.4f},YS_MAPE: {YS_MAPE:.4f},YS_R2: {YS_R2:.4f}')
    #计算TS的RMSE,MAPE,R2
    TS_true = np.array(TS_true)
    TS_pred = np.array(TS_pred)
    TS_MSE = mean_squared_error(TS_true, TS_pred)
    TS_RMSE = np.sqrt(TS_MSE)
    TS_MAPE = np.mean(np.abs((TS_pred - TS_true) / TS_true))
    TS_R2 = 1 - TS_MSE / np.var(TS_true)
    print(f'TS_RMSE: {TS_RMSE:.4f},TS_MAPE: {TS_MAPE:.4f},TS_R2: {TS_R2:.4f}')
    #计算EL的RMSE,MAPE,R2
    EL_true = np.array(EL_true)
    EL_pred = np.array(EL_pred)
    EL_MSE = mean_squared_error(EL_true, EL_pred)
    EL_RMSE = np.sqrt(EL_MSE)
    EL_MAPE = np.mean(np.abs((EL_pred - EL_true) / EL_true))
    EL_R2 = 1 - EL_MSE / np.var(EL_true)
    print(f'EL_RMSE: {EL_RMSE:.4f},EL_MAPE: {EL_MAPE:.4f},EL_R2: {EL_R2:.4f}')
    

    return YS_pred, TS_pred, EL_pred, YS_true, TS_true, EL_true

# Evaluate the loaded model on the test set; keep predictions and ground truth per target.
(YS_pred, TS_pred, EL_pred,
 YS_true, TS_true, EL_true) = test(model, test_data)



    



YS_RMSE: 9.1290,YS_MAPE: 0.0238,YS_R2: 0.9932
TS_RMSE: 9.4785,TS_MAPE: 0.0148,TS_R2: 0.9962
EL_RMSE: 1.8514,EL_MAPE: 0.0391,EL_R2: 0.9620
