In [13]:
import pandas as pd
import numpy as np

import os
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pack_sequence, pad_packed_sequence
from torch.utils.data import DataLoader
from torch.utils.data import Dataset 


In [14]:
# Training dataset
class Train_Loader(Dataset):
    """Dataset over an Excel sheet whose trailing columns are the regression targets.

    The sheet is read with ``pd.read_excel``, cast to float32, missing values are
    replaced by the mean of their own column, and the feature columns (everything
    except the trailing ``n_targets`` columns) are scaled to [0, 1] with a
    ``MinMaxScaler`` fitted on this data. The fitted scaler is kept on
    ``self.scaler`` so that other datasets can be scaled consistently.

    Args:
        data_dir: Path of the Excel file (handed to ``pd.read_excel``).
        n_targets: Number of trailing columns treated as targets. Defaults to 3,
            the value that used to be hard-coded.

    Raises:
        ValueError: If ``n_targets`` is not in ``1 .. n_columns - 1``.
    """

    def __init__(
            self,
            data_dir,
            n_targets=3,
    ):
        self.data_dir = data_dir
        self.n_targets = n_targets
        data = pd.read_excel(self.data_dir, skiprows=0)
        if not 0 < n_targets < data.shape[1]:
            raise ValueError(
                'n_targets must be between 1 and {} (got {})'.format(data.shape[1] - 1, n_targets)
            )
        # Work in float32 throughout
        data = data.astype('float32')
        # Fill every missing value with the mean of its column
        data = data.fillna(data.mean())
        # Min-max scale the feature columns only; target columns keep their original units
        scaler = MinMaxScaler(feature_range=(0, 1))
        data.iloc[:, :-n_targets] = scaler.fit_transform(data.iloc[:, :-n_targets])
        self.data = data.values
        self.scaler = scaler

    def __getitem__(self, i):
        """Return ``(features, targets)`` for row ``i``, each with a leading axis of size 1."""
        features = np.expand_dims(self.data[i, :-self.n_targets], axis=0)
        targets = np.expand_dims(self.data[i, -self.n_targets:], axis=0)
        return features, targets

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.data)
    

# Validation / test dataset
class Test_Loader(Dataset):
    """Dataset over an Excel sheet, scaled with an already-fitted scaler.

    The sheet is read with ``pd.read_excel``, cast to float32, missing values are
    replaced by the mean of their own column (computed on this sheet), and the
    feature columns (everything except the trailing ``n_targets`` columns) are
    passed through ``scaler.transform``.

    Args:
        data_dir: Path of the Excel file (handed to ``pd.read_excel``).
        scaler: Fitted object exposing ``transform``; it receives the feature
            columns as a DataFrame.
        n_targets: Number of trailing columns treated as targets. Defaults to 3,
            the value that used to be hard-coded.

    Raises:
        ValueError: If ``n_targets`` is not in ``1 .. n_columns - 1``.
    """

    def __init__(
            self,
            data_dir,
            scaler,
            n_targets=3,
    ):
        self.data_dir = data_dir
        self.n_targets = n_targets
        data = pd.read_excel(self.data_dir, skiprows=0)
        if not 0 < n_targets < data.shape[1]:
            raise ValueError(
                'n_targets must be between 1 and {} (got {})'.format(data.shape[1] - 1, n_targets)
            )
        # Work in float32 throughout
        data = data.astype('float32')
        # Fill every missing value with the mean of its column
        data = data.fillna(data.mean())
        # Scale the feature columns with the supplied scaler; targets stay untouched
        data.iloc[:, :-n_targets] = scaler.transform(data.iloc[:, :-n_targets])
        self.data = data.values

    def __getitem__(self, i):
        """Return ``(features, targets)`` for row ``i``, each with a leading axis of size 1."""
        features = np.expand_dims(self.data[i, :-self.n_targets], axis=0)
        targets = np.expand_dims(self.data[i, -self.n_targets:], axis=0)
        return features, targets

    def __len__(self):
        """Number of rows in the dataset."""
        return len(self.data)

In [15]:
# Build the training dataset; the scaler it fits is available as train_data_loader.scaler
train_data_dir = "data/train_data.xlsx"
train_data_loader = Train_Loader(data_dir=train_data_dir)

In [16]:
# Inspect the feature/target shapes of the first sample and the dataset length
features, targets = train_data_loader[0]
print(features.shape, targets.shape, len(train_data_loader), sep="\n")

(1, 30)
(1, 3)
17211


In [17]:
batchsize = 64
# Wrap the training dataset in a shuffling DataLoader
train_data = DataLoader(dataset=train_data_loader, batch_size=batchsize, shuffle=True)
# Peek at the first batch only to confirm the batched tensor shapes
for features, targets in train_data:
    print(features.shape, targets.shape, sep="\n")
    break
# Number of batches per epoch
print(len(train_data))

torch.Size([64, 1, 30])
torch.Size([64, 1, 3])
269


In [18]:
# Alternative architecture kept for reference:
# from nets.CNN_BIGRU_Attention import CNN_BiGRU_Attention
from nets.CNN_Transformer import CNN_Transformer

# Constructor arguments for the network.
# NOTE(review): 30 and 3 match the feature/target widths printed for one sample;
# presumably these are the input and output sizes — confirm against nets.CNN_Transformer.
n_future = 30
n_class = 3
model = CNN_Transformer(n_future, n_class)

# Loss function: mean squared error
criterion = nn.MSELoss()
# Optimizer: Adam over all model parameters
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Path prefix used for saved checkpoints and the loss log
# (alternative prefix: "model/CNN_BiGRU_Attention")
model_dir = "model/CNN_Transformer"


In [19]:
# model=torch.load("model/CNN_Transformer247.41838136212579.pth")


In [20]:
# Validation set, scaled with the scaler that was fitted on the training set
valid_data_dir = "data/valid_data.xlsx"
valid_data_loader = Test_Loader(data_dir=valid_data_dir, scaler=train_data_loader.scaler)
valid_data = DataLoader(dataset=valid_data_loader, batch_size=batchsize, shuffle=False)
# Number of validation batches
print(len(valid_data))


58


In [21]:
def valid(valid_model, valid_data, criterion):
    """Return the mean per-batch loss of ``valid_model`` over ``valid_data``.

    The model is evaluated in eval mode (dropout / batch-norm in inference
    behaviour) and without gradient tracking; its previous train/eval mode is
    restored before returning. CUDA is used when available, otherwise the CPU.

    Args:
        valid_model: ``nn.Module`` to evaluate; it is moved to the chosen device.
        valid_data: Sized iterable yielding ``(features, targets)`` tensor pairs.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar tensor.

    Returns:
        Sum of the batch losses divided by ``len(valid_data)``.

    Raises:
        ZeroDivisionError: If ``valid_data`` is empty.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Move the model once instead of on every batch
    valid_model = valid_model.to(device)
    was_training = valid_model.training
    valid_model.eval()
    valid_loss = 0.0
    try:
        # No gradients are needed for validation; skipping them saves memory and time
        with torch.no_grad():
            for features, targets in valid_data:
                features = features.to(device)
                targets = targets.to(device)
                outputs = valid_model(features)
                valid_loss += criterion(outputs, targets).item()
    finally:
        # Leave the model in whatever mode the caller had it in
        valid_model.train(was_training)
    return valid_loss / len(valid_data)

In [22]:
# Train the model. A checkpoint is saved whenever the validation MSE improves; after more
# than PATIENCE consecutive epochs without improvement the learning rate is multiplied by
# LR_DECAY, and training stops once the learning rate drops below MIN_LR.
MAX_EPOCHS = 1000
PATIENCE = 50
LR_DECAY = 0.1
MIN_LR = 1e-6

# Use the GPU when present; move the model once rather than on every batch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# model_dir is a path prefix; make sure its parent directory exists before saving into it
checkpoint_parent = os.path.dirname(model_dir)
if checkpoint_parent:
    os.makedirs(checkpoint_parent, exist_ok=True)

min_valid_loss = float('inf')
no_improve = 0
for epoch in range(MAX_EPOCHS):
    model.train()
    train_loss = 0
    for features, targets in train_data:
        features = features.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss = train_loss + loss.item()
    valid_loss = valid(model, valid_data, criterion)
    if valid_loss < min_valid_loss:
        min_valid_loss = valid_loss
        # The whole model object is saved; the file name embeds the validation loss
        torch.save(model, model_dir + str(min_valid_loss) + '.pth')
        no_improve = 0
    else:
        no_improve += 1
    if no_improve > PATIENCE:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * LR_DECAY
            print('Learning rate has been changed to: {}'.format(param_group['lr']))
        no_improve = 0
    # Format the epoch summary once and reuse it for both the console and the log file
    epoch_summary = 'Epoch: {}, Train Loss: {:.4f}, Valid Loss: {:.4f}'.format(epoch, train_loss / len(train_data), valid_loss)
    print(epoch_summary)
    # Append the train/validation MSE of this epoch to the text log
    with open(model_dir + 'loss.txt', 'a') as f:
        f.write(epoch_summary + '\n')
    if optimizer.param_groups[0]['lr'] < MIN_LR:
        break


Epoch: 0, Train Loss: 23817.1336, Valid Loss: 661.8421
Epoch: 1, Train Loss: 832.7875, Valid Loss: 662.5304
Epoch: 2, Train Loss: 832.4102, Valid Loss: 673.1528
Epoch: 3, Train Loss: 832.3200, Valid Loss: 680.6970
Epoch: 4, Train Loss: 830.7601, Valid Loss: 594.4641
Epoch: 5, Train Loss: 388.7008, Valid Loss: 276.9430
Epoch: 6, Train Loss: 262.9813, Valid Loss: 260.9045
Epoch: 7, Train Loss: 252.4382, Valid Loss: 255.3658
Epoch: 8, Train Loss: 246.6610, Valid Loss: 260.1387
Epoch: 9, Train Loss: 245.3076, Valid Loss: 248.5932
Epoch: 10, Train Loss: 239.9886, Valid Loss: 251.0929
Epoch: 11, Train Loss: 234.1568, Valid Loss: 254.0376
Epoch: 12, Train Loss: 230.4918, Valid Loss: 256.2341
Epoch: 13, Train Loss: 228.5678, Valid Loss: 252.1108
Epoch: 14, Train Loss: 222.9076, Valid Loss: 252.6822
Epoch: 15, Train Loss: 217.3201, Valid Loss: 278.3437
Epoch: 16, Train Loss: 219.6167, Valid Loss: 246.6147
Epoch: 17, Train Loss: 218.4534, Valid Loss: 251.3415
Epoch: 18, Train Loss: 223.7419, Val

KeyboardInterrupt: 

In [None]:
# Load a saved model for evaluation.
# NOTE(review): this file name refers to a CNN_BiGRU_Attention checkpoint, while the training
# cell saves under the "model/CNN_Transformer" prefix — confirm this is the intended model.
# NOTE(review): torch.load unpickles the stored object; only load checkpoint files you trust.
model = torch.load("model/CNN_BiGRU_Attention644.167.pth")

# Test set, scaled with the scaler that was fitted on the training set
test_data_dir = "data/test_data.xlsx"
test_data_loader = Test_Loader(data_dir=test_data_dir, scaler=train_data_loader.scaler)
test_data = DataLoader(dataset=test_data_loader, batch_size=1, shuffle=False)
# Number of test batches (one sample per batch)
print(len(test_data))

#计算测试集上真实值和预测值三个输出[0][1][2]百分比误差
def test(test_model, test_data):
    test_model.eval()
    test_loss = 0
    y_true = []
    y_pred = []
    for features, targets in test_data:
        features = features.cuda()
        targets = targets.cuda()
        test_model = test_model.cuda()
        outputs = test_model(features)

        print(outputs.cpu().detach().numpy())

        y_true.append(targets.cpu().detach().numpy())
        y_pred.append(outputs.cpu().detach().numpy())
    y_true = np.concatenate(y_true, axis=0)
    y_pred = np.concatenate(y_pred, axis=0)
    # print('test_loss:', test_loss / len(test_data))
    return test_loss / len(test_data), y_true, y_pred

