In [40]:
import pandas as pd
import numpy as np

import os
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pack_sequence, pad_packed_sequence
from torch.utils.data import DataLoader
from torch.utils.data import Dataset 


In [41]:
# Training dataset
class Train_Loader(Dataset):
    """Dataset backed by an Excel sheet whose last three columns are the targets.

    On construction the sheet is read, cast to float32, NaNs are replaced by
    the mean of their column, and every column except the last three is
    min-max scaled to [0, 1]. The fitted scaler is exposed as ``self.scaler``.

    Args:
        data_dir: Path of the Excel file to read.
    """

    def __init__(
            self,
            data_dir,
    ):
        self.data_dir = data_dir
        # Read the sheet and work in float32 throughout
        frame = pd.read_excel(self.data_dir, skiprows=0).astype('float32')
        # Missing values are replaced by the mean of their own column
        column_means = frame.mean()
        frame = frame.fillna(column_means)
        # Scale every column except the last three into [0, 1]
        feature_scaler = MinMaxScaler(feature_range=(0, 1))
        frame.iloc[:, :-3] = feature_scaler.fit_transform(frame.iloc[:, :-3])
        self.data = frame.values
        self.scaler = feature_scaler

    def __getitem__(self, i):
        """Return ``(features, targets)`` for row ``i``, each with a new leading axis."""
        # All columns but the last three are features; the last three are targets
        features = self.data[i, :-3][np.newaxis, ...]
        targets = self.data[i, -3:][np.newaxis, ...]
        return features, targets

    def __len__(self):
        """Number of rows in the underlying array."""
        return self.data.shape[0]
    

# Test / validation dataset
class Test_Loader(Dataset):
    """Dataset backed by an Excel sheet, scaled with an already-fitted scaler.

    On construction the sheet is read, cast to float32, NaNs are replaced by
    the mean of their column (computed on this sheet), and every column except
    the last three is passed through ``scaler.transform``.

    Args:
        data_dir: Path of the Excel file to read.
        scaler: Object exposing ``transform``; it is applied as-is and not refitted.
    """

    def __init__(
            self,
            data_dir,
            scaler,
    ):
        self.data_dir = data_dir
        # Read the sheet and work in float32 throughout
        frame = pd.read_excel(self.data_dir, skiprows=0).astype('float32')
        # Missing values are replaced by the mean of their own column
        column_means = frame.mean()
        frame = frame.fillna(column_means)
        # Apply the supplied scaler to every column except the last three
        frame.iloc[:, :-3] = scaler.transform(frame.iloc[:, :-3])
        self.data = frame.values

    def __getitem__(self, i):
        """Return ``(features, targets)`` for row ``i``, each with a new leading axis."""
        # All columns but the last three are features; the last three are targets
        features = self.data[i, :-3][np.newaxis, ...]
        targets = self.data[i, -3:][np.newaxis, ...]
        return features, targets

    def __len__(self):
        """Number of rows in the underlying array."""
        return self.data.shape[0]

In [42]:
# Build the training dataset from the Excel file (this also fits the scaler)
train_data_dir = "data/train_data.xlsx"
train_data_loader = Train_Loader(data_dir=train_data_dir)

In [43]:
# Inspect one sample: feature shape, target shape, then the dataset length
features, targets = train_data_loader[0]
print(features.shape, targets.shape, len(train_data_loader), sep='\n')

(1, 30)
(1, 3)
17211


In [44]:
# Samples per batch
batchsize = 32 * 512
# Wrap the training dataset in a shuffling batch loader
train_data = DataLoader(
    dataset=train_data_loader,
    batch_size=batchsize,
    shuffle=True,
)
# Pull only the first batch to check the batched tensor shapes
features, targets = next(iter(train_data))
print(features.shape)
print(targets.shape)
# Number of batches per epoch
print(len(train_data))

torch.Size([16384, 1, 30])
torch.Size([16384, 1, 3])
2


In [45]:
from nets.CNN_BIGRU_Attention import CNN_BiGRU_Attention

# Constructor arguments for the network (example values, adjust as needed)
n_future, n_class = 30, 3
model = CNN_BiGRU_Attention(n_future, n_class)

# Loss function: mean squared error
criterion = nn.MSELoss()
# Optimizer: Adam over all parameters of the model
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Path prefix used when naming saved checkpoints
model_dir = "model/CNN_BiGRU_Attention"



In [46]:
# Resume from a previously saved checkpoint. The file holds a whole pickled
# model object, not just a state_dict.
# NOTE(review): torch.load unpickles arbitrary objects; only load files you trust.
# NOTE(review): newer PyTorch releases may default to weights_only=True, which
# rejects whole-model pickles; confirm against the installed version.
model = torch.load("model/CNN_BiGRU_Attention588.952.pth")
model = model.cuda()
# The optimizer created earlier is bound to the parameters of the model object
# that was just replaced, so stepping it would never update the loaded model.
# Rebuild it over the loaded model's (now GPU-resident) parameters.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Validation data: scaled with the scaler fitted on the training set
valid_data_dir = "data/valid_data.xlsx"
valid_data_loader = Test_Loader(valid_data_dir, scaler=train_data_loader.scaler)
valid_data = DataLoader(
    dataset=valid_data_loader,
    batch_size=batchsize,
    shuffle=False,
)
def valid(model, valid_data, criterion):
    """Return the mean per-batch loss of ``model`` over ``valid_data``.

    Args:
        model: Module to evaluate. It is switched to eval mode and left there.
        valid_data: Sized iterable yielding ``(features, targets)`` tensor pairs.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar tensor.

    Returns:
        Sum of the per-batch loss values divided by ``len(valid_data)``.
    """
    model.eval()
    # Send batches to wherever the model lives instead of assuming a GPU
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    valid_loss = 0
    # Evaluation needs no gradients; skip building the autograd graph
    with torch.no_grad():
        for features, targets in valid_data:
            features = features.to(device)
            targets = targets.to(device)
            outputs = model(features)
            loss = criterion(outputs, targets)
            valid_loss += loss.item()
    return valid_loss / len(valid_data)

In [None]:
# Train the model
n_epochs = 10000
# A checkpoint is written only when a batch loss drops below this running minimum
min_loss=1000
# Send batches to wherever the model lives instead of assuming a GPU
device = next(model.parameters()).device
for epoch in range(n_epochs):
    model.train()
    total_loss = 0
    for features, targets in train_data:
        features = features.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, targets)
        # Track the loss as a plain float: keeping the tensor itself as the
        # running minimum would hold on to this batch's autograd graph.
        batch_loss = loss.item()
        # Save the model whenever this batch sets a new lowest loss. The save
        # happens before the optimizer step, so the stored weights are the
        # ones that produced this loss.
        if batch_loss < min_loss:
            min_loss = batch_loss
            model_path = model_dir+f'{min_loss:.3f}.pth'
            torch.save(model, model_path)
        loss.backward()
        optimizer.step()
        total_loss += batch_loss
    # Report the mean batch loss for the epoch
    print('epoch:', epoch, 'loss:', total_loss/len(train_data))