In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import GCNConv
import os
import pandas as pd
import numpy as np
import random
import torch.nn.init as init
import pdb
from sklearn.metrics import mean_absolute_percentage_error
torch.autograd.set_detect_anomaly(True)
seed = 42

def file_name(file_dir,file_type='.csv'):#默认为文件夹下的所有文件
    lst = []
    for root, dirs, files in os.walk(file_dir):
        for file in files:
            if(file_type == ''):
                lst.append(file)
            else:
                if os.path.splitext(file)[1] == str(file_type):#获取指定类型的文件名
                    lst.append(file)
    return lst

def normalize0(inputs):
    normalized = []
    for eq in inputs:
        maks = np.max(np.abs(eq))
        if maks != 0:
            normalized.append(eq / maks)
        else:
            normalized.append(eq)
    return np.array(normalized)


def normalize1(inputs):
    normalized = []
    for eq in inputs:
        mean = np.mean(eq)
        std = np.std(eq)
        if std != 0:
            normalized.append((eq - mean) / std)
        else:
            normalized.append(eq)
    return np.array(normalized)


def normalize(inputs):
    normalized = []
    for eq in inputs:
        with np.errstate(invalid='ignore'):
            eps = 1e-10  # 可以根据需要调整epsilon的值

            eq_log = [np.log(x + eps) if i < 5 else x for i, x in enumerate(eq)]

            #eq_log = [np.log(x) if i < 5 else x for i, x in enumerate(eq)]
            eq_log1 = np.nan_to_num(eq_log).tolist()
            normalized.append(eq_log1)
    return np.array(normalized)


def k_fold_split(inputs, targets, K, seed=None):
    # 确保所有随机操作都使用相同的种子
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

    ind = int(len(inputs) / K)
    inputsK = []
    targetsK = []

    for i in range(0, K - 1):
        inputsK.append(inputs[i * ind:(i + 1) * ind])
        targetsK.append(targets[i * ind:(i + 1) * ind])

    inputsK.append(inputs[(i + 1) * ind:])
    targetsK.append(targets[(i + 1) * ind:])

    return inputsK, targetsK


def merge_splits(inputs, targets, k, K):
    if k != 0:
        z = 0
        inputsTrain = inputs[z]
        targetsTrain = targets[z]
    else:
        z = 1
        inputsTrain = inputs[z]
        targetsTrain = targets[z]

    for i in range(z + 1, K):
        if i != k:
            inputsTrain = np.concatenate((inputsTrain, inputs[i]))
            targetsTrain = np.concatenate((targetsTrain, targets[i]))

    return inputsTrain, targetsTrain, inputs[k], targets[k]


def targets_to_list(targets):
    targetList = np.array(targets)

    return targetList


In [4]:
from torch import nn
import torch
import torch.nn.functional as F
device = torch.device("cuda")
class nconv(nn.Module):
    def __init__(self):
        super(nconv, self).__init__()

    def forward(self, x, A):
        x = torch.einsum('ncvl,vw->ncwl', (x, A))
        return x.contiguous()

class GraphAttentionLayer(nn.Module):
    """
    Simple GAT layer, similar to https://arxiv.org/abs/1710.10903 
    图注意力层
    input: (B,N,C_in)
    output: (B,N,C_out)
    """
    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.in_features = in_features   # 节点表示向量的输入特征数
        self.out_features = out_features   # 节点表示向量的输出特征数
        self.dropout = dropout    # dropout参数
        self.alpha = alpha     # leakyrelu激活的参数
        self.concat = concat   # 如果为true, 再进行elu激活
        
        # 定义可训练参数，即论文中的W和a
        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))  
        nn.init.xavier_uniform_(self.W.data, gain=1.414)  # 初始化
        self.A = nn.Parameter(torch.zeros(size=(2*out_features, 16)))
        nn.init.xavier_uniform_(self.A.data, gain=1.414)   # 初始化
        
        # 定义leakyrelu激活函数
        self.leakyrelu = nn.LeakyReLU(self.alpha)
    
    def forward(self, inp, adj):
        """
        inp: input_fea [B,N, in_features]  in_features表示节点的输入特征向量元素个数
        adj: 图的邻接矩阵  [N, N] 非零即一，数据结构基本知识
        """
        h = torch.matmul(inp.double(), self.W.double())   # [B, N, out_features]
        N = h.size()[1]    # N 图的节点数

        a_input = torch.cat([h.repeat(1,1,N).view(-1, N*N, self.out_features), h.repeat(1, N, 1)], dim=-1).view(-1, N, N, 2*self.out_features)
        # [B, N, N, 2*out_features]
      
        E = [torch.matmul(a_input.double(), self.A[:,i].unsqueeze(1).double()).squeeze(3)[:,:,i] for i in range(N)]

        e = self.leakyrelu(torch.stack(E, dim=2))
        # print(e.shape)

        # [B, N, N, 1] => [B, N, N] 图注意力的相关系数（未归一化）
        
        zero_vec = -1e12 * torch.ones_like(e)    # 将没有连接的边置为负无穷


        attention = torch.where(adj>0, e, zero_vec)   # [B, N, N]
        # 表示如果邻接矩阵元素大于0时，则两个节点有连接，该位置的注意力系数保留，
        # 否则需要mask并置为非常小的值，原因是softmax的时候这个最小值会不考虑。
        attention = F.softmax(attention, dim=1)    # softmax形状保持不变 [B, N, N]，得到归一化的注意力权重！
        # print(attention.shape)
        attention = F.dropout(attention, self.dropout, training=self.training)   # dropout，防止过拟合
        h_prime = torch.matmul(attention, h)  # [B, N, N].[B, N, out_features] => [B, N, out_features]
        # 得到由周围节点通过注意力权重进行更新的表示
        if self.concat:
            return F.relu(h_prime)
        else:
            return h_prime 
    
    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

class GAT(nn.Module):
    def __init__(self, n_feat, n_hid, n_class, dropout, alpha, n_heads):
        """Dense version of GAT
        n_heads 表示有几个GAL层，最后进行拼接在一起，类似self-attention
        从不同的子空间进行抽取特征。
        """
        super(GAT, self).__init__()
        self.dropout = dropout 
        self.leakyrelu = nn.LeakyReLU()
        
        # 定义multi-head的图注意力层
        self.attentions = [GraphAttentionLayer(n_feat, n_hid, dropout=dropout, alpha=alpha, concat=True) for _ in range(n_heads)]
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)   # 加入pytorch的Module模块
        # 输出层，也通过图注意力层来实现，可实现分类、预测等功能
        self.out_att = GraphAttentionLayer(n_hid * n_heads, n_class, dropout=dropout,alpha=alpha, concat=False)
    
    def forward(self, x, adj):
        x = F.dropout(x, self.dropout, training=self.training)   # dropout，防止过拟合
        x = torch.cat([att(x, adj) for att in self.attentions], dim=2)  # 将每个head得到的表示进行拼接
        x = F.dropout(x, self.dropout, training=self.training)   # dropout，防止过拟合
        x = self.out_att(x, adj)   # 输出并激活
        #x = F.log_softmax(x, dim=2)[:, -1, :]
        # print(x)
        # print(x)
        # print(x.shape)
        return x[:, -1, :] # log_softmax速度变快，保持数值稳定
    
    
    
    def fit(self,train_loader, val_loader,lr_rate,w_d,num_epochs=5000,patience=100):
        #best_val_loss = float('inf')
        best_val_loss = torch.tensor(float('inf'), dtype=torch.double)
        patience_counter = 0
        optimizer = optim.Adam(self.parameters(), lr=lr_rate,weight_decay=w_d)
        criterion = nn.MSELoss()
        
        for epoch in range(num_epochs):
            self.train()
            train_loss = 0.0 
            for inputs, graph_input, targets in train_loader:
                optimizer.zero_grad()
                outputs = self(inputs, graph_input)
                loss = criterion(outputs.squeeze(dim=1), targets)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.parameters(), max_norm = 1)
                optimizer.step()
            #     train_loss += loss.item()
            # avg_train_loss = train_loss / len(train_loader)
            
            self.eval()
            val_loss = 0.0
            with torch.no_grad():
                for val_inputs, val_graph_input, val_targets in val_loader:
                    val_outputs = self(val_inputs, val_graph_input)
                    val_loss = criterion(val_outputs.squeeze(dim=1), val_targets)
            #         val_loss += val_loss.item()
            # avg_val_loss=val_loss / len(val_loader)
            
            if (epoch + 1) % 20 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Train Loss: {loss:.4f}, Val Loss: {val_loss:.4f}')
            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f'Early stopping at epoch {epoch+1}')
                    return
                
                
    def test(self,test_loader):
        self.eval()
        predictions = []
        for test_inputs, test_graph_input, _ in test_loader:
            batch_predictions = self(test_inputs, test_graph_input)
            predictions.append(batch_predictions)
        predictions = torch.cat(predictions)
        return predictions




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization
import numpy as np

# Assuming your PyTorch model is defined as before

def target_function(lr, hidden, nb_heads, dropout, alpha, weight_decay):
    # 在这里构建你的模型，并返回一个评估指标（例如，MAPE）
    model = GAT(7, int(hidden), 1, dropout, alpha, int(nb_heads)).to(device)
    train_loader = DataLoader(TensorDataset(torch.tensor(train_inputs).to(device), torch.tensor(train_graph_inputs).to(device),torch.tensor(train_targets).to(device)), batch_size=50, shuffle=True)
    val_loader = DataLoader(TensorDataset(torch.tensor(val_inputs).to(device), torch.tensor(val_graph_inputs).to(device), torch.tensor(val_targets).to(device)), batch_size=50, shuffle=False)
    model.fit(train_loader, val_loader, lr, weight_decay)
    predictions = model.test(val_loader)
    val_predictions = np.array([item.detach().cpu().numpy() for item in predictions]).flatten()
    mape = mean_absolute_percentage_error(val_targets, val_predictions)   
    return -np.mean(mape)  # 贝叶斯优化最小化目标，因此我们取负值

if __name__ == "__main__":
    lag_bin = 3
    lag_day = 3
    num_nodes = (int(lag_bin) + 1) * (int(lag_day) + 1)
    forecast_days = 15
    bin_num = 24
    random_state_here = 88
    mape_list = []
    data_dir = './data/volume/0308/'
    files = file_name('./data/')
    stocks_info = sorted(list(set(s.split('_25')[0] for s in files)))
    print(stocks_info)
    for stock_info in stocks_info[0:2]:
        print(f'>>>>>>>>>>>>>>>>>>>>{stock_info}>>>>>>>>>>>>>>>>>>>>>>>')
        data_dir1 = f'{data_dir}{stock_info}_{lag_bin}_{lag_day}'
        test_set_size = bin_num * forecast_days
        num_iterations = 10  # Define the number of random search iterations
        
        # 加载数据
        inputs_data = np.load(f'{data_dir1}_inputs.npy', allow_pickle=True)
        inputs_data = [[[torch.tensor(x, dtype=torch.float64) for x in sublist] for sublist in list1] for list1 in inputs_data]
        array_data = np.array(inputs_data)
        inputs = np.reshape(array_data, (len(inputs_data), num_nodes, -1))
        targets = np.load(f'{data_dir1}_output.npy', allow_pickle=True).astype(np.float64)
        graph_input = np.load(f'{data_dir1}_graph_input.npy', allow_pickle=True).astype(np.float64)
        graph_input = np.array([graph_input] * inputs.shape[0])
        graph_features = np.load(f'{data_dir1}_graph_coords.npy', allow_pickle=True).astype(np.float64)
        graph_features = np.array([graph_features] * inputs.shape[0])

        # 划分数据集，将数据集中的 15*24 天数据作为测试集
        test_inputs = inputs[-test_set_size:]
        test_targets = targets[-test_set_size:]
        test_graph_inputs = graph_input[-test_set_size:]
        test_graph_features = graph_features[:, -test_set_size:]

        # 去除测试集后剩下的数据
        train_val_inputs = inputs[:-test_set_size]
        train_val_targets = targets[:-test_set_size]
        train_val_graph_inputs = graph_input[:-test_set_size]
        train_val_graph_features = graph_features[:-test_set_size]

        # 将剩下的数据按照 80% 和 20% 划分为训练集和验证集
        val_size = int(train_val_inputs.shape[0] * 0.2)
        train_inputs, val_inputs, train_targets, val_targets, train_graph_inputs, val_graph_inputs = train_test_split(train_val_inputs, train_val_targets, train_val_graph_inputs, test_size=val_size, random_state=random_state_here)
        test_inputs = normalize(test_inputs)
        
        # 定义超参数搜索空间
        pbounds = {'lr': (0.0001, 0.01),
                   'hidden': (6, 9),
                   'nb_heads': (1, 8),
                   'dropout': (0.1, 0.5),
                   'alpha': (0.1, 0.9),
                   'weight_decay': (0.0001, 0.01)}
        
        # 创建贝叶斯优化对象
        optimizer = BayesianOptimization(f=target_function, pbounds=pbounds, random_state=1)
        
        # 运行贝叶斯优化搜索
        optimizer.maximize(init_points=10, n_iter=10)
        
        # 输出最佳超参数组合和对应的目标函数值
        print(optimizer.max)
        # 使用贝叶斯优化得到的最佳超参数来重新训练最佳模型
        best_lr = optimizer.max['params']['lr']
        best_hidden = optimizer.max['params']['hidden']
        best_nb_heads = optimizer.max['params']['nb_heads']
        best_dropout = optimizer.max['params']['dropout']
        best_alpha = optimizer.max['params']['alpha']
        best_weight_decay = optimizer.max['params']['weight_decay']

        # 重新构建模型并训练
        best_model = GAT(7, int(best_hidden), 1, best_dropout, best_alpha, int(best_nb_heads)).to(device)
        train_loader = DataLoader(TensorDataset(torch.tensor(train_inputs).to(device), torch.tensor(train_graph_inputs).to(device), torch.tensor(train_targets).to(device)), batch_size=50, shuffle=True)
        val_loader = DataLoader(TensorDataset(torch.tensor(val_inputs).to(device), torch.tensor(val_graph_inputs).to(device), torch.tensor(val_targets).to(device)), batch_size=50, shuffle=False)
        best_model.fit(train_loader, val_loader, best_lr, best_weight_decay)

        # 使用最佳模型进行预测
        test_loader = DataLoader(TensorDataset(torch.tensor(test_inputs).to(device), torch.tensor(test_graph_inputs).to(device), torch.tensor(test_targets).to(device)), batch_size=50, shuffle=False)
        predictions = best_model.test(test_loader)
        new_predictions = np.array([item.detach().cpu().numpy() for item in predictions]).flatten()

        # 计算最佳模型的MAPE
        best_MAPE = mean_absolute_percentage_error(test_targets, new_predictions)
        print('Best MAPE:', np.mean(best_MAPE))

        # 保存最佳模型的参数
        torch.save(best_model.state_dict(), f'models/gat_edge{stock_info}_{lag_bin}_{lag_day}_best_model.pt')

        # 保存最佳结果到 CSV 文件
        best_results = {
            'testTargets': test_targets.tolist(),
            'new_predictions': new_predictions.tolist(),
            'MAPE': best_MAPE.tolist()
        }
        res_df = pd.DataFrame(best_results)
        res_df.to_csv(f'./result/gat_edge{stock_info}_{lag_bin}_{lag_day}_best_results.csv', index=False)



['000046_XSHE', '000753_XSHE', '000951_XSHE', '000998_XSHE', '002282_XSHE', '002679_XSHE', '002841_XSHE', '002882_XSHE', '300133_XSHE', '300174_XSHE', '300263_XSHE', '300343_XSHE', '300433_XSHE', '300540_XSHE', '600622_XSHG', '603053_XSHG', '603095_XSHG', '603359_XSHG']
>>>>>>>>>>>>>>>>>>>>000046_XSHE>>>>>>>>>>>>>>>>>>>>>>>
|   iter    |  target   |   alpha   |  dropout  |  hidden   |    lr     | nb_heads  | weight... |
-------------------------------------------------------------------------------------------------
Epoch [20/5000], Train Loss: 261822451968.9021, Val Loss: 742201643523.1125
Epoch [40/5000], Train Loss: 589252049946.1726, Val Loss: 649694628895.6106
Epoch [60/5000], Train Loss: 227437208323.4013, Val Loss: 525825842874.0508
Epoch [80/5000], Train Loss: 446482223479.6632, Val Loss: 558449053251.7234
Epoch [100/5000], Train Loss: 202168880798.1959, Val Loss: 647346015844.0005
Epoch [120/5000], Train Loss: 2328226144798.3433, Val Loss: 532074929559.4905
Epoch [140/5000], T