In [4]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
from sklearn.model_selection import ParameterGrid

# Load the dataset from CSV.
file_path = "/home/develop/GATv2/data/final.csv"
data = pd.read_csv(file_path)

# Transpose so that every CSV column becomes one graph node and the rows of the
# CSV become that node's feature vector (the layout the GAT layers consume).
# The original notebook annotates the shapes as (1186, 42) -> (42, 1186).
features = torch.tensor(data.values, dtype=torch.float32).T

# Fully connected graph without self-loops: all ones minus the identity.
num_features = features.shape[0]
adj_matrix = torch.ones((num_features, num_features)) - torch.eye(num_features)
edge_index = dense_to_sparse(adj_matrix)[0]  # keep only the edge list

# Bundle node features and connectivity into one graph object.
graph_data = Data(x=features, edge_index=edge_index)

# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two-layer GATv2 network.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated; the second layer uses a single, non-concatenated head and
    maps to ``out_channels``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # With concat=True the first layer emits hidden_channels * heads features.
        first_layer_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(first_layer_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1 -> ELU -> conv2 and return the output of conv2."""
        hidden = F.elu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# Search space for the grid search: every combination of these values is tried.
param_grid = dict(
    hidden_channels=[8, 16, 32],
    heads=[1, 2, 4],
    learning_rate=[0.001, 0.005],
    weight_decay=[0.0, 1e-4],
)
grid = ParameterGrid(param_grid)

# 早停策略
class EarlyStopping:
    """Signal a stop once the loss has not improved for ``patience`` steps.

    Attributes:
        patience: number of consecutive non-improving steps tolerated.
        best_loss: lowest loss passed to ``step`` so far (starts at +inf).
        counter: consecutive non-improving steps since the last improvement.
        early_stop: becomes True once ``counter`` reaches ``patience``.
    """

    def __init__(self, patience=10):
        self.patience = patience
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def step(self, loss):
        """Record one loss value and update the stop flag."""
        improved = loss < self.best_loss
        if improved:
            # Strict improvement: remember it and restart the patience window.
            self.best_loss, self.counter = loss, 0
            return
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# Log destination (the directory is created on demand).
log_dir = "/home/develop/GATv2/Result"
os.makedirs(log_dir, exist_ok=True)
log_file = os.path.join(log_dir, "GATv2_Train.log")

# Grid-search training: train one model per hyperparameter combination and
# record the lowest reconstruction loss it reached.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
results = []

# Every configuration starts from the same RNG state so that differences in
# the final loss come from the hyperparameters, not from the random
# initialisation of the weights.
seed = 42

# The graph and the loss are identical for all configurations: set up once.
graph_data = graph_data.to(device)
loss_fn = nn.MSELoss()

with open(log_file, "w") as log:
    for params in grid:
        # Announce the current hyperparameter combination.
        log.write(f"Training with params: {params}\n")
        print(f"Training with params: {params}")

        torch.manual_seed(seed)

        # Reconstruction task: the output width equals the input width.
        model = GATv2Net(
            in_channels=features.size(1),
            hidden_channels=params['hidden_channels'],
            out_channels=features.size(1),
            heads=params['heads']
        ).to(device)

        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=params['learning_rate'],
            weight_decay=params['weight_decay']
        )

        model.train()
        early_stopping = EarlyStopping(patience=10)

        for epoch in range(100):  # upper bound on the number of epochs
            optimizer.zero_grad()
            out = model(graph_data.x, graph_data.edge_index)  # forward pass
            loss = loss_fn(out, graph_data.x)  # reconstruction loss
            loss.backward()
            optimizer.step()

            # Early stopping monitors the training loss itself; there is no
            # separate validation split in this cell.
            early_stopping.step(loss.item())
            if early_stopping.early_stop:
                break

        # Record the best (lowest) loss seen for this configuration.
        final_loss = early_stopping.best_loss
        results.append({
            'params': params,
            'final_loss': final_loss
        })

        log.write(f"Final Loss for params {params}: {final_loss:.6f}\n")
        log.write("-" * 50 + "\n")
        # Flush so finished configurations reach disk even if the run is
        # interrupted later.
        log.flush()
        print(f"Final Loss for params {params}: {final_loss:.6f}")

# Report the best hyperparameter combination.
best_result = min(results, key=lambda x: x['final_loss'])
with open(log_file, "a") as log:
    log.write("\nBest Hyperparameters:\n")
    log.write(str(best_result['params']) + "\n")
    log.write(f"Best Loss: {best_result['final_loss']:.6f}\n")

print("\nBest Hyperparameters:", best_result['params'])
print("Best Loss:", best_result['final_loss'])


Training with params: {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.001, 'weight_decay': 0.0}
Final Loss for params {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.001, 'weight_decay': 0.0}: 0.054420
Training with params: {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.001, 'weight_decay': 0.0001}
Final Loss for params {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.001, 'weight_decay': 0.0001}: 0.060208
Training with params: {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.005, 'weight_decay': 0.0}
Final Loss for params {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.005, 'weight_decay': 0.0}: 0.112538
Training with params: {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.005, 'weight_decay': 0.0001}
Final Loss for params {'heads': 1, 'hidden_channels': 8, 'learning_rate': 0.005, 'weight_decay': 0.0001}: 0.048001
Training with params: {'heads': 1, 'hidden_channels': 16, 'learning_rate': 0.001, 'weight_decay': 0.0}
Final Loss for params {'heads

KeyboardInterrupt: 

### 根据确定超参数训练并保存模型

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
import pandas as pd


# Training configuration
hidden_channels = 16   # per-head output width of the first GATv2 layer
heads = 4              # number of attention heads in the first layer
learning_rate = 0.001  # Adam learning rate
weight_decay = 0.0     # weight decay passed to Adam
epochs = 100           # upper bound on training epochs
patience = 10          # early-stopping patience
save_path = "/home/develop/GATv2/Model/GATv2_trained.pth"  # model output file
log_file = "/home/develop/GATv2/Result/GATv2_Train.log"  # training log file
# NOTE(review): this is the same log path the grid-search cell writes to, and
# the training loop opens it with mode "w" -- confirm overwriting is intended.

# Load the CSV and transpose it so that each column becomes one graph node
# (the original notebook annotates the shapes as (1186, 42) -> (42, 1186)).
file_path = "/home/develop/GATv2/data/final.csv"
data = pd.read_csv(file_path)
features = torch.tensor(data.values, dtype=torch.float32).T

# Fully connected graph without self-loops: all ones minus the identity.
num_features = features.shape[0]
adj_matrix = torch.ones((num_features, num_features)) - torch.eye(num_features)
edge_index = dense_to_sparse(adj_matrix)[0]

# Bundle node features and connectivity into one graph object.
graph_data = Data(x=features, edge_index=edge_index)

# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two-layer GATv2 network.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated; the second layer uses a single, non-concatenated head and
    maps to ``out_channels``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # With concat=True the first layer emits hidden_channels * heads features.
        first_layer_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(first_layer_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1 -> ELU -> conv2 and return the output of conv2."""
        hidden = F.elu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# 早停机制
class EarlyStopping:
    """Signal a stop once the loss has not improved for ``patience`` steps.

    Attributes:
        patience: number of consecutive non-improving steps tolerated.
        best_loss: lowest loss passed to ``step`` so far (starts at +inf).
        counter: consecutive non-improving steps since the last improvement.
        early_stop: becomes True once ``counter`` reaches ``patience``.
    """

    def __init__(self, patience=10):
        self.patience = patience
        self.best_loss = float('inf')
        self.counter = 0
        self.early_stop = False

    def step(self, loss):
        """Record one loss value and update the stop flag."""
        improved = loss < self.best_loss
        if improved:
            # Strict improvement: remember it and restart the patience window.
            self.best_loss, self.counter = loss, 0
            return
        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# Make sure both output directories exist before training starts. torch.save
# fails when the parent directory of save_path is missing, which would
# otherwise discard a finished training run.
os.makedirs(os.path.dirname(log_file), exist_ok=True)
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Model and optimiser
device = torch.device('cpu')  # CPU is forced for this run
model = GATv2Net(
    in_channels=features.size(1),
    hidden_channels=hidden_channels,
    out_channels=features.size(1),  # reconstruction: output width == input width
    heads=heads
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
loss_fn = nn.MSELoss()

# Move every tensor of the graph to the selected device.
graph_data = graph_data.to(device)

# Training loop
model.train()
early_stopping = EarlyStopping(patience=patience)

with open(log_file, "w") as log:
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(graph_data.x, graph_data.edge_index)  # forward pass
        loss = loss_fn(out, graph_data.x)  # reconstruction loss
        loss.backward()
        optimizer.step()

        # Log the loss of this epoch to file and stdout.
        log.write(f"Epoch {epoch}/{epochs}, Loss: {loss.item():.6f}\n")
        print(f"Epoch {epoch}/{epochs}, Loss: {loss.item():.6f}")

        # Early stopping monitors the training loss.
        early_stopping.step(loss.item())
        if early_stopping.early_stop:
            print(f"Early stopping triggered at epoch {epoch}")
            log.write(f"Early stopping triggered at epoch {epoch}\n")
            break

# Persist the whole module object (not only a state_dict): the later cells
# call torch.load on this file and use the result directly as a model.
torch.save(model, save_path)
print(f"Model saved to {save_path}")
with open(log_file, "a") as log:
    log.write(f"Model saved to {save_path}\n")


Epoch 0/100, Loss: 0.267303
Epoch 1/100, Loss: 0.239654
Epoch 2/100, Loss: 0.229990
Epoch 3/100, Loss: 0.219853
Epoch 4/100, Loss: 0.207357
Epoch 5/100, Loss: 0.193916
Epoch 6/100, Loss: 0.181270
Epoch 7/100, Loss: 0.169799
Epoch 8/100, Loss: 0.159136
Epoch 9/100, Loss: 0.149655
Epoch 10/100, Loss: 0.141743
Epoch 11/100, Loss: 0.135451
Epoch 12/100, Loss: 0.130533
Epoch 13/100, Loss: 0.126698
Epoch 14/100, Loss: 0.123701
Epoch 15/100, Loss: 0.121346
Epoch 16/100, Loss: 0.119491
Epoch 17/100, Loss: 0.118103
Epoch 18/100, Loss: 0.117098
Epoch 19/100, Loss: 0.116395
Epoch 20/100, Loss: 0.115883
Epoch 21/100, Loss: 0.115470
Epoch 22/100, Loss: 0.115111
Epoch 23/100, Loss: 0.114786
Epoch 24/100, Loss: 0.114441
Epoch 25/100, Loss: 0.114065
Epoch 26/100, Loss: 0.113623
Epoch 27/100, Loss: 0.113113
Epoch 28/100, Loss: 0.112552
Epoch 29/100, Loss: 0.111951
Epoch 30/100, Loss: 0.111305
Epoch 31/100, Loss: 0.110584
Epoch 32/100, Loss: 0.109725
Epoch 33/100, Loss: 0.108721
Epoch 34/100, Loss: 0.10

### 加载训练好的 GATv2 模型

In [4]:
import torch
import pandas as pd
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import torch.nn as nn
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
from sklearn.model_selection import ParameterGrid



# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two-layer GATv2 network.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated; the second layer uses a single, non-concatenated head and
    maps to ``out_channels``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # With concat=True the first layer emits hidden_channels * heads features.
        first_layer_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(first_layer_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1 -> ELU -> conv2 and return the output of conv2."""
        hidden = F.elu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)
# 1. Load the data to run inference on (this cell reads final.csv).
file_path = "/home/develop/GATv2/data/final.csv"
data_new = pd.read_csv(file_path)

# 2. Convert to a float32 tensor and transpose so that each CSV column becomes
#    one graph node (the original notebook annotates the result as (42, N)).
features_new = torch.tensor(data_new.values, dtype=torch.float32).T
num_features_new = features_new.size(0)

# 3. Edge index of a fully connected graph without self-loops.
adj_matrix_new = torch.ones((num_features_new, num_features_new)) - torch.eye(num_features_new)
edge_index_new = dense_to_sparse(adj_matrix_new)[0]

# 4. Build the graph object.
device = torch.device('cpu')  # run on CPU
graph_data_new = Data(x=features_new, edge_index=edge_index_new)

# 5. Load the trained GATv2 model.
#    The file holds a fully pickled nn.Module, so weights_only=False is
#    required: torch.load is moving to weights_only=True by default, which
#    refuses to unpickle arbitrary classes. map_location keeps the load working
#    regardless of the device the model was saved from.
#    SECURITY: unpickling executes arbitrary code -- only load trusted files.
model = torch.load(
    "/home/develop/GATv2/Model/GATv2_trained.pth",
    map_location=device,
    weights_only=False,
)
model.eval()  # evaluation mode

# 6. Move the graph to the same device as the model.
graph_data_new = graph_data_new.to(device)

# 7. Inference without gradient tracking.
with torch.no_grad():
    output_new = model(graph_data_new.x, graph_data_new.edge_index)

# 8. Show the model output.
print("GATv2 模型的输出：")
print(output_new)

# 9. Show the output shape to check it matches the expectation.
print(f"模型输出的维度: {output_new.shape}")


GATv2 模型的输出：
tensor([[ 0.4915,  0.5633,  0.4966,  ...,  0.5573,  0.6134,  0.6735],
        [ 0.5077,  0.5791,  0.5039,  ...,  0.5742,  0.6353,  0.6909],
        [ 0.3111,  0.3866,  0.4035,  ...,  0.3606,  0.3791,  0.4775],
        ...,
        [-0.0446,  0.0403,  0.1793,  ..., -0.0525, -0.0463,  0.0910],
        [ 0.1278,  0.2070,  0.3073,  ...,  0.1598,  0.1426,  0.2781],
        [-0.0446,  0.0403,  0.1794,  ..., -0.0524, -0.0462,  0.0911]])
模型输出的维度: torch.Size([42, 1186])


  model = torch.load("/home/develop/GATv2/Model/GATv2_trained.pth")


### GATv2-VAE联合训练代码

In [6]:
import torch
import pandas as pd
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
import torch.nn as nn
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
from sklearn.model_selection import ParameterGrid



# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two-layer GATv2 network.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated; the second layer uses a single, non-concatenated head and
    maps to ``out_channels``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # With concat=True the first layer emits hidden_channels * heads features.
        first_layer_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(first_layer_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1 -> ELU -> conv2 and return the output of conv2."""
        hidden = F.elu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# Load the CSV and transpose it so that each column becomes one graph node
# (the original notebook annotates the result as (42, N)).
file_path = "/home/develop/GATv2/data/final.csv"
data_new = pd.read_csv(file_path)

features_new = torch.tensor(data_new.values, dtype=torch.float32).T
num_features_new = features_new.size(0)

# Fully connected graph without self-loops.
adj_matrix_new = torch.ones((num_features_new, num_features_new)) - torch.eye(num_features_new)
edge_index_new = dense_to_sparse(adj_matrix_new)[0]

# Build the graph object.
device = torch.device('cpu')  # run on CPU
graph_data_new = Data(x=features_new, edge_index=edge_index_new)

# Load the trained GATv2 model.
# The file holds a fully pickled nn.Module, so weights_only=False is required:
# torch.load is moving to weights_only=True by default, which refuses to
# unpickle arbitrary classes. map_location keeps the load device-independent.
# SECURITY: unpickling executes arbitrary code -- only load trusted files.
model = torch.load(
    "/home/develop/GATv2/Model/GATv2_trained.pth",
    map_location=device,
    weights_only=False,
)
model.eval()  # evaluation mode

# Move the graph to the same device as the model.
graph_data_new = graph_data_new.to(device)

# Inference without gradient tracking.
with torch.no_grad():
    output_new = model(graph_data_new.x, graph_data_new.edge_index)

# Transpose the model output back to one row per CSV row (the original
# notebook annotates this as [42, 1186] -> [1186, 42]) and convert to NumPy.
output_new_transposed = output_new.T
output_new_transposed_np = output_new_transposed.detach().numpy()


  model = torch.load("/home/develop/GATv2/Model/GATv2_trained.pth")


In [9]:
# datax = pd.read_csv('/home/develop/VAE/data/csv/final.csv')
# print(datax.shape)
# print(type(datax))

# Relies on `output_new_transposed` produced by the preceding inference cell.
# Show shape and type of the tensor, wrap it in a DataFrame, then show the
# same information (and the content) for the DataFrame.
print(output_new_transposed.shape, type(output_new_transposed), sep="\n")
output_new_transposed_df = pd.DataFrame(output_new_transposed.numpy())

print(output_new_transposed_df.shape, type(output_new_transposed_df), sep="\n")
print(output_new_transposed_df)


torch.Size([1186, 42])
<class 'torch.Tensor'>
(1186, 42)
<class 'pandas.core.frame.DataFrame'>
            0         1         2         3         4         5         6   \
0     0.491543  0.507723  0.311147  0.148319  0.308923  0.269823  0.285101   
1     0.563308  0.579097  0.386625  0.227106  0.384451  0.346154  0.361114   
2     0.496563  0.503938  0.403460  0.316362  0.402245  0.381320  0.389020   
3     0.588065  0.599112  0.449486  0.320111  0.447691  0.416622  0.428198   
4     0.635775  0.649510  0.469936  0.316828  0.467824  0.431070  0.445005   
...        ...       ...       ...       ...       ...       ...       ...   
1181  0.623153  0.638852  0.445490  0.284472  0.443287  0.404638  0.419667   
1182  0.720025  0.738142  0.501650  0.300035  0.498872  0.450466  0.468798   
1183  0.557254  0.574246  0.360610  0.181200  0.358141  0.315071  0.331635   
1184  0.613396  0.635251  0.379110  0.170461  0.376282  0.326213  0.346216   
1185  0.673518  0.690930  0.477546  0.300305  0

In [None]:
import os
import numpy as np
import pandas as pd
import torch.nn as nn
import pandas as pd
import torch
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, Model
from datetime import datetime
from torch_geometric.data import Data
from torch_geometric.utils import dense_to_sparse
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
from sklearn.model_selection import ParameterGrid


# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two-layer GATv2 network.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated; the second layer uses a single, non-concatenated head and
    maps to ``out_channels``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # With concat=True the first layer emits hidden_channels * heads features.
        first_layer_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(first_layer_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1 -> ELU -> conv2 and return the output of conv2."""
        hidden = F.elu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# Load the CSV and transpose it so that each column becomes one graph node
# (the original notebook annotates the result as (42, N)).
file_path = "/home/develop/GATv2/data/final.csv"
data_new = pd.read_csv(file_path)

features_new = torch.tensor(data_new.values, dtype=torch.float32).T
num_features_new = features_new.size(0)

# Fully connected graph without self-loops.
adj_matrix_new = torch.ones((num_features_new, num_features_new)) - torch.eye(num_features_new)
edge_index_new = dense_to_sparse(adj_matrix_new)[0]

# Build the graph object.
device = torch.device('cpu')  # run on CPU
graph_data_new = Data(x=features_new, edge_index=edge_index_new)

# Load the trained GATv2 model.
# The file holds a fully pickled nn.Module, so weights_only=False is required:
# torch.load is moving to weights_only=True by default, which refuses to
# unpickle arbitrary classes. map_location keeps the load device-independent.
# SECURITY: unpickling executes arbitrary code -- only load trusted files.
model = torch.load(
    "/home/develop/GATv2/Model/GATv2_trained.pth",
    map_location=device,
    weights_only=False,
)
model.eval()  # evaluation mode

# Move the graph to the same device as the model.
graph_data_new = graph_data_new.to(device)

# Inference without gradient tracking.
with torch.no_grad():
    output_new = model(graph_data_new.x, graph_data_new.edge_index)

# Back to one row per CSV row (annotated as [1186, 42] in the original).
output_new_transposed = output_new.T
output_new_transposed_df = pd.DataFrame(output_new_transposed.numpy())


# VAE input: the GATv2 output as a NumPy array.
X = output_new_transposed_df.values

# 80/20 train/validation split with a fixed random_state.
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)

# 定义采样层
def sampling(args):
    """Draw a latent sample with the reparameterisation trick.

    Args:
        args: pair ``(z_mean, z_log_var)`` of 2-D tensors.

    Returns:
        ``z_mean + exp(0.5 * z_log_var) * eps`` with ``eps`` drawn from a
        standard normal of shape ``(batch, dim)`` taken from ``z_mean``.
    """
    mean, log_var = args
    mean_shape = tf.shape(mean)
    noise = tf.random.normal(shape=(mean_shape[0], mean_shape[1]))
    std = tf.exp(0.5 * log_var)
    return mean + std * noise

# 定义编码器部分为独立的模型
class Encoder(layers.Layer):
    """VAE encoder: two ReLU dense layers (64, 32) followed by linear heads
    for the latent mean and log-variance, plus a sampling layer."""

    def __init__(self, latent_dim, **kwargs):
        super().__init__(**kwargs)
        self.dense_1 = layers.Dense(64, activation="relu")
        self.dense_2 = layers.Dense(32, activation="relu")
        self.dense_mean = layers.Dense(latent_dim)
        self.dense_log_var = layers.Dense(latent_dim)
        self.sampling_layer = layers.Lambda(sampling)

    def call(self, inputs):
        """Return ``(z_mean, z_log_var, z)`` for ``inputs``."""
        hidden = self.dense_2(self.dense_1(inputs))
        z_mean = self.dense_mean(hidden)
        z_log_var = self.dense_log_var(hidden)
        return z_mean, z_log_var, self.sampling_layer([z_mean, z_log_var])

# 定义解码器部分为独立的模型
class Decoder(layers.Layer):
    """VAE decoder: two ReLU dense layers (32, 64) followed by a sigmoid
    output layer of width ``original_dim``."""

    def __init__(self, original_dim, **kwargs):
        super().__init__(**kwargs)
        self.dense_1 = layers.Dense(32, activation="relu")
        self.dense_2 = layers.Dense(64, activation="relu")
        self.dense_output = layers.Dense(original_dim, activation="sigmoid")

    def call(self, inputs):
        """Map latent vectors to reconstructions."""
        hidden = self.dense_2(self.dense_1(inputs))
        return self.dense_output(hidden)

# 定义VAE模型
class VAE(Model):
    """Variational autoencoder with custom train/test steps.

    The objective is ``reconstruction_loss + 0.1 * kl_loss`` where the
    reconstruction loss is the mean squared error and the KL term is averaged
    over the latent dimensions and over the batch.

    Args:
        original_dim: width of the input / reconstruction.
        latent_dim: width of the latent space.
    """

    def __init__(self, original_dim, latent_dim, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.original_dim = original_dim
        self.encoder = Encoder(latent_dim=latent_dim)
        self.decoder = Decoder(original_dim=original_dim)
        self.total_loss_tracker = tf.keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Metrics tracked by the custom train/test steps."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def _compute_losses(self, data):
        """Return scalar ``(total_loss, reconstruction_loss, kl_loss)``."""
        z_mean, z_log_var, z = self.encoder(data)
        reconstruction = self.decoder(z)
        reconstruction_loss_fn = tf.keras.losses.MeanSquaredError()
        reconstruction_loss = tf.reduce_mean(reconstruction_loss_fn(data, reconstruction))
        kl_per_sample = -0.5 * tf.reduce_mean(
            1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=-1
        )
        # Reduce over the batch as well. Leaving the KL term as a per-sample
        # vector made total_loss a vector, and GradientTape.gradient sums the
        # gradients of a non-scalar target, so the gradient magnitude grew
        # with the batch size.
        kl_loss = tf.reduce_mean(kl_per_sample)
        total_loss = reconstruction_loss + 0.1 * kl_loss
        return total_loss, reconstruction_loss, kl_loss

    def _update_trackers(self, total_loss, reconstruction_loss, kl_loss):
        """Feed the three losses to their trackers and return the log dict."""
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

    def train_step(self, data):
        """Run one optimisation step on ``data`` (inputs only, no targets)."""
        with tf.GradientTape() as tape:
            total_loss, reconstruction_loss, kl_loss = self._compute_losses(data)
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def test_step(self, data):
        """Evaluate the losses on ``data``.

        ``data`` is used directly as the input features, not as an
        ``(input, target)`` tuple.
        """
        total_loss, reconstruction_loss, kl_loss = self._compute_losses(data)
        return self._update_trackers(total_loss, reconstruction_loss, kl_loss)

    def call(self, inputs):
        """Reconstruct ``inputs`` through a sampled latent vector."""
        _, _, z = self.encoder(inputs)
        return self.decoder(z)

# Grid search over VAE hyperparameters (latent_dim x batch_size x epochs).
# For every combination: train a fresh VAE on X_train, save a loss plot and a
# reconstruction-error histogram, and log threshold / anomaly count / time.
# NOTE(review): `log_file`, `latent_dims`, `batch_sizes`, `epochs_list` and
# `save_dir` are not defined in this cell -- presumably left over from other
# cells in the kernel; TODO define them here so the cell runs on a fresh kernel.
# NOTE(review): the file names below are built as f"{save_dir}..." with no
# separator, so `save_dir` presumably ends with a path separator -- confirm.
with open(log_file, "w") as log:
    log.write("Training Log\n")
    log.write("Parameters: latent_dim, batch_size, epochs\n")
    log.write("Results: reconstruction_error_threshold, anomalies_detected, training_time\n")
    log.write("-" * 80 + "\n")

    for latent_dim in latent_dims:
        for batch_size in batch_sizes:
            for epochs in epochs_list:
                input_dim = X_train.shape[1]

                # Build a fresh VAE for this combination.
                vae = VAE(original_dim=input_dim, latent_dim=latent_dim)
                vae.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005))

                # Train and time the model.
                start_time = datetime.now()
                history = vae.fit(
                    X_train, 
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_data=(X_test,),  # validation inputs only, no targets
                    verbose=0
                )
                training_time = datetime.now() - start_time

                # Plot the training loss curve.
                plt.figure(figsize=(10, 6))
                
                # Only plot a validation curve if Keras recorded one.
                if 'val_loss' in history.history:
                    plt.plot(history.history["loss"], label="Train Loss")
                    plt.plot(history.history["val_loss"], label="Validation Loss")
                else:
                    plt.plot(history.history["loss"], label="Loss")
                
                plt.xlabel("Epoch")
                plt.ylabel("Loss")
                plt.legend()
                plt.title(f"Loss (latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs})")
                plt.savefig(f"{save_dir}loss_latent{latent_dim}_batch{batch_size}_epochs{epochs}.png")
                plt.close()

                # Anomaly detection: per-sample mean squared reconstruction error
                # on X_test, flagged when above the 95th percentile of those same
                # errors. By construction this flags about 5% of X_test no matter
                # how well the model fits.
                X_pred = vae.predict(X_test, verbose=0)
                reconstruction_error = np.mean(np.square(X_test - X_pred), axis=1)
                threshold = np.percentile(reconstruction_error, 95)
                anomalies = reconstruction_error > threshold

                # Plot the distribution of reconstruction errors.
                plt.figure()
                plt.hist(reconstruction_error, bins=50)
                plt.xlabel("Reconstruction Error")
                plt.ylabel("Number of Samples")
                plt.title(f"Error Dist. (latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs})")
                plt.savefig(f"{save_dir}error_dist_latent{latent_dim}_batch{batch_size}_epochs{epochs}.png")
                plt.close()

                # Write the results to the log file and to stdout.
                log.write(f"latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs}\n")
                log.write(f"reconstruction_error_threshold={threshold:.4f}, anomalies_detected={np.sum(anomalies)}, training_time={training_time}\n")
                log.write("-" * 80 + "\n")
                print(f"Params: latent_dim={latent_dim}, batch_size={batch_size}, epochs={epochs}")
                print(f"Reconstruction Error Threshold: {threshold:.4f}")
                print(f"Anomalies detected: {np.sum(anomalies)}")
                print(f"Training Time: {training_time}\n")

Params: latent_dim=5, batch_size=16, epochs=20
Reconstruction Error Threshold: 0.0242
Anomalies detected: 12
Training Time: 0:00:02.773572

Params: latent_dim=5, batch_size=16, epochs=30
Reconstruction Error Threshold: 0.0244
Anomalies detected: 12
Training Time: 0:00:03.634187

Params: latent_dim=5, batch_size=16, epochs=50
Reconstruction Error Threshold: 0.0258
Anomalies detected: 12
Training Time: 0:00:05.193417

Params: latent_dim=5, batch_size=32, epochs=20
Reconstruction Error Threshold: 0.0247
Anomalies detected: 12
Training Time: 0:00:02.186362

Params: latent_dim=5, batch_size=32, epochs=30
Reconstruction Error Threshold: 0.0252
Anomalies detected: 12
Training Time: 0:00:03.229337

Params: latent_dim=5, batch_size=32, epochs=50
Reconstruction Error Threshold: 0.0250
Anomalies detected: 12
Training Time: 0:00:03.813287

Params: latent_dim=5, batch_size=64, epochs=20
Reconstruction Error Threshold: 0.0275
Anomalies detected: 12
Training Time: 0:00:01.952383

Params: latent_dim=5

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two-layer GATv2 network.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated; the second layer uses a single, non-concatenated head and
    maps to ``out_channels``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # With concat=True the first layer emits hidden_channels * heads features.
        first_layer_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(first_layer_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1 -> ELU -> conv2 and return the output of conv2."""
        hidden = F.elu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

def prepare_data(file_path):
    """Read a CSV file and turn it into a fully connected feature graph.

    An ``origin_index`` column, if present, is excluded from the features.
    The remaining table is transposed so that each CSV column becomes one
    node, and all distinct nodes are connected (no self-loops).

    Args:
        file_path: path of the CSV file to read.

    Returns:
        A ``Data`` object with ``x`` (float32 node features) and
        ``edge_index``.
    """
    frame = pd.read_csv(file_path)
    if 'origin_index' in frame.columns:
        frame = frame.drop(columns=['origin_index'])

    node_features = torch.tensor(frame.values, dtype=torch.float32).T
    n_nodes = node_features.size(0)

    # All ones minus the identity: every pair of distinct nodes is linked.
    dense_adj = torch.ones((n_nodes, n_nodes)) - torch.eye(n_nodes)
    return Data(x=node_features, edge_index=dense_to_sparse(dense_adj)[0])

def inference(model, file_path):
    """Run ``model`` on the graph built from ``file_path``.

    Puts the model in evaluation mode, builds the graph with
    ``prepare_data``, moves it to the module-level ``device`` and runs a
    forward pass without gradient tracking. The raw output and its shape are
    printed.

    Returns:
        The transposed model output as a NumPy array.
    """
    model.eval()
    graph = prepare_data(file_path).to(device)

    with torch.no_grad():
        prediction = model(graph.x, graph.edge_index)

    # Show the raw output and its shape.
    print("GATv2 模型的输出：")
    print(prediction)
    print(f"模型输出的维度: {prediction.shape}")
    return prediction.T.numpy()

# VAE相关部分
def build_vae(input_dim, latent_dim):
    """Build and compile a VAE for inputs of width ``input_dim``.

    The encoder, decoder and model classes are defined locally. The returned
    model is compiled with Adam (learning rate 0.0005) and no loss.
    """

    class Encoder(layers.Layer):
        """Two ReLU dense layers (64, 32) with linear mean / log-variance heads."""

        def __init__(self, latent_dim, **kwargs):
            super().__init__(**kwargs)
            self.dense_1 = layers.Dense(64, activation="relu")
            self.dense_2 = layers.Dense(32, activation="relu")
            self.dense_mean = layers.Dense(latent_dim)
            self.dense_log_var = layers.Dense(latent_dim)

        def call(self, inputs):
            hidden = self.dense_2(self.dense_1(inputs))
            z_mean = self.dense_mean(hidden)
            z_log_var = self.dense_log_var(hidden)
            return z_mean, z_log_var

    class Decoder(layers.Layer):
        """Two ReLU dense layers (32, 64) with a sigmoid output layer."""

        def __init__(self, original_dim, **kwargs):
            super().__init__(**kwargs)
            self.dense_1 = layers.Dense(32, activation="relu")
            self.dense_2 = layers.Dense(64, activation="relu")
            self.dense_output = layers.Dense(original_dim, activation="sigmoid")

        def call(self, inputs):
            hidden = self.dense_2(self.dense_1(inputs))
            return self.dense_output(hidden)

    class VAE(Model):
        """Encoder -> reparameterised sample -> decoder."""

        def __init__(self, original_dim, latent_dim, **kwargs):
            super().__init__(**kwargs)
            self.original_dim = original_dim
            self.encoder = Encoder(latent_dim=latent_dim)
            self.decoder = Decoder(original_dim=original_dim)

        def call(self, inputs):
            z_mean, z_log_var = self.encoder(inputs)
            # The noise width comes from build_vae's latent_dim (closure).
            batch = tf.shape(z_mean)[0]
            epsilon = tf.random.normal(shape=(batch, latent_dim))
            std = tf.exp(0.5 * z_log_var)
            return self.decoder(z_mean + std * epsilon)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    vae = VAE(original_dim=input_dim, latent_dim=latent_dim)
    vae.compile(optimizer=optimizer)
    return vae

# Hyperparameters of the GATv2 network.
hidden_channels = 16  # per-head output width of the first layer
heads = 4  # number of attention heads

# Input data.
data_file_path = "/home/develop/GATv2/data/test.csv"

# Device configuration.
device = torch.device('cpu')  # CPU is forced

# Build the model with input / output width taken from the test graph
# (reconstruction task: output width equals input width).
graph_data = prepare_data(data_file_path)
in_channels = graph_data.x.size(1)
out_channels = graph_data.x.size(1)

# NOTE(review): this GATv2Net is freshly constructed and no trained weights
# are loaded anywhere in this cell, so inference below runs with random
# weights. in_channels is the number of rows of test.csv, so a model trained
# on a file with a different row count cannot be reused as-is -- confirm the
# intended workflow.
model = GATv2Net(
    in_channels=in_channels,
    hidden_channels=hidden_channels,
    out_channels=out_channels,
    heads=heads
).to(device)

# Run the model on the test data.
output_new_transposed = inference(model, data_file_path)

# Wrap the GATv2 output in a DataFrame.
output_new_transposed_df = pd.DataFrame(output_new_transposed)

# Build the VAE.
latent_dim = 20
input_dim = output_new_transposed_df.shape[1]

# NOTE(review): the VAE is built but never fitted and no weights are loaded in
# this cell, so the reconstruction errors below come from an untrained
# network -- confirm.
vae = build_vae(input_dim, latent_dim)

# Reconstruct the whole data set and compute the per-row mean squared error.
X_pred = vae.predict(output_new_transposed_df.values, batch_size=16, verbose=0)
reconstruction_error = np.mean(np.square(output_new_transposed_df.values - X_pred), axis=1)

# Flag rows whose error exceeds the 87th percentile of all errors.
threshold = np.percentile(reconstruction_error, 87)
anomalies = reconstruction_error > threshold

# Positional indices of the flagged rows.
anomalous_indices = np.where(anomalies)[0]
print("Anomalous indices:")
print(anomalous_indices)

# Re-read the CSV to map the flagged rows back to their 'origin_index'.
data = pd.read_csv(data_file_path)

if 'origin_index' not in data.columns:
    print("No origin_index column found.")
else:
    # anomalous_indices are row positions (from np.where), so select by
    # position with .iloc rather than by label with .loc.
    anomalous_origin_indices = data['origin_index'].iloc[anomalous_indices].values

    print("Origin indices for the specified rows:")
    print(anomalous_origin_indices)

# Add any further processing or saving of the results here.

GATv2 模型的输出：
tensor([[ 0.2154,  0.2661,  0.2049,  ..., -0.4076, -0.2532, -0.4238],
        [ 0.2154,  0.2661,  0.2049,  ..., -0.4076, -0.2532, -0.4238],
        [ 0.2154,  0.2661,  0.2049,  ..., -0.4075, -0.2532, -0.4238],
        ...,
        [ 0.2154,  0.2661,  0.2049,  ..., -0.4075, -0.2532, -0.4238],
        [ 0.2154,  0.2661,  0.2049,  ..., -0.4075, -0.2532, -0.4238],
        [ 0.2154,  0.2661,  0.2049,  ..., -0.4075, -0.2532, -0.4238]])
模型输出的维度: torch.Size([42, 115])
Anomalous indices:
[  3  13  14  45  52  54  58  59  64  74  88  89 104 106 110]
Origin indices for the specified rows:
[  0 895   0   0   0   0 608   0 115   0   0   0   0 932   0]


In [50]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GATv2Conv
from torch_geometric.utils import dense_to_sparse
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model

# GATv2 模型定义
class GATv2Net(nn.Module):
    """Two stacked GATv2Conv layers with an ELU activation in between.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Per-head output size of the first layer.
        out_channels: Output size of the second layer.
        heads: Number of attention heads used by the first layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads):
        super().__init__()
        # The first layer concatenates its heads (concat=True), so the second
        # layer receives hidden_channels * heads features per node.
        concat_width = hidden_channels * heads
        self.conv1 = GATv2Conv(in_channels, hidden_channels, heads=heads, concat=True)
        self.conv2 = GATv2Conv(concat_width, out_channels, heads=1, concat=False)

    def forward(self, x, edge_index):
        """Apply conv1, ELU, then conv2 and return the result."""
        activated = F.elu(self.conv1(x, edge_index))
        return self.conv2(activated, edge_index)

def prepare_data(file_path):
    """Load a CSV file and wrap it as a fully connected graph.

    Every CSV column except ``origin_index`` (dropped when present) becomes
    one node, and that column's values form the node's feature vector. Each
    pair of distinct nodes is connected; there are no self-loops.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A ``Data`` object with the transposed feature matrix as ``x`` and the
        complete-graph connectivity as ``edge_index``.
    """
    frame = pd.read_csv(file_path)

    # Drop origin_index when it exists; errors='ignore' makes this a no-op otherwise.
    values = frame.drop(columns=['origin_index'], errors='ignore').values

    # Transpose so that rows are CSV columns (nodes) and columns are CSV rows.
    node_features = torch.tensor(values, dtype=torch.float32).T
    node_count = node_features.size(0)

    # Complete graph: an all-ones matrix minus the identity.
    adjacency = torch.ones((node_count, node_count)) - torch.eye(node_count)
    edge_index, _ = dense_to_sparse(adjacency)

    return Data(x=node_features, edge_index=edge_index)

def inference(model, file_path):
    """Run the model once over the graph built from a CSV file.

    The graph is built with ``prepare_data`` and moved to the module-level
    ``device`` before the forward pass. The raw output and its shape are
    printed.

    Args:
        model: Module invoked as ``model(x, edge_index)``.
        file_path: Path of the CSV file forwarded to ``prepare_data``.

    Returns:
        The model output, transposed, as a NumPy array.
    """
    model.eval()  # switch to evaluation mode
    graph_data_new = prepare_data(file_path).to(device)

    # No gradients are needed for a pure forward pass.
    with torch.no_grad():
        output_new = model(graph_data_new.x, graph_data_new.edge_index)

    # Show the raw output and its shape
    print("GATv2 模型的输出：")
    print(output_new)
    print(f"模型输出的维度: {output_new.shape}")

    # Tensor.numpy() only works for CPU tensors; .cpu() is a no-op when the
    # tensor already lives on the CPU, so this also works for other devices.
    return output_new.T.cpu().numpy()

# VAE相关部分
def build_vae(input_dim, latent_dim):
    """Construct and compile a dense variational autoencoder.

    The encoder is Dense(64) -> Dense(32) followed by two linear heads that
    produce the latent mean and log-variance. The decoder is Dense(32) ->
    Dense(64) -> Dense(input_dim) with a sigmoid on the output, so every
    reconstructed value lies between 0 and 1.

    NOTE(review): compile() receives only an optimizer (no loss), and this
    function never trains the model - confirm training happens elsewhere.

    Args:
        input_dim: Width of the input and of the reconstructed output.
        latent_dim: Width of the latent code.

    Returns:
        The compiled VAE model.
    """
    class Encoder(layers.Layer):
        """Maps inputs to the mean and log-variance of the latent code."""

        def __init__(self, latent_dim, **kwargs):
            super().__init__(**kwargs)
            self.dense_1 = layers.Dense(64, activation="relu")
            self.dense_2 = layers.Dense(32, activation="relu")
            self.dense_mean = layers.Dense(latent_dim)
            self.dense_log_var = layers.Dense(latent_dim)

        def call(self, inputs):
            hidden = self.dense_2(self.dense_1(inputs))
            z_mean = self.dense_mean(hidden)
            z_log_var = self.dense_log_var(hidden)
            return z_mean, z_log_var

    class Decoder(layers.Layer):
        """Maps a latent code back to the original feature width."""

        def __init__(self, original_dim, **kwargs):
            super().__init__(**kwargs)
            self.dense_1 = layers.Dense(32, activation="relu")
            self.dense_2 = layers.Dense(64, activation="relu")
            self.dense_output = layers.Dense(original_dim, activation="sigmoid")

        def call(self, inputs):
            hidden = self.dense_2(self.dense_1(inputs))
            return self.dense_output(hidden)

    class VAE(Model):
        """Encoder, reparameterised sampling step, then decoder."""

        def __init__(self, original_dim, latent_dim, **kwargs):
            super().__init__(**kwargs)
            self.original_dim = original_dim
            self.encoder = Encoder(latent_dim=latent_dim)
            self.decoder = Decoder(original_dim=original_dim)

        def call(self, inputs):
            z_mean, z_log_var = self.encoder(inputs)
            # One noise vector per input row; latent_dim is the argument of
            # the enclosing build_vae call.
            batch_size = tf.shape(z_mean)[0]
            noise = tf.random.normal(shape=(batch_size, latent_dim))
            # z = mean + std * noise, where std = exp(0.5 * log_var)
            std = tf.exp(0.5 * z_log_var)
            return self.decoder(z_mean + std * noise)

    model = VAE(original_dim=input_dim, latent_dim=latent_dim)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
    model.compile(optimizer=optimizer)
    return model

# Hyperparameters: hidden width per head, and number of attention heads
hidden_channels, heads = 16, 4

# Data path
data_file_path = "/home/develop/GATv2/data/test.csv"

# Device configuration: force CPU
device = torch.device('cpu')

# Build the graph once to read the feature width. The output width equals the
# input width (reconstruction task).
# NOTE(review): the original comment assumes the dimensions match those used
# in training, but no saved weights are loaded in this block - the model
# below is used as freshly constructed. Confirm this is intended.
graph_data = prepare_data(data_file_path)
in_channels = out_channels = graph_data.x.size(1)

model = GATv2Net(in_channels, hidden_channels, out_channels, heads).to(device)

# Run inference with the model
output_new_transposed = inference(model, data_file_path)

# Wrap the GATv2 output in a DataFrame
output_new_transposed_df = pd.DataFrame(output_new_transposed)

# Build the VAE; its input width is the number of DataFrame columns.
latent_dim = 20
input_dim = len(output_new_transposed_df.columns)
vae = build_vae(input_dim, latent_dim)

# Run the VAE over the whole dataset and score each row by the mean squared
# difference between the row and its reconstruction.
# NOTE(review): no fit() call appears between build_vae() and predict() in
# this block - confirm the VAE is meant to be used as built.
X_pred = vae.predict(output_new_transposed_df.values, batch_size=16, verbose=0)
squared_residuals = np.square(output_new_transposed_df.values - X_pred)
reconstruction_error = np.mean(squared_residuals, axis=1)

# Rows whose error lies above the 87th percentile are flagged as anomalies.
threshold = np.percentile(reconstruction_error, 87)
anomalies = reconstruction_error > threshold

# Report the row positions of the flagged rows.
(anomalous_indices,) = np.where(anomalies)
print("Anomalous indices:", anomalous_indices, sep="\n")
# Data path
# NOTE(review): this file (test_temp.csv) is not the one passed to inference
# earlier in this cell (test.csv). The row positions in anomalous_indices only
# line up if both files list the same rows in the same order - confirm.
data_file_path = "/home/develop/GATv2/data/test_temp.csv"

# Load the data
data = pd.read_csv(data_file_path)

# Make sure the 'origin_index' column exists before using it
if 'origin_index' not in data.columns:
    print("No origin_index column found.")
else:
    # anomalous_indices holds row *positions* (it comes from np.where), so the
    # lookup must be positional (.iloc). Label-based .loc only gives the same
    # rows when the frame's index labels happen to equal the row positions.
    anomalous_origin_indices = data['origin_index'].iloc[anomalous_indices].values

    # Print the result
    print("Origin indices for the specified rows:")
    print(anomalous_origin_indices)

# Add any further processing or saving of the results here if needed

GATv2 模型的输出：
tensor([[-0.0795,  0.0159,  0.2260,  ...,  0.0156,  0.1051, -0.1403],
        [-0.0795,  0.0159,  0.2260,  ...,  0.0156,  0.1051, -0.1403],
        [-0.0795,  0.0159,  0.2260,  ...,  0.0156,  0.1051, -0.1403],
        ...,
        [-0.0795,  0.0159,  0.2260,  ...,  0.0156,  0.1051, -0.1403],
        [-0.0795,  0.0159,  0.2260,  ...,  0.0156,  0.1051, -0.1403],
        [-0.0795,  0.0159,  0.2260,  ...,  0.0156,  0.1051, -0.1403]])
模型输出的维度: torch.Size([42, 115])
Anomalous indices:
[ 10  24  29  36  42  43  46  47  54  64  67  74  88  91 103]
Origin indices for the specified rows:
[  0   0 608   0   0   0 675   0   0 115   0   0 551   0   0]
