In [78]:
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GraphConv, TopKPooling, global_mean_pool
from torch.nn import Linear
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from torch.nn import Linear, Parameter
import math

In [58]:
node_csv_path = r"D:\plot\node_total_1.csv"
links_csv_path = r"D:\plot\link_total.csv"
labels_csv_path = r"D:\plot\label_total.csv"
edge_attr_csv_path = r"D:\plot\attr_total.csv"

In [59]:
node_df = pd.read_csv(node_csv_path)
links_df = pd.read_csv(links_csv_path)
edge_feature_df = pd.read_csv(edge_attr_csv_path)
labels_df = pd.read_csv(labels_csv_path)

In [60]:
graph_data_list = []

In [61]:
for session_id in tqdm(node_df['sessionId'].unique()):
    # 节点特征
    session_nodes = node_df[node_df['sessionId'] == session_id]
    node_features = session_nodes[['degree']].values
    
    # 对节点特征进行标准化
    scaler = StandardScaler()
    node_features_normalized = scaler.fit_transform(node_features)
    
    # 将标准化后的特征转换为 PyTorch 张量
    node_features_tensor = torch.tensor(node_features_normalized, dtype=torch.float)

    # 边索引
    session_edges = links_df[links_df['sessionId'] == session_id]
    edge_index = torch.tensor([session_edges['source'].values - 1, 
                               session_edges['target'].values - 1], 
                              dtype=torch.long)

    # 边特征
    edge_features = edge_feature_df[edge_feature_df['sessionId'] == session_id]['edge_feature'].values
    
    # 对边特征进行标准化
    edge_scaler = StandardScaler()
    edge_features_normalized = edge_scaler.fit_transform(edge_features.reshape(-1, 1))
    
    # 将标准化后的边特征转换为 PyTorch 张量
    edge_attr_tensor = torch.tensor(edge_features_normalized, dtype=torch.float)

    # 标签
    label = labels_df[labels_df['sessionId'] == session_id]['label'].iloc[0]
    label_tensor = torch.tensor([label], dtype=torch.float)

    # 创建图数据对象
    data = Data(x=node_features_tensor, edge_index=edge_index, y=label_tensor, edge_attr=edge_attr_tensor)
    graph_data_list.append(data)

100%|██████████| 530/530 [00:01<00:00, 517.50it/s]


In [62]:
for i in range(530):
    print(graph_data_list[i])

Data(x=[24, 1], edge_index=[2, 25], edge_attr=[25, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 48], edge_attr=[48, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 25], edge_attr=[25, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 44], edge_attr=[44, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 20], edge_attr=[20, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 41], edge_attr=[41, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 60], edge_attr=[60, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 20], edge_attr=[20, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 63], edge_attr=[63, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 40], edge_attr=[40, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 51], edge_attr=[51, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 58], edge_attr=[58, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 52], edge_attr=[52, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 32], edge_attr=[32, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 12], edge_attr=[12, 1], y=[1])
Data(x=[24, 1], edge_index=[2, 28], edge_attr=[28, 1], y=[1])
Data(x=[

In [83]:
class GraphBinaryClassificationNetWithEdgeFeatures(torch.nn.Module):
    def __init__(self, node_feature_dim=1, edge_feature_dim=1):
        super(GraphBinaryClassificationNetWithEdgeFeatures, self).__init__()
        self.conv1 = GraphConv(node_feature_dim, 128, edge_dim=edge_feature_dim)
        self.conv2 = GraphConv(128, 128, edge_dim=edge_feature_dim)
        self.conv3 = GraphConv(128, 256, edge_dim=edge_feature_dim)
        self.lin1 = Linear(256, 128)
        self.lin2 = Linear(128, 64)
        self.lin3 = Linear(64, 1)
        
        # 为边的重要性评估添加可学习的权重
        self.edge_importance_weights = Parameter(torch.Tensor(edge_feature_dim))
        self.reset_parameters()
        
    def reset_parameters(self):
        torch.nn.init.uniform_(self.edge_importance_weights, -0.1, 0.1)

    def forward(self, data):
        x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
        
        edge_index, edge_attr = self.learn_and_prune_edges(edge_index, edge_attr)

        x = F.relu(self.conv1(x, edge_index, edge_attr)) 
        x = F.relu(self.conv2(x, edge_index, edge_attr))
        x = F.relu(self.conv3(x, edge_index, edge_attr))
        
        x = global_mean_pool(x, batch)

        x = F.relu(self.lin1(x))
        x = F.relu(self.lin2(x))
        x = torch.sigmoid(self.lin3(x)).squeeze(1)
        
        return x

    def learn_and_prune_edges(self, edge_index, edge_attr, ratio=0.5):
        # 使用可学习的权重评估边的重要性
        edge_scores = (edge_attr * self.edge_importance_weights).sum(dim=-1)
        
        # 选择要保留的边
        _, indices = torch.sort(edge_scores, descending=True)
        num_edges_to_keep = int(indices.size(0) * ratio)
        top_indices = indices[:num_edges_to_keep]
        
        edge_index = edge_index[:, top_indices]
        edge_attr = edge_attr[top_indices]
        
        return edge_index, edge_attr

In [74]:
def train(train_loader, model, optimizer, crit):
    model.train()
    loss_all = 0
    for data in train_loader:
        optimizer.zero_grad()
        output = model(data)
        label = data.y
        loss = crit(output, label)
        loss.backward()
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_loader)

In [75]:
def evaluate(loader, model):
    model.eval()
    predictions, labels = [], []

    with torch.no_grad():
        for data in loader:
            pred = model(data)
            label = data.y
            predictions.extend(pred.numpy())
            labels.extend(label.numpy())

    predictions = np.array(predictions)
    labels = np.array(labels)
    return roc_auc_score(labels, predictions)

In [76]:
# 交叉验证
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

In [84]:
auc_scores = []
for train_index, test_index in kf.split(graph_data_list):
    train_data = [graph_data_list[i] for i in train_index]
    test_data = [graph_data_list[i] for i in test_index]

    train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

    # 初始化模型、优化器和损失函数
    model = GraphBinaryClassificationNetWithEdgeFeatures()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    crit = torch.nn.BCEWithLogitsLoss()  # 使用 BCEWithLogitsLoss

    # 训练模型
    for epoch in range(10):
        train_loss = train(train_loader, model, optimizer, crit)

    # 评估模型
    auc_score = evaluate(test_loader, model)
    auc_scores.append(auc_score)
    print(f"Fold AUC Score: {auc_score}")



Fold AUC Score: 0.6934727731698521




Fold AUC Score: 0.7176724137931034




Fold AUC Score: 0.8711063372717508




Fold AUC Score: 0.7024066091954022




Fold AUC Score: 0.7654784240150094


In [85]:
# 计算平均 AUC 分数
avg_auc_score = np.mean(auc_scores)
print("Average AUC Score:", avg_auc_score)

Average AUC Score: 0.7500273114890236


In [None]:
for session_id in tqdm(node_df['sessionId'].unique()):
    # 节点特征
    session_nodes = node_df[node_df['sessionId'] == session_id]
    node_features = session_nodes[['degree','cluster', 'efficiency']].values
    
    # 对节点特征进行标准化
    scaler = StandardScaler()
    node_features_normalized = scaler.fit_transform(node_features)
    
    # 将标准化后的特征转换为 PyTorch 张量
    node_features_tensor = torch.tensor(node_features_normalized, dtype=torch.float)

    # 边索引
    session_edges = links_df[links_df['sessionId'] == session_id]
    edge_index = torch.tensor([session_edges['source'].values - 1, 
                               session_edges['target'].values - 1], 
                              dtype=torch.long)

    # 边特征
    edge_features = edge_feature_df[edge_feature_df['sessionId'] == session_id]['edge_feature'].values
    
    # 对边特征进行标准化
    edge_scaler = StandardScaler()
    edge_features_normalized = edge_scaler.fit_transform(edge_features.reshape(-1, 1))
    
    # 将标准化后的边特征转换为 PyTorch 张量
    edge_attr_tensor = torch.tensor(edge_features_normalized, dtype=torch.float)

    # 标签
    label = labels_df[labels_df['sessionId'] == session_id]['label'].iloc[0]
    label_tensor = torch.tensor([label], dtype=torch.float)

    # 创建图数据对象
    data = Data(x=node_features_tensor, edge_index=edge_index, y=label_tensor, edge_attr=edge_attr_tensor)
    graph_data_list.append(data)