In [21]:
import numpy as np
import pandas as pd
from torch_geometric.nn import GCNConv, GATConv, MessagePassing,SAGEConv,GCN2Conv,GATv2Conv,GraphConv,TransformerConv,GINConv,RGATConv,RGCNConv
from torch import scatter_add
from torch.nn import Linear
import torch.nn.functional as F
from torch.nn import Linear, BatchNorm1d, Dropout
from torch_geometric.nn import global_mean_pool
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from sklearn.metrics import auc,roc_auc_score,roc_curve
import torch
from torch_geometric.data import Data
import networkx as nx
import community as community_louvain
from sklearn.preprocessing import StandardScaler

In [2]:
torch.__version__

'2.1.1+cu118'

In [3]:

from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch_geometric.loader import DataLoader
import torch.optim as optim

# 加载数据
data = np.load('data.npz')
x = data['x']
y = data['y']
edge_index = data['edge_index']
train_mask_t= data['train_mask']
test_mask = data['test_mask']
edge_type = data['edge_type']
np.random.seed(42)
np.random.shuffle(train_mask_t)
train_mask = train_mask_t[:int(len(train_mask_t)/10*8)]
valid_mask = train_mask_t[int(len(train_mask_t)/10*8):]


### 开始特征工程
#### 1.节点重要度
1.1度中心性
$c_D (v)=\frac{deg(v)}{n-1}$
有向图要计算出度中心性和入度中心性

In [4]:
#构造每个节点的入度和出度
num_nodes = x.shape[0]
in_degree = np.zeros(x.shape[0], dtype=int)
out_degree = np.zeros(x.shape[0], dtype=int)
for src,dst in edge_index:
    in_degree[dst]+=1
    out_degree[src]+=1
indeg_centrality=in_degree/(num_nodes-1)
outdeg_centrality=out_degree/(num_nodes-1)

1.2 接近中心性
衡量节点影响范围和在信息传播中的作用，是节点到其他节点平均距离的倒数
$c_c(v)=\frac{1}{\sum_{u\neq v }d(u,v)}$

In [5]:

G = nx.DiGraph()
G.add_edges_from(edge_index)
# 计算所有节点对的最短路径长度
path_lengths = dict(nx.all_pairs_shortest_path_length(G))
closeness_centrality = np.zeros(num_nodes, dtype=float)
for node in path_lengths:
    total_distance = sum(path_lengths[node].values())
    if total_distance > 0:  # 避免除以零
        closeness_centrality[node] =1/ total_distance
print(closeness_centrality)

[0. 0. 0. ... 1. 0. 0.]


1.3 介数中心性
指节点在所有最短路径中出现的次数。该指标可以衡量节点在信息传播和资源流动中的作用。
$C_B(v)=\sum_{s\neq v\neq t}\frac{\sigma(s,t|v)}{\sigma(s,t)}$
其中$\sigma(s,t)$表示s与t之间的最短路径数，$\sigma(s,t|v)$表示v在最短路径中出现的次数

In [6]:
# betweenness_centrality = nx.betweenness_centrality(G)

1.4 特征向量重要度
指节点的重要性与其相邻节点的重要性有关。如果一个节点与其他重要节点相连，那么它的重要性也会提高。
$C_E(v)=\frac{1}{\lambda}\sum_{u\in N(v)}C_E(u)$
其中N(v)表示节点u的邻居节点集合，$\lambda$是常数，满足$Av=\lambda v$A是邻接矩阵，v是特征向量

In [7]:
eigenvector_centrality = nx.eigenvector_centrality_numpy(G)
eigenvector_centrality_array = np.array([eigenvector_centrality.get(node, 0) for node in range(num_nodes)])

1.5 集群系数
计算方法为：该节点的周围节点之间相连数 / 该节点与周围节点相连数，值域为[0,1]


In [8]:
#转为无向图计算
G_undirected = G.to_undirected()
# 计算集群系数
clustering_coefficient = nx.clustering(G_undirected)
# 将集群系数结果转换为数组形式
clustering_coefficient_array = np.array([clustering_coefficient.get(node, 0) for node in range(num_nodes)])

1.6 pagerank节点相对重要性
一个节点向它指向的每个节点“投票”，每个投票的权重是投票者的重要性除以它的出度（即它指向的节点数）

In [9]:
page_rank=nx.pagerank(G, alpha=0.85)
pagerank = np.array([page_rank.get(node, 0) for node in range(num_nodes)])

# 输出PageRank值
print(pagerank)

[0.00000000e+00 0.00000000e+00 0.00000000e+00 ... 2.23319579e-06
 4.38268035e-06 0.00000000e+00]


1.7 katz centrality 
Katz中心性的定义基于以下思想：节点的重要性是其所有邻居的重要性的总和，但是每条路径的贡献通过一个衰减因子α逐步减少。这个衰减因子α是一个小于网络所有权重的特征值的正数，用来控制路径长度对中心性计算的影响。
$C_{\text{Katz}}(i) = \sum_{k=1}^{\infty} \sum_{j=1}^{n} \alpha^k \cdot (A^k)_{ji}$

In [10]:
katz_centrality = nx.katz_centrality(G, alpha=0.005)

# 将Katz中心性结果转换为数组形式
katz_centrality_array = np.array([katz_centrality.get(node, 0) for node in range(num_nodes)])

1.8 HITS
算法是一种用于网络分析的算法，主要用于识别网络中的“权威”（Authorities）和“枢纽”（Hubs）
权威（Authorities）：这些是被许多枢纽指向的节点。在互联网环境中，一个权威页面是一个提供特定主题高质量信息的页面，被许多其他页面通过链接引用。
枢纽（Hubs）：枢纽是指向多个权威的节点。一个好的枢纽是指一个包含许多权威链接的页面。

In [11]:
# # 计算HITS的权威和枢纽分数
# hubs, authorities = nx.hits(G)
# 
# # 将结果转换为数组形式
# hubs_array = np.array([hubs.get(node, 0) for node in range(max(edge_index.flatten()) + 1)])
# authorities_array = np.array([authorities.get(node, 0) for node in range(max(edge_index.flatten()) + 1)])

In [12]:
# 计算所有入边的权重和
# 初始化节点特征为0（这里假设节点特征是单一值，如果有多维特征，需要调整这里）
in_weight=np.zeros(num_nodes,dtype=int)
out_weight=np.zeros(num_nodes,dtype=int)
# 累加每个节点的所有入边的边类型
for i, edge in enumerate(edge_index):
    src, dst = edge
    in_weight[dst] += edge_type[i]
    out_weight[src]+=edge_type[i]

In [13]:
triangle_count = nx.triangles(G_undirected)
triangle_count=np.array([triangle_count.get(node, 0) for node in range(num_nodes)])

In [14]:
# # 计算所有可能节点对的杰卡德相似性
# jaccard_coefficients = list(nx.jaccard_coefficient(G_undirected))
# 
# # 初始化计数器
# count_above_threshold =np.zeros(num_nodes,dtype=int)
# 
# # 对于每个节点对的杰卡德相似性，如果大于阈值，则增加计数
# for u, v, coeff in jaccard_coefficients:
#     if coeff > 0.5:
#         count_above_threshold[u] += 1
#         count_above_threshold[v] += 1

### 2.社区属性

In [15]:


# 使用Louvain算法进行社区检测
partition = community_louvain.best_partition(G_undirected)
community_label=np.array([partition.get(node, 0) for node in range(num_nodes)])

In [16]:
constraints = nx.constraint(G)
constraint=np.array([constraints.get(node, 0) for node in range(num_nodes)])
constraint=np.nan_to_num(constraint,nan=0)

In [17]:
def to_column_vector(arr):
    return np.array(arr).reshape(-1, 1)

# 将所有特征转换为列向量
indeg_col = to_column_vector(indeg_centrality)
outdeg_col = to_column_vector(outdeg_centrality)
closeness_col = to_column_vector(closeness_centrality)
eigenvector_col = to_column_vector(eigenvector_centrality_array)
clustering_col = to_column_vector(clustering_coefficient_array)
pagerank_col = to_column_vector(pagerank)
katz_col = to_column_vector(katz_centrality_array)
community_col = to_column_vector(community_label)
triangle_col= to_column_vector(triangle_count)
in_weight_col=to_column_vector(in_weight)
out_weight_col=to_column_vector(out_weight)
constraint_col=to_column_vector(constraint)
x_train=np.hstack((x,indeg_col,outdeg_col,closeness_col,eigenvector_col,clustering_col,pagerank_col,katz_col,community_col,triangle_col,in_weight_col,out_weight_col,constraint_col))

In [18]:


scaler = StandardScaler()

# 对特征矩阵进行标准化
x_train = scaler.fit_transform(x_train)

In [19]:
# 将数据转换为PyTorch张量
x_tensor = torch.tensor(x_train, dtype=torch.float).cpu()
y_tensor = torch.tensor(y, dtype=torch.long).cpu()
edge_index_tensor = torch.tensor(edge_index.T, dtype=torch.long).cpu()
edge_type_tensor = torch.tensor(edge_type, dtype=torch.long).cpu()

# 创建掩码
num_nodes = x.shape[0]
train_mask_tensor = torch.zeros(num_nodes, dtype=torch.bool).cpu()
test_mask_tensor = torch.zeros(num_nodes, dtype=torch.bool).cpu()
val_mask_tensor=torch.zeros(num_nodes, dtype=torch.bool).cpu()
train_mask_tensor[train_mask] = True
test_mask_tensor[test_mask] = True
val_mask_tensor[valid_mask]=True
# 构造PyTorch Geometric的Data对象
data = Data(x=x_tensor, edge_index=edge_index_tensor, edge_type=edge_type_tensor,y=y_tensor).cpu()
data.train_mask = train_mask_tensor
data.test_mask = test_mask_tensor
data.val_mask=val_mask_tensor

### 经典GCN模型

In [22]:

torch.cuda.empty_cache()
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(data.num_node_features, 4*hidden_channels)
        self.bn1 = BatchNorm1d(4*hidden_channels)
        self.conv2 =  GCNConv(4*hidden_channels, 4*hidden_channels)
        self.bn2 = BatchNorm1d(4*hidden_channels)
        self.conv3 =  GCNConv(4*hidden_channels, 2*hidden_channels)
        self.bn3 = BatchNorm1d(2*hidden_channels)
        self.conv4 =  GCNConv(2*hidden_channels, hidden_channels)
        self.bn4 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, 2)
        self.dropout1 = Dropout(p=0.2)
        self.dropout2=Dropout(p=0.05)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.bn1(x)     
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv2(x, edge_index)
        x = self.bn2(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv3(x, edge_index)
        x = self.bn3(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv4(x, edge_index)
        x = self.bn4(x)     
        x = x.relu()
        x = self.dropout1(x)
        x = self.lin(x)

        return x
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data.to(device)
model = GCN(hidden_channels=64).to(device)
print(model)

GCN(
  (conv1): GCNConv(29, 256)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): GCNConv(256, 256)
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): GCNConv(256, 128)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): GCNConv(128, 64)
  (bn4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (dropout2): Dropout(p=0.05, inplace=False)
)


In [23]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 训练函数
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index )
    loss = criterion(out[train_mask_t], data.y[train_mask_t])

    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(300):
    loss = train()
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)

OutOfMemoryError: CUDA out of memory. Tried to allocate 366.00 MiB. GPU 0 has a total capacty of 6.00 GiB of which 0 bytes is free. Of the allocated memory 4.35 GiB is allocated by PyTorch, and 276.42 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [62]:
#用valdata本地测一下分数
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

0.775443253065804

# 经典GAT模型

In [49]:

torch.cuda.empty_cache()
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GAT, self).__init__()
        torch.manual_seed(12345)
        self.conv1 =GATConv(data.num_node_features, 4*hidden_channels)
        self.bn1 = BatchNorm1d(4*hidden_channels)
        self.conv2 =  GATConv(4*hidden_channels, 4*hidden_channels)
        self.bn2 = BatchNorm1d(4*hidden_channels)
        self.conv3 =  GATConv(4*hidden_channels, 2*hidden_channels)
        self.bn3 = BatchNorm1d(2*hidden_channels)
        self.conv4 =  GATConv(2*hidden_channels, hidden_channels)
        self.bn4 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, 2)
        self.dropout1 = Dropout(p=0.15)
        self.dropout2=Dropout(p=0.07)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.bn1(x)     
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv2(x, edge_index)
        x = self.bn2(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv3(x, edge_index)
        x = self.bn3(x)       
        x = x.relu()
        # x=self.dropout2(x)
        # x = self.conv4(x, edge_index)
        # x = self.bn4(x)     
        # x = x.relu()
        x = self.dropout1(x)
        x = self.lin(x)

        return x
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data.to(device)
model = GAT(hidden_channels=64).to(device)
print(model)

GAT(
  (conv1): GATConv(29, 256, heads=1)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): GATConv(256, 128, heads=1)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): GATConv(128, 64, heads=1)
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): GATConv(64, 64, heads=1)
  (bn4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (dropout1): Dropout(p=0.15, inplace=False)
  (dropout2): Dropout(p=0.07, inplace=False)
)


In [50]:

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 训练函数
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index )
    loss = criterion(out[train_mask_t], data.y[train_mask_t])

    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(300):
    loss = train()
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)

Epoch 0: Loss 0.7623255252838135
Epoch 1: Loss 0.47997531294822693
Epoch 2: Loss 0.27791616320610046
Epoch 3: Loss 0.1685212254524231
Epoch 4: Loss 0.11690828204154968
Epoch 5: Loss 0.09052275121212006
Epoch 6: Loss 0.07640526443719864
Epoch 7: Loss 0.06915302574634552
Epoch 8: Loss 0.06532346457242966
Epoch 9: Loss 0.06326398998498917
Epoch 10: Loss 0.06246306747198105
Epoch 11: Loss 0.06226486340165138
Epoch 12: Loss 0.06235358491539955
Epoch 13: Loss 0.06273861229419708
Epoch 14: Loss 0.06313282251358032
Epoch 15: Loss 0.06336932629346848
Epoch 16: Loss 0.06363740563392639
Epoch 17: Loss 0.06392396241426468
Epoch 18: Loss 0.06390650570392609
Epoch 19: Loss 0.06444799154996872
Epoch 20: Loss 0.06420280784368515
Epoch 21: Loss 0.06405408680438995
Epoch 22: Loss 0.0637855976819992
Epoch 23: Loss 0.06382177025079727
Epoch 24: Loss 0.06359820812940598
Epoch 25: Loss 0.0629529133439064
Epoch 26: Loss 0.06269507110118866
Epoch 27: Loss 0.062000978738069534
Epoch 28: Loss 0.0615702681243419

In [51]:
#用valdata本地测一下分数
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

0.7699891676159011

### SAGE模型

In [46]:
torch.cuda.empty_cache()
class SAGE(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(SAGE, self).__init__()
        torch.manual_seed(12345)
        self.conv1 =SAGEConv(data.num_node_features, 4*hidden_channels)
        self.bn1 = BatchNorm1d(4*hidden_channels)
        self.conv2 = SAGEConv(4*hidden_channels, 2*hidden_channels)
        self.bn2 = BatchNorm1d(2*hidden_channels)
        self.conv3 = SAGEConv(2*hidden_channels, 2*hidden_channels)
        self.bn3 = BatchNorm1d(2*hidden_channels)
        self.conv4 = SAGEConv(2*hidden_channels, hidden_channels)
        self.bn4 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, 2)
        self.dropout1 = Dropout(p=0.15)
        self.dropout2=Dropout(p=0.07)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.bn1(x)     
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv2(x, edge_index)
        x = self.bn2(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv3(x, edge_index)
        x = self.bn3(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv4(x, edge_index)
        x = self.bn4(x)     
        x = x.relu()
        x = self.dropout1(x)
        x = self.lin(x)

        return x
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data.to(device)
model = SAGE(hidden_channels=64).to(device)
print(model)

SAGE(
  (conv1): SAGEConv(29, 256, aggr=mean)
  (bn1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): SAGEConv(256, 128, aggr=mean)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): SAGEConv(128, 128, aggr=mean)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): SAGEConv(128, 64, aggr=mean)
  (bn4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (dropout1): Dropout(p=0.15, inplace=False)
  (dropout2): Dropout(p=0.07, inplace=False)
)


In [47]:

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 训练函数
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index )
    loss = criterion(out[train_mask_t], data.y[train_mask_t])

    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(300):
    loss = train()
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)

Epoch 0: Loss 0.763080894947052
Epoch 1: Loss 0.5215872526168823
Epoch 2: Loss 0.313848614692688
Epoch 3: Loss 0.21198579668998718
Epoch 4: Loss 0.14587584137916565
Epoch 5: Loss 0.1080206036567688
Epoch 6: Loss 0.08609303086996078
Epoch 7: Loss 0.07352536171674728
Epoch 8: Loss 0.06693994998931885
Epoch 9: Loss 0.06357361376285553
Epoch 10: Loss 0.062112025916576385
Epoch 11: Loss 0.061726804822683334
Epoch 12: Loss 0.06180557608604431
Epoch 13: Loss 0.062151405960321426
Epoch 14: Loss 0.0627300813794136
Epoch 15: Loss 0.06309740245342255
Epoch 16: Loss 0.06335452944040298
Epoch 17: Loss 0.06385815143585205
Epoch 18: Loss 0.06413566321134567
Epoch 19: Loss 0.0643608495593071
Epoch 20: Loss 0.06462779641151428
Epoch 21: Loss 0.06445500999689102
Epoch 22: Loss 0.06450741738080978
Epoch 23: Loss 0.06462801247835159
Epoch 24: Loss 0.06471232324838638
Epoch 25: Loss 0.06450845301151276
Epoch 26: Loss 0.06454425305128098
Epoch 27: Loss 0.06442494690418243
Epoch 28: Loss 0.06409173458814621


In [48]:
#用valdata本地测一下分数
from sklearn.metrics import auc,roc_auc_score,roc_curve
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

0.792518318467534

### RGCN模型

In [28]:
class RGAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 num_relations):
        super().__init__()
        self.conv1 = RGCNConv(in_channels, hidden_channels, num_relations)
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 = RGCNConv(hidden_channels, hidden_channels, num_relations)
        self.bn2 = BatchNorm1d(hidden_channels)
        self.conv3 = RGCNConv(hidden_channels, hidden_channels, num_relations)
        self.bn3 = BatchNorm1d(hidden_channels)
        self.conv4 = RGCNConv(hidden_channels, hidden_channels, num_relations)
        self.bn4 = BatchNorm1d(hidden_channels)
        self.dropout1 = Dropout(p=0.08)
        self.dropout2 = Dropout(p=0.15)
        self.lin = torch.nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_type):
        x = self.conv1(x, edge_index, edge_type)
        x= self.bn1(x) 
        x = x.relu()
        x=self.dropout1(x)
        x = self.conv2(x, edge_index, edge_type)
        x= self.bn2(x)                      
        x = x.relu()
        x=self.dropout1(x)
        x =self.conv3(x, edge_index, edge_type)
        x= self.bn3(x)                           
        x = x.relu()
        x=self.dropout1(x) 
        x =self.conv4(x, edge_index, edge_type)
        x= self.bn4(x)                           
        x = x.relu()
        x=self.dropout2(x)
        x = self.lin(x)
        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = RGAT(data.num_node_features, 64, 2, 12).to(device)

# 损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()  # 适用于多类分类任务
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# 将数据移动到设备
# data = data.to(device)
print(model)

RGAT(
  (conv1): RGCNConv(29, 64, num_relations=12)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): RGCNConv(64, 64, num_relations=12)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): RGCNConv(64, 64, num_relations=12)
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): RGCNConv(64, 64, num_relations=12)
  (bn4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.08, inplace=False)
  (dropout2): Dropout(p=0.15, inplace=False)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [29]:
def train():

    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_type)
    loss = criterion(out[train_mask_t], data.y[train_mask_t])
    loss.backward()
    optimizer.step()
    return loss.item()
torch.cuda.empty_cache()
# 训练模型
for epoch in range(300):  # 可根据需要调整epoch数量
    loss = train()
    print(f'Epoch {epoch+1}: Loss {loss:.5f}')

def predict(model, data):
    model.eval()
    with torch.no_grad():
        # 确保传递了edge_type参数
        out = model(data.x, data.edge_index, data.edge_type)
        predictions = torch.softmax(out, dim=1)
        return predictions


# 使用模型进行预测
predictions = predict(model, data)


Epoch 1: Loss 0.70713
Epoch 2: Loss 0.31003
Epoch 3: Loss 0.07549
Epoch 4: Loss 0.07503
Epoch 5: Loss 0.09425
Epoch 6: Loss 0.10966
Epoch 7: Loss 0.12125
Epoch 8: Loss 0.12696
Epoch 9: Loss 0.12757
Epoch 10: Loss 0.12197
Epoch 11: Loss 0.11262
Epoch 12: Loss 0.10645
Epoch 13: Loss 0.10177
Epoch 14: Loss 0.09915
Epoch 15: Loss 0.09785
Epoch 16: Loss 0.09285
Epoch 17: Loss 0.08647
Epoch 18: Loss 0.08239
Epoch 19: Loss 0.07736
Epoch 20: Loss 0.07385
Epoch 21: Loss 0.07172
Epoch 22: Loss 0.06906
Epoch 23: Loss 0.06611
Epoch 24: Loss 0.06393
Epoch 25: Loss 0.06236
Epoch 26: Loss 0.06187
Epoch 27: Loss 0.06200
Epoch 28: Loss 0.06170
Epoch 29: Loss 0.06154
Epoch 30: Loss 0.06110
Epoch 31: Loss 0.06107
Epoch 32: Loss 0.06103
Epoch 33: Loss 0.06122
Epoch 34: Loss 0.06136
Epoch 35: Loss 0.06157
Epoch 36: Loss 0.06149
Epoch 37: Loss 0.06157
Epoch 38: Loss 0.06168
Epoch 39: Loss 0.06163
Epoch 40: Loss 0.06122
Epoch 41: Loss 0.06134
Epoch 42: Loss 0.06109
Epoch 43: Loss 0.06073
Epoch 44: Loss 0.060

In [30]:
#用valdata本地测一下分数
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

0.7863871520288924

### RGAT模型

In [35]:
torch.cuda.empty_cache()
class RGAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels,
                 num_relations):
        super().__init__()
        self.conv1 = RGATConv(in_channels, hidden_channels, num_relations)
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 = RGATConv(hidden_channels, hidden_channels, num_relations)
        self.bn2 = BatchNorm1d(hidden_channels)
        self.conv3 = RGATConv(hidden_channels, int(hidden_channels/2), num_relations)
        self.bn3 = BatchNorm1d(int(hidden_channels/2))
        self.dropout = Dropout(p=0.1)
        self.lin = torch.nn.Linear(int(hidden_channels/2), out_channels)

    def forward(self, x, edge_index, edge_type):
        x = self.conv1(x, edge_index, edge_type)
        x= self.bn1(x)
        x = x.relu()
        x = self.conv2(x, edge_index, edge_type)
        x= self.bn2(x)
        x = x.relu()
        x =self.conv3(x, edge_index, edge_type)
        x= self.bn3(x)
        x = x.relu()
        x=self.dropout(x)
        x = x.relu()
        x = self.lin(x)
        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = RGAT(data.num_node_features, 64, 2, 12).to(device)

# 损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()  # 适用于多类分类任务
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# 将数据移动到设备
# data = data.to(device)
print(model)

RGAT(
  (conv1): RGATConv(29, 64, heads=1)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): RGATConv(64, 64, heads=1)
  (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): RGATConv(64, 32, heads=1)
  (bn3): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (lin): Linear(in_features=32, out_features=2, bias=True)
)


In [36]:
def train():

    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_type)
    loss = criterion(out[train_mask_t], data.y[train_mask_t])
    loss.backward()
    optimizer.step()
    return loss.item()

# 训练模型
for epoch in range(1000):  # 可根据需要调整epoch数量
    loss = train()
    print(f'Epoch {epoch+1}: Loss {loss:.5f}')

def predict(model, data):
    model.eval()
    with torch.no_grad():
        # 确保传递了edge_type参数
        out = model(data.x, data.edge_index, data.edge_type)
        predictions = torch.softmax(out, dim=1)
        return predictions


# 使用模型进行预测
predictions = predict(model, data)


Epoch 1: Loss 0.51631
Epoch 2: Loss 0.11791
Epoch 3: Loss 0.06644
Epoch 4: Loss 0.08525
Epoch 5: Loss 0.10164
Epoch 6: Loss 0.11103
Epoch 7: Loss 0.11356
Epoch 8: Loss 0.11126
Epoch 9: Loss 0.10533
Epoch 10: Loss 0.09861
Epoch 11: Loss 0.09080
Epoch 12: Loss 0.08277
Epoch 13: Loss 0.07539
Epoch 14: Loss 0.06925
Epoch 15: Loss 0.06623
Epoch 16: Loss 0.06657
Epoch 17: Loss 0.06924
Epoch 18: Loss 0.07209
Epoch 19: Loss 0.07333
Epoch 20: Loss 0.07189
Epoch 21: Loss 0.06851
Epoch 22: Loss 0.06568
Epoch 23: Loss 0.06481
Epoch 24: Loss 0.06572
Epoch 25: Loss 0.06673
Epoch 26: Loss 0.06736
Epoch 27: Loss 0.06752
Epoch 28: Loss 0.06677
Epoch 29: Loss 0.06594
Epoch 30: Loss 0.06488
Epoch 31: Loss 0.06454
Epoch 32: Loss 0.06439
Epoch 33: Loss 0.06448
Epoch 34: Loss 0.06471
Epoch 35: Loss 0.06496
Epoch 36: Loss 0.06498
Epoch 37: Loss 0.06512
Epoch 38: Loss 0.06483
Epoch 39: Loss 0.06448
Epoch 40: Loss 0.06436
Epoch 41: Loss 0.06422
Epoch 42: Loss 0.06417
Epoch 43: Loss 0.06440
Epoch 44: Loss 0.064

KeyboardInterrupt: 

In [37]:
#用valdata本地测一下分数
from sklearn.metrics import auc,roc_auc_score,roc_curve
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

0.533965735248783

### TansformerConv

In [24]:
torch.cuda.empty_cache()
#SAGE是目前跑出来最好的，GAT其次，GCN最差
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = TransformerConv(data.num_node_features, 4*hidden_channels)
        self.bn1 = BatchNorm1d(4*hidden_channels)
        self.conv2 =  TransformerConv(4*hidden_channels, 4*hidden_channels)
        self.bn2 = BatchNorm1d(4*hidden_channels)
        self.conv3 =  TransformerConv(4*hidden_channels, 2*hidden_channels)
        self.bn3 = BatchNorm1d(2*hidden_channels)
        self.conv4 =  TransformerConv(2*hidden_channels, hidden_channels)
        self.bn4 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, 2)
        self.dropout1 = Dropout(p=0.2)
        self.dropout2=Dropout(p=0.05)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = self.bn1(x)     
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv2(x, edge_index)
        x = self.bn2(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv3(x, edge_index)
        x = self.bn3(x)       
        x = x.relu()
        x=self.dropout2(x)
        x = self.conv4(x, edge_index)
        x = self.bn4(x)     
        x = x.relu()
        x = self.dropout1(x)
        x = self.lin(x)

        return x
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data.to(device)
model = GCN(hidden_channels=64).to(device)
print(model)

GCN(
  (conv1): TransformerConv(29, 128, heads=1)
  (bn1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): TransformerConv(128, 128, heads=1)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): TransformerConv(128, 128, heads=1)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4): TransformerConv(128, 64, heads=1)
  (bn4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (dropout2): Dropout(p=0.05, inplace=False)
)


In [25]:

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 训练函数
def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index )
    loss = criterion(out[train_mask_t], data.y[train_mask_t])

    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(300):
    loss = train()
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)

OutOfMemoryError: CUDA out of memory. Tried to allocate 284.00 MiB. GPU 0 has a total capacty of 6.00 GiB of which 0 bytes is free. Of the allocated memory 4.35 GiB is allocated by PyTorch, and 275.72 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# 构造测试集

In [35]:
#用valdata本地测一下分数

correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

0.7920269507551044

In [None]:
class EdgeWeightedConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(EdgeWeightedConv, self).__init__(aggr='add')  # "add" aggregation.
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index, edge_weight):
        # x: [N, in_channels], edge_index: [2, E], edge_weight: [E]
        edge_weight = edge_weight.unsqueeze(-1)  # [E, 1]
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, edge_weight=edge_weight)

    def message(self, x_j, edge_weight):
        return edge_weight * self.lin(x_j)

class Net(torch.nn.Module):
    def __init__(self, num_node_features, hidden_dim):
        super(Net, self).__init__()
        self.conv1 = EdgeWeightedConv(num_node_features, hidden_dim)
        self.conv2 = EdgeWeightedConv(hidden_dim, hidden_dim)
        self.fc = torch.nn.Linear(hidden_dim, 2)

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr

        x = F.relu(self.conv1(x, edge_index, edge_weight))
        x = F.relu(self.conv2(x, edge_index, edge_weight))
        x = F.dropout(x, training=self.training)
        x = self.fc(x)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 使用示例
model = Net(17, hidden_dim=64).to(device)
# 判断是否有GPU
data = data.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

# 训练函数
def train(model, data, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(300):
    loss = train(model, data, optimizer, criterion)
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)



In [None]:
from sklearn.metrics import auc,roc_auc_score,roc_curve
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])

In [31]:
pred=predictions.cpu().numpy()[test_mask]
pred[:,1]=pred[:,1].astype(float)

In [32]:
result=pd.DataFrame({'index':test_mask,'predict':pred[:,1]})
result.to_csv('rgcn.csv',index=None)