In [1]:
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data


In [2]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch_geometric.loader import DataLoader
import torch.optim as optim

# 加载数据
data = np.load('data.npz')
x = data['x']
y = data['y']
edge_index = data['edge_index']
train_mask_t= data['train_mask']
test_mask = data['test_mask']
edge_type = data['edge_type']
np.random.seed(42)
np.random.shuffle(train_mask_t)
train_mask = train_mask_t[:int(len(train_mask_t)/10*8)]
valid_mask = train_mask_t[int(len(train_mask_t)/10*8):]
# 将数据转换为PyTorch张量
x_tensor = torch.tensor(x, dtype=torch.float)
y_tensor = torch.tensor(y, dtype=torch.long)
edge_index_tensor = torch.tensor(edge_index.T, dtype=torch.long)
edge_type_tensor = torch.tensor(edge_type, dtype=torch.float)

# 创建掩码
num_nodes = x.shape[0]
train_mask_tensor = torch.zeros(num_nodes, dtype=torch.bool)
test_mask_tensor = torch.zeros(num_nodes, dtype=torch.bool)
val_mask_tensor=torch.zeros(num_nodes, dtype=torch.bool)
train_mask_tensor[train_mask] = True
test_mask_tensor[test_mask] = True
val_mask_tensor[valid_mask]=True
# 构造PyTorch Geometric的Data对象
data = Data(x=x_tensor, edge_index=edge_index_tensor, edge_attr=edge_type_tensor,y=y_tensor)
data.train_mask = train_mask_tensor
data.test_mask = test_mask_tensor
data.val_mask=val_mask_tensor

In [3]:
#这个GATv2还有点问题
from torch import scatter_add
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv, MessagePassing,SAGEConv,GCN2Conv,GATv2Conv
from torch.nn import Linear, BatchNorm1d, Dropout
from torch_geometric.nn import global_mean_pool
from torch_geometric.nn import SAGEConv

import torch
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class GATv2(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GATv2, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GATv2Conv(data.num_node_features, hidden_channels) 
        self.conv2 =GATv2Conv(hidden_channels, hidden_channels)  
        self.conv3 = GATv2Conv(hidden_channels, hidden_channels)  
        self.lin = Linear(hidden_channels, 2)
        self.dropout = Dropout(p=0.5) 

    def forward(self, x, edge_index, edge_weight):
        x = self.conv1(x, edge_index,edge_weight) 
        x = x.relu()
        x = self.conv2(x, edge_index,edge_weight) 
        x = x.relu()
        x = self.conv3(x, edge_index,edge_weight)
        x = x.relu() 
        x = self.dropout(x)  
        x = self.lin(x)

        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data.to(device)
model = GATv2(hidden_channels=64).to(device)
print(model)

GATv2(
  (conv1): GATv2Conv(17, 64, heads=1)
  (conv2): GATv2Conv(64, 64, heads=1)
  (conv3): GATv2Conv(64, 64, heads=1)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [4]:
from torch_geometric.nn import MessagePassing

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 训练函数
def train():
    model.train()
    optimizer.zero_grad()

    out = model(data.x, data.edge_index,data.edge_attr)
    loss = criterion(out[train_mask], data.y[train_mask])

    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x,data.edge_index,data.edge_attr)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(200):
    loss = train()
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)

AssertionError: 

In [5]:
#SAGE是目前跑出来最好的，GAT其次，GCN最差
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = SAGEConv(data.num_node_features, hidden_channels)
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 =  SAGEConv(hidden_channels, 2*hidden_channels)
        self.bn2 = BatchNorm1d(2*hidden_channels)
        self.conv3 =  SAGEConv(2*hidden_channels, hidden_channels)
        self.bn3 = BatchNorm1d(hidden_channels)

        self.lin = Linear(hidden_channels, 2)
        self.dropout = Dropout(p=0.5)

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = self.bn2(x)
        x = x.relu()
        x = self.conv3(x, edge_index)
        x = self.bn3(x)
        x = x.relu()
        x = self.dropout(x)
        x = self.lin(x)

        return x
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data.to(device)
model = GCN(hidden_channels=64).to(device)
print(model)

GCN(
  (conv1): SAGEConv(17, 64, aggr=mean)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): SAGEConv(64, 128, aggr=mean)
  (bn2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): SAGEConv(128, 64, aggr=mean)
  (bn3): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (lin): Linear(in_features=64, out_features=2, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)


In [6]:
from torch_geometric.nn import MessagePassing

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# 训练函数
def train():
    model.train()
    optimizer.zero_grad()

    out = model(data.x, data.edge_index,data.edge_attr)
    loss = criterion(out[train_mask], data.y[train_mask])

    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.x,data.edge_index,data.edge_attr)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(200):
    loss = train()
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)

Epoch 0: Loss 0.8718456625938416
Epoch 1: Loss 0.5405719876289368
Epoch 2: Loss 0.3474482297897339
Epoch 3: Loss 0.23772968351840973
Epoch 4: Loss 0.17143455147743225
Epoch 5: Loss 0.13083529472351074
Epoch 6: Loss 0.10724057257175446
Epoch 7: Loss 0.09200483560562134
Epoch 8: Loss 0.08204593509435654
Epoch 9: Loss 0.07612668722867966
Epoch 10: Loss 0.07224703580141068
Epoch 11: Loss 0.06985025852918625
Epoch 12: Loss 0.06862284988164902
Epoch 13: Loss 0.0681423470377922
Epoch 14: Loss 0.0669810026884079
Epoch 15: Loss 0.06743597984313965
Epoch 16: Loss 0.06774383038282394
Epoch 17: Loss 0.06783169507980347
Epoch 18: Loss 0.06728613376617432
Epoch 19: Loss 0.06775026768445969
Epoch 20: Loss 0.06792693585157394
Epoch 21: Loss 0.06784023344516754
Epoch 22: Loss 0.06837774813175201
Epoch 23: Loss 0.06781025975942612
Epoch 24: Loss 0.0679018571972847
Epoch 25: Loss 0.06776556372642517
Epoch 26: Loss 0.0678892508149147
Epoch 27: Loss 0.0674399882555008
Epoch 28: Loss 0.06699852645397186
Epo


KeyboardInterrupt



# 构造测试集

In [36]:
#用valdata本地测一下分数
from sklearn.metrics import auc,roc_auc_score,roc_curve
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])
auc_score

In [12]:
from torch_geometric.nn import MessagePassing, global_mean_pool
import torch.nn.functional as F
import torch

class EdgeWeightedConv(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(EdgeWeightedConv, self).__init__(aggr='add')  # "add" aggregation.
        self.lin = torch.nn.Linear(in_channels, out_channels)

    def forward(self, x, edge_index, edge_weight):
        # x: [N, in_channels], edge_index: [2, E], edge_weight: [E]
        edge_weight = edge_weight.unsqueeze(-1)  # [E, 1]
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, edge_weight=edge_weight)

    def message(self, x_j, edge_weight):
        return edge_weight * self.lin(x_j)

class Net(torch.nn.Module):
    def __init__(self, num_node_features, hidden_dim):
        super(Net, self).__init__()
        self.conv1 = EdgeWeightedConv(num_node_features, hidden_dim)
        self.conv2 = EdgeWeightedConv(hidden_dim, hidden_dim)
        self.fc = torch.nn.Linear(hidden_dim, 2)

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_attr

        x = F.relu(self.conv1(x, edge_index, edge_weight))
        x = F.relu(self.conv2(x, edge_index, edge_weight))
        x = F.dropout(x, training=self.training)
        x = self.fc(x)
        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 使用示例
model = Net(17, hidden_dim=64).to(device)
# 判断是否有GPU
data = data.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

# 训练函数
def train(model, data, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

# 预测函数
def predict(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data)
        predictions = torch.softmax(out, dim=1)
        return predictions

# 训练模型
for epoch in range(200):
    loss = train(model, data, optimizer, criterion)
    print(f'Epoch {epoch}: Loss {loss}')

# 使用模型进行预测
predictions = predict(model, data)



Epoch 0: Loss 0.9720125794410706
Epoch 1: Loss 0.6166316270828247
Epoch 2: Loss 0.6127698421478271
Epoch 3: Loss 0.6083108186721802
Epoch 4: Loss 0.6023171544075012
Epoch 5: Loss 0.5961865782737732
Epoch 6: Loss 0.5786877870559692
Epoch 7: Loss 0.5693234801292419
Epoch 8: Loss 0.5544764995574951
Epoch 9: Loss 0.5301185250282288
Epoch 10: Loss 0.5032992959022522
Epoch 11: Loss 0.4874825179576874
Epoch 12: Loss 0.4701153039932251
Epoch 13: Loss 0.4462882876396179
Epoch 14: Loss 0.42865708470344543
Epoch 15: Loss 0.4142681956291199
Epoch 16: Loss 0.3918411135673523
Epoch 17: Loss 0.37842851877212524
Epoch 18: Loss 0.3660755753517151
Epoch 19: Loss 0.3521394729614258
Epoch 20: Loss 0.3356037735939026
Epoch 21: Loss 0.32726117968559265
Epoch 22: Loss 0.3153165280818939
Epoch 23: Loss 0.3064103424549103
Epoch 24: Loss 0.2979830801486969
Epoch 25: Loss 0.2894017696380615
Epoch 26: Loss 0.2815624475479126
Epoch 27: Loss 0.27388995885849
Epoch 28: Loss 0.2666645646095276
Epoch 29: Loss 0.259465

In [13]:
from sklearn.metrics import auc,roc_auc_score,roc_curve
correct=0
pred_test=predictions.cpu().numpy()[valid_mask]
auc_score=roc_auc_score(y[valid_mask],pred_test[:,1])

In [None]:
prob=torch.nn.functional.softmax(predictions, dim=1)

In [12]:
pred=predictions.cpu().numpy()[test_mask]
pred[:,1]=pred[:,1].astype(float)

In [45]:
predictions.argmax(dim=1)

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')

In [46]:
(predictions.argmax(dim=1)==0).sum()

tensor(579157, device='cuda:0')

In [14]:

pred = model(data).detach().cpu().numpy()
y

tensor([[3.4939, 2.9809, 0.0000,  ..., 0.0000, 2.4877, 2.3615],
        [2.2102, 1.9009, 0.0000,  ..., 0.0000, 2.5485, 1.6243],
        [3.0660, 2.6209, 0.0000,  ..., 0.0000, 2.5080, 2.1158],
        ...,
        [2.6748, 2.2563, 0.0000,  ..., 0.0000, 2.6991, 1.9989],
        [4.1165, 3.4914, 0.0000,  ..., 0.0000, 2.9595, 2.7947],
        [2.9140, 2.5622, 0.0000,  ..., 0.0000, 0.0000, 2.8049]],
       device='cuda:0', grad_fn=<ReluBackward0>)


In [44]:
predy=pred[test_mask]

IndexError: index 551751 is out of bounds for axis 0 with size 193053

In [43]:
np.where(pred[:,0]<0.5)

(array([], dtype=int64),)

In [13]:
result=pd.DataFrame({'index':test_mask,'predict':pred[:,1]})
result.to_csv('gcn.csv',index=None)