<a href="https://colab.research.google.com/github/sggyuan/GNN/blob/main/simplegdata%E8%BD%AC%E6%8D%A2%E7%89%88%E6%9C%AC%E8%BE%93%E5%85%A5vgae.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import torch
# Export the installed torch version string as the TORCH environment variable
# so the shell `pip` commands in this cell can expand ${TORCH} inside the
# PyG wheel-index URL.
os.environ['TORCH'] = torch.__version__
# Show the version for the record (the saved run printed "2.4.0+cu121").
print(torch.__version__)

# Install the compiled PyG companion packages from the wheel index whose URL
# embeds the torch version; ${TORCH} is expanded by the shell from the
# environment variable set earlier in this cell.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# Install PyTorch Geometric itself straight from the GitHub repository
# (no tag or commit is pinned, so the installed revision can change between runs).
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

2.4.0+cu121
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [2]:
import torch
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges

In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the graph file loaded later in this
# notebook is read from a path under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import torch
from torch_geometric.data import Data as PyGData

# Lightweight graph container. The object loaded from the .pt file below prints
# with this class's repr.
# NOTE(review): presumably the class must stay defined under this exact name for
# torch.load to unpickle that file -- confirm before renaming or moving it.
class SimpleGraphData:
    """Minimal holder for a graph's node features, edge index and labels.

    Attributes:
        x: node feature tensor; ``x.size(0)`` is used as the node count.
        edge_index: edge index tensor; ``edge_index.size(1)`` is used as the edge count.
        y: node labels, or ``None`` when not supplied.
        num_nodes: node count derived from ``x`` at construction time.
        num_edges: edge count derived from ``edge_index`` at construction time.
    """

    def __init__(self, x, edge_index, y=None):
        # Store the raw inputs first, then the sizes derived from them.
        self.x, self.edge_index, self.y = x, edge_index, y
        self.num_nodes = x.size(0)
        self.num_edges = edge_index.size(1)

    def __repr__(self):
        # Only the two counts are shown; tensors are deliberately left out.
        counts = 'num_nodes={}, num_edges={}'.format(self.num_nodes, self.num_edges)
        return 'SimpleGraphData(' + counts + ')'

# Load the serialized graph from Google Drive.
# The file holds a pickled SimpleGraphData object (see the printed repr), not a
# plain tensor or state_dict, so it needs full unpickling: weights_only=False is
# passed explicitly. Relying on the implicit default triggers a warning on this
# torch version (the recorded run flagged this very call) and breaks on torch
# releases where the default is weights_only=True.
# SECURITY: full unpickling can execute arbitrary code -- only load files you trust.
data = torch.load('/content/drive/MyDrive/my_graph_data-4.pt', weights_only=False)

print("Loaded data:", data)

# Convert the SimpleGraphData object into a PyTorch Geometric Data object.
# NOTE(review): in the recorded run `y` has 100000 entries while `x` has 11241
# rows, so `y` does not look like a per-node label vector -- confirm upstream.
pyg_data = PyGData(
    x=data.x,
    edge_index=data.edge_index,
    y=getattr(data, 'y', None),  # objects saved without labels yield y=None
)

# Set the train/val/test masks to None (no node-level masks are used here).
pyg_data.train_mask = pyg_data.val_mask = pyg_data.test_mask = None

print("Converted PyG data:", pyg_data)

# Split the edges into train / validation / test sets for link prediction.
from torch_geometric.transforms import RandomLinkSplit

# NOTE(review): the split is random and no seed is set anywhere in this
# notebook, so the splits (and every metric below) differ between runs.
transform = RandomLinkSplit(
    num_val=0.05,                      # share of edges held out for validation
    num_test=0.1,                      # share of edges held out for testing
    is_undirected=True,                # treat the edge list as an undirected graph
    add_negative_train_samples=False,  # training split keeps positive edges only
)

train_data, val_data, test_data = transform(pyg_data)

print("Train data:", train_data)
print("Val data:", val_data)
print("Test data:", test_data)

Loaded data: SimpleGraphData(num_nodes=11241, num_edges=80252)
Converted PyG data: Data(x=[11241, 20], edge_index=[2, 80252], y=[100000])


  data = torch.load('/content/drive/MyDrive/my_graph_data-4.pt')


Train data: Data(x=[11241, 20], edge_index=[2, 68218], y=[100000], edge_label=[34109], edge_label_index=[2, 34109])
Val data: Data(x=[11241, 20], edge_index=[2, 68218], y=[100000], edge_label=[4012], edge_label_index=[2, 4012])
Test data: Data(x=[11241, 20], edge_index=[2, 72230], y=[100000], edge_label=[8024], edge_label_index=[2, 8024])


In [5]:
# Echo the loaded graph object; its __repr__ reports the node and edge counts.
data

SimpleGraphData(num_nodes=11241, num_edges=80252)

In [25]:
class GCNEncoder(torch.nn.Module):
    """Two-layer GCN encoder producing ``out_channels``-dimensional node embeddings.

    The hidden layer is twice as wide as the output layer. Both convolutions are
    created with ``cached=True``, which is only appropriate for transductive
    learning.

    Args:
        in_channels: size of each input node feature vector.
        out_channels: size of each output embedding.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden_channels = 2 * out_channels
        # cached only for transductive learning
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv2 = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        """Apply ``conv1`` + ReLU, then ``conv2`` with no final activation."""
        hidden = self.conv1(x, edge_index).relu()
        embeddings = self.conv2(hidden, edge_index)
        return embeddings

In [22]:
from torch_geometric.nn import GAE
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [27]:
# --- GAE experiment configuration ---
out_channels = 2
num_features = pyg_data.num_features
epochs = 100

# Pick the device first, then build the autoencoder and place it there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GAE(GCNEncoder(num_features, out_channels)).to(device)

# Move the training tensors to the same device as the model: node features,
# the message-passing edges, and the supervision edges.
x, train_edge_index, train_edge_label_index = (
    tensor.to(device)
    for tensor in (train_data.x, train_data.edge_index, train_data.edge_label_index)
)

# Initialize the optimizer, plus a scheduler in 'max' mode that halves the
# learning rate when the monitored value stops improving (patience of 10 steps).
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=10)

In [18]:
import torch.nn.functional as F
def train():
    """Run one full-batch optimization step on the global ``model``.

    Encodes the global node features ``x`` over ``train_edge_index``, computes
    the reconstruction loss against ``train_edge_label_index``, backpropagates
    and steps the global ``optimizer``.

    Returns:
        The reconstruction loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    embeddings = model.encode(x, train_edge_index)
    recon = model.recon_loss(embeddings, train_edge_label_index)
    recon.backward()
    optimizer.step()
    return recon.item()

def test(pos_edge_index, neg_edge_index):
    """Score held-out edges with the global ``model``.

    Embeddings are computed without gradient tracking from the global ``x`` and
    ``train_edge_index`` (the training graph is used for message passing even
    when scoring validation or test edges).

    Args:
        pos_edge_index: edges labelled as existing.
        neg_edge_index: edges labelled as non-existing.

    Returns:
        Whatever ``model.test`` returns; callers in this notebook unpack it as
        ``(auc, ap)``.
    """
    model.eval()
    with torch.no_grad():
        embeddings = model.encode(x, train_edge_index)
    metrics = model.test(embeddings, pos_edge_index, neg_edge_index)
    return metrics




In [28]:

# Early-stopping state. Starting from -inf guarantees that the first epoch
# writes a checkpoint, so the reload after the loop never hits a missing or
# stale 'best_model.pth' from an earlier run.
best_val_auc = float('-inf')
patience = 20   # epochs without validation-AUC improvement before stopping
counter = 0     # epochs elapsed since the last improvement

# The validation edges do not change between epochs: select them once.
val_pos_edges = val_data.edge_label_index[:, val_data.edge_label == 1]
val_neg_edges = val_data.edge_label_index[:, val_data.edge_label == 0]

for epoch in range(1, epochs + 1):
    loss = train()
    auc, ap = test(val_pos_edges, val_neg_edges)

    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, AUC: {auc:.4f}, AP: {ap:.4f}')

    scheduler.step(auc)  # adjust the learning rate based on validation AUC

    if auc > best_val_auc:
        # New best validation AUC: reset the counter and checkpoint the weights.
        best_val_auc = auc
        counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping")
            break

# Reload the best checkpoint before evaluating on the test split.
# A state_dict holds only tensors, so the restricted unpickler
# (weights_only=True) is sufficient; it avoids the warning raised for the
# implicit default and does not execute arbitrary pickled code.
# map_location keeps the load working if the device changed since saving.
model.load_state_dict(
    torch.load('best_model.pth', map_location=device, weights_only=True)
)
test_auc, test_ap = test(test_data.edge_label_index[:, test_data.edge_label == 1],
                         test_data.edge_label_index[:, test_data.edge_label == 0])
print(f'Final Test AUC: {test_auc:.4f}, AP: {test_ap:.4f}')

Epoch: 001, Loss: 26.9329, AUC: 0.6217, AP: 0.5702
Epoch: 002, Loss: 27.0666, AUC: 0.6392, AP: 0.5820
Epoch: 003, Loss: 26.2680, AUC: 0.6564, AP: 0.5941
Epoch: 004, Loss: 25.2898, AUC: 0.6722, AP: 0.6059
Epoch: 005, Loss: 24.3855, AUC: 0.6925, AP: 0.6221
Epoch: 006, Loss: 22.2078, AUC: 0.7060, AP: 0.6333
Epoch: 007, Loss: 20.0024, AUC: 0.7210, AP: 0.6423
Epoch: 008, Loss: 20.4213, AUC: 0.7284, AP: 0.6483
Epoch: 009, Loss: 19.9113, AUC: 0.7484, AP: 0.6659
Epoch: 010, Loss: 18.1974, AUC: 0.7627, AP: 0.6794
Epoch: 011, Loss: 16.2679, AUC: 0.7594, AP: 0.6850
Epoch: 012, Loss: 15.4764, AUC: 0.7739, AP: 0.7007
Epoch: 013, Loss: 14.7872, AUC: 0.7866, AP: 0.7166
Epoch: 014, Loss: 13.5051, AUC: 0.8003, AP: 0.7350
Epoch: 015, Loss: 12.2829, AUC: 0.8093, AP: 0.7510
Epoch: 016, Loss: 10.7803, AUC: 0.8212, AP: 0.7698
Epoch: 017, Loss: 9.4104, AUC: 0.8274, AP: 0.7830
Epoch: 018, Loss: 8.1834, AUC: 0.8312, AP: 0.7944
Epoch: 019, Loss: 7.3908, AUC: 0.7966, AP: 0.7824
Epoch: 020, Loss: 6.3860, AUC: 0.7

  model.load_state_dict(torch.load('best_model.pth'))


In [29]:
# Second training loop over the current global `model` / `optimizer`: the same
# per-epoch train + validation evaluation as the early-stopping loop, but with
# no scheduler step, no checkpointing and no early stopping.
# NOTE(review): `model` is not re-created in this cell, so this appears to keep
# training whatever model is already in the kernel -- confirm that is intended.
# The recorded output shows validation AUC falling steadily across these epochs.
for epoch in range(1, epochs + 1):
    loss = train()

    # Evaluate on the validation split (positive edges: label 1, negative edges: label 0)
    auc, ap = test(val_data.edge_label_index[:, val_data.edge_label == 1],
                   val_data.edge_label_index[:, val_data.edge_label == 0])

    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, AUC: {auc:.4f}, AP: {ap:.4f}')

# Final evaluation on the test split, using the model state after the last epoch
# (no best-checkpoint reload happens in this cell).
test_auc, test_ap = test(test_data.edge_label_index[:, test_data.edge_label == 1],
                         test_data.edge_label_index[:, test_data.edge_label == 0])
print(f'Final Test AUC: {test_auc:.4f}, AP: {test_ap:.4f}')

Epoch: 001, Loss: 7.4827, AUC: 0.8287, AP: 0.7936
Epoch: 002, Loss: 7.1678, AUC: 0.8216, AP: 0.7918
Epoch: 003, Loss: 6.9744, AUC: 0.8018, AP: 0.7838
Epoch: 004, Loss: 6.6599, AUC: 0.7974, AP: 0.7825
Epoch: 005, Loss: 6.1592, AUC: 0.7854, AP: 0.7765
Epoch: 006, Loss: 6.0224, AUC: 0.7435, AP: 0.7527
Epoch: 007, Loss: 5.4504, AUC: 0.7089, AP: 0.7302
Epoch: 008, Loss: 4.9433, AUC: 0.6854, AP: 0.7158
Epoch: 009, Loss: 4.5722, AUC: 0.6651, AP: 0.7022
Epoch: 010, Loss: 4.3157, AUC: 0.6520, AP: 0.6930
Epoch: 011, Loss: 4.1145, AUC: 0.6381, AP: 0.6834
Epoch: 012, Loss: 3.7496, AUC: 0.6460, AP: 0.6831
Epoch: 013, Loss: 3.3492, AUC: 0.6159, AP: 0.6489
Epoch: 014, Loss: 3.0815, AUC: 0.5742, AP: 0.6180
Epoch: 015, Loss: 2.8902, AUC: 0.4851, AP: 0.5422
Epoch: 016, Loss: 2.6058, AUC: 0.4510, AP: 0.5120
Epoch: 017, Loss: 2.5041, AUC: 0.4260, AP: 0.4893
Epoch: 018, Loss: 2.4355, AUC: 0.4155, AP: 0.4777
Epoch: 019, Loss: 2.2875, AUC: 0.4083, AP: 0.4714
Epoch: 020, Loss: 2.2447, AUC: 0.4113, AP: 0.4737


In [31]:
from torch_geometric.nn import VGAE

In [32]:
class VariationalGCNEncoder(torch.nn.Module):
    """GCN encoder with a shared first layer and two heads, ``conv_mu`` and ``conv_logstd``.

    The shared hidden layer is twice as wide as the output. All convolutions are
    created with ``cached=True``, which is only appropriate for transductive
    learning.

    Args:
        in_channels: size of each input node feature vector.
        out_channels: size of each head's output.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        hidden_channels = 2 * out_channels
        # cached only for transductive learning
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv_mu = GCNConv(hidden_channels, out_channels, cached=True)
        self.conv_logstd = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index):
        """Return the ``(conv_mu, conv_logstd)`` outputs computed from the shared hidden layer."""
        hidden = self.conv1(x, edge_index).relu()
        mu = self.conv_mu(hidden, edge_index)
        logstd = self.conv_logstd(hidden, edge_index)
        return mu, logstd

In [33]:
# --- VGAE experiment configuration (replaces the GAE globals of the same names) ---
out_channels = 2
num_features = pyg_data.num_features
epochs = 300

# Pick the device first, then build the variational autoencoder and place it there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = VGAE(VariationalGCNEncoder(num_features, out_channels)).to(device)

# Move the training tensors to the same device as the model: node features,
# the message-passing edges, and the supervision edges.
x, train_edge_index, train_edge_label_index = (
    tensor.to(device)
    for tensor in (train_data.x, train_data.edge_index, train_data.edge_label_index)
)

# Initialize the optimizer (no weight decay and no LR scheduler in this experiment).
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [34]:
def train():
    """Run one full-batch VGAE optimization step on the global ``model``.

    The loss is the reconstruction loss on the training supervision edges plus
    the KL term scaled by ``1 / train_data.num_nodes``.

    Returns:
        The combined loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    z = model.encode(x, train_edge_index)
    # Use the copy of the supervision edges that was already moved to `device`
    # (`train_edge_label_index`) rather than the CPU tensor on `train_data`,
    # so the embeddings and the edge indices live on the same device. This
    # matches how the GAE `train` earlier in the notebook calls recon_loss.
    recon = model.recon_loss(z, train_edge_label_index)
    kl_weight = 1 / train_data.num_nodes
    loss = recon + kl_weight * model.kl_loss()
    loss.backward()
    optimizer.step()
    return loss.item()

def test(pos_edge_index, neg_edge_index):
    """Score held-out edges with the global VGAE ``model``.

    Embeddings come from the global ``x`` and ``train_edge_index`` and are
    computed with gradient tracking disabled.

    Args:
        pos_edge_index: edges labelled as existing.
        neg_edge_index: edges labelled as non-existing.

    Returns:
        Whatever ``model.test`` returns; callers in this notebook unpack it as
        ``(auc, ap)``.
    """
    model.eval()
    with torch.no_grad():
        latent = model.encode(x, train_edge_index)
    scores = model.test(latent, pos_edge_index, neg_edge_index)
    return scores

In [35]:
from torch.utils.tensorboard import SummaryWriter

# The evaluation edge sets are fixed, so select them once up front instead of
# re-slicing inside the loop (positive edges: label 1, negative edges: label 0).
val_pos_edges = val_data.edge_label_index[:, val_data.edge_label == 1]
val_neg_edges = val_data.edge_label_index[:, val_data.edge_label == 0]
test_pos_edges = test_data.edge_label_index[:, test_data.edge_label == 1]
test_neg_edges = test_data.edge_label_index[:, test_data.edge_label == 0]

writer = SummaryWriter('runs/VGAE_experiment_2d_300_epochs')
try:
    for epoch in range(1, epochs + 1):
        loss = train()
        auc, ap = test(val_pos_edges, val_neg_edges)

        print('Epoch: {:03d}, Loss: {:.4f}, AUC: {:.4f}, AP: {:.4f}'.format(epoch, loss, auc, ap))

        # Log the training loss and the validation metrics for TensorBoard.
        writer.add_scalar('loss', loss, epoch)
        writer.add_scalar('auc_val', auc, epoch)
        writer.add_scalar('ap_val', ap, epoch)

    # Final evaluation on the test split, using the model after the last epoch.
    test_auc, test_ap = test(test_pos_edges, test_neg_edges)
    print('Final Test AUC: {:.4f}, AP: {:.4f}'.format(test_auc, test_ap))
finally:
    # Close the writer even if training is interrupted or raises, so the
    # scalars logged so far are flushed to disk.
    writer.close()

Epoch: 001, Loss: 22499.4727, AUC: 0.5077, AP: 0.5039
Epoch: 002, Loss: 22145.8027, AUC: 0.5090, AP: 0.5045
Epoch: 003, Loss: 21358.6484, AUC: 0.5112, AP: 0.5057
Epoch: 004, Loss: 20017.2324, AUC: 0.5123, AP: 0.5084
Epoch: 005, Loss: 19852.4082, AUC: 0.5121, AP: 0.5107
Epoch: 006, Loss: 19667.0859, AUC: 0.5089, AP: 0.5099
Epoch: 007, Loss: 19525.8320, AUC: 0.5106, AP: 0.5116
Epoch: 008, Loss: 19373.1367, AUC: 0.5115, AP: 0.5124
Epoch: 009, Loss: 19207.8516, AUC: 0.5119, AP: 0.5131
Epoch: 010, Loss: 19056.6465, AUC: 0.5139, AP: 0.5144
Epoch: 011, Loss: 18897.2754, AUC: 0.5175, AP: 0.5164
Epoch: 012, Loss: 18730.6504, AUC: 0.5202, AP: 0.5185
Epoch: 013, Loss: 18554.7422, AUC: 0.5234, AP: 0.5211
Epoch: 014, Loss: 18326.1836, AUC: 0.5278, AP: 0.5243
Epoch: 015, Loss: 18159.4766, AUC: 0.5465, AP: 0.5357
Epoch: 016, Loss: 17976.4395, AUC: 0.5512, AP: 0.5390
Epoch: 017, Loss: 17776.7793, AUC: 0.5547, AP: 0.5414
Epoch: 018, Loss: 17524.7227, AUC: 0.5601, AP: 0.5452
Epoch: 019, Loss: 17274.0625

In [36]:
# Close the TensorBoard writer (the training cell also calls writer.close() itself,
# so this is a repeat call on the same writer).
writer.close()

In [37]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [None]:
# Launch TensorBoard inline, reading the event files under ./runs
# (the directory the SummaryWriter in the training cell writes to).
%tensorboard --logdir runs