In [36]:
import os.path as osp

import torch
from sklearn.metrics import roc_auc_score

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
from torch_geometric.utils import negative_sampling

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transform = T.Compose([
    T.NormalizeFeatures(),
    T.ToDevice(device),
    T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      add_negative_train_samples=False),
])

In [3]:
path = osp.join(osp.dirname(osp.realpath(".")), '..', 'data', 'Planetoid')
dataset = Planetoid(path, name='Cora', transform=transform)
# After applying the `RandomLinkSplit` transform, the data is transformed from
# a data object to a list of tuples (train_data, val_data, test_data), with
# each element representing the corresponding split.
train_data, val_data, test_data = dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


In [45]:
train_data

Data(x=[2708, 1433], edge_index=[2, 8976], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_label=[4488], edge_label_index=[2, 4488])

In [46]:
def add_edge_labels(graph):
    transform = T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                      add_negative_train_samples=False)
    return transform(graph)

In [47]:
labeled_graphs = [add_edge_labels(graph) for graph in data_list]

In [62]:
labeled_graphs[0]

(Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[654], edge_label_index=[2, 654]),
 Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[76], edge_label_index=[2, 76]),
 Data(x=[101, 2], edge_index=[2, 1384], y=[101, 101], pos=[101, 2], edge_label=[152], edge_label_index=[2, 152]))

In [65]:
for batch in train_loader:
    print(batch)

DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 30300], y=[2020, 101], pos=[2020, 2], batch=[2020], ptr=[21])


In [37]:
class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def encode(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

    def decode(self, z, edge_label_index):
        return (z[edge_label_index[0]] * z[edge_label_index[1]]).sum(dim=-1)

    def decode_all(self, z):
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()

In [38]:
model = Net(data_list[0].num_features, 128, 64).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

In [35]:
import numpy as np

In [58]:
def train(loader):
    model.train()
    total_loss = 0

    for batch in loader:
        optimizer.zero_grad()
        z = model.encode(batch.x, batch.edge_index)

        # We perform a new round of negative sampling for every training epoch:
        neg_edge_index = negative_sampling(
            edge_index=batch.edge_index, num_nodes=batch.num_nodes,
            num_neg_samples=batch.edge_label_index.size(1), method='sparse')

        # Concat positive and negative edge indices.
        edge_label_index = torch.cat(
            [batch.edge_label_index, neg_edge_index],
            dim=-1,
        )
        # Label for positive edges: 1, for negative edges: 0.
        edge_label = torch.cat([
            batch.edge_label,
            batch.edge_label.new_zeros(neg_edge_index.size(1))
        ], dim=0)

        # Note: The model is trained in a supervised manner using the given
        # `edge_label_index` and `edge_label` targets.
        out = model.decode(z, edge_label_index).view(-1)
        loss = criterion(out, edge_label)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)


@torch.no_grad()
def test(loader):
    model.eval()
    all_out = []
    all_labels = []

    for batch in loader:
        z = model.encode(batch.x, batch.edge_index)
        out = model.decode(z, batch.edge_label_index).view(-1).sigmoid()
        all_out.append(out.cpu().numpy())
        all_labels.append(batch.edge_label.cpu().numpy())

    all_out = np.concatenate(all_out)
    all_labels = np.concatenate(all_labels)
    return roc_auc_score(all_labels, all_out)

In [94]:
# Train/Test Loop
best_val_auc = final_test_auc = 0
for epoch in range(1, 101):
    loss = train(train_loader)
    val_auc = test(val_loader)
    test_auc = test(test_loader)
    if val_auc > best_val_auc:
        best_val_auc = val_auc
        final_test_auc = test_auc
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
          f'Test: {test_auc:.4f}')

print(f'Final Test: {final_test_auc:.4f}')

Epoch: 001, Loss: 107.0461, Val: 0.8452, Test: 0.8478
Epoch: 002, Loss: 1.4814, Val: 0.8528, Test: 0.8560
Epoch: 003, Loss: 0.7313, Val: 0.8598, Test: 0.8625
Epoch: 004, Loss: 0.5962, Val: 0.8658, Test: 0.8684
Epoch: 005, Loss: 0.5604, Val: 0.8709, Test: 0.8731
Epoch: 006, Loss: 0.5507, Val: 0.8736, Test: 0.8755
Epoch: 007, Loss: 0.5467, Val: 0.8755, Test: 0.8775
Epoch: 008, Loss: 0.5457, Val: 0.8770, Test: 0.8786
Epoch: 009, Loss: 0.5441, Val: 0.8774, Test: 0.8790
Epoch: 010, Loss: 0.5432, Val: 0.8788, Test: 0.8805
Epoch: 011, Loss: 0.5434, Val: 0.8798, Test: 0.8815
Epoch: 012, Loss: 0.5422, Val: 0.8813, Test: 0.8832
Epoch: 013, Loss: 0.5423, Val: 0.8820, Test: 0.8839
Epoch: 014, Loss: 0.5402, Val: 0.8832, Test: 0.8852
Epoch: 015, Loss: 0.5402, Val: 0.8849, Test: 0.8870
Epoch: 016, Loss: 0.5389, Val: 0.8867, Test: 0.8889
Epoch: 017, Loss: 0.5376, Val: 0.8884, Test: 0.8907
Epoch: 018, Loss: 0.5377, Val: 0.8910, Test: 0.8934
Epoch: 019, Loss: 0.5363, Val: 0.8930, Test: 0.8955
Epoch: 020

In [95]:


z = model.encode(train_size[0].x, train_size[0].edge_index)
final_edge_index = model.decode_all(z)

In [96]:
z

tensor([[ 0.0975,  0.0965,  0.1179,  ..., -0.2019, -0.0823,  0.2832],
        [ 0.1081, -0.1024,  0.2862,  ...,  0.0125, -0.0514, -0.0633],
        [ 0.0154, -0.0206,  0.2033,  ..., -0.2354, -0.1704,  0.2083],
        ...,
        [ 0.2875, -0.0498, -0.3933,  ..., -0.0443,  0.0819, -0.1261],
        [ 0.0784, -0.0241,  0.2030,  ...,  0.1055,  0.0498, -0.2083],
        [ 0.3449, -0.1079, -0.2666,  ..., -0.3180,  0.0281, -0.0787]],
       grad_fn=<AddBackward0>)

In [112]:
fei = final_edge_index.tolist()
edges_pred = {k:[] for k in range(101)}
edges_pred_inv = {k:[] for k in range(101)}
for i in range(len(fei[0])):
    edges_pred[fei[0][i]].append(fei[1][i])
    edges_pred_inv[fei[1][i]].append(fei[0][i])

In [117]:
ts0 = train_size[0].edge_index.tolist()
edges = {k:[] for k in range(101)}
edges_inv = {k:[] for k in range(101)}
for i in range(len(ts0[0])):
    edges[ts0[0][i]].append(ts0[1][i])
    edges_inv[ts0[1][i]].append(ts0[0][i])

In [118]:
print(edges[0])
print(edges_inv[0])

[72, 68, 22, 88]
[72, 68, 22, 88]


In [119]:
print(len(edges_pred[0]))
print(edges_pred_inv[0])

38
[0, 1, 2, 4, 5, 7, 12, 16, 21, 22, 24, 26, 30, 33, 35, 36, 38, 39, 42, 45, 47, 48, 50, 54, 57, 58, 59, 67, 68, 72, 76, 79, 81, 84, 87, 88, 89, 94]


now a chatgpt example

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GNNEncoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNNEncoder, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [16]:
class DotProductDecoder(nn.Module):
    def __init__(self):
        super(DotProductDecoder, self).__init__()

    def forward(self, z):
        adj_pred = torch.sigmoid(torch.matmul(z, z.t()))
        return adj_pred

In [17]:
class GraphAutoencoder(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GraphAutoencoder, self).__init__()
        self.encoder = GNNEncoder(input_dim, hidden_dim, output_dim)
        self.decoder = DotProductDecoder()

    def forward(self, x, edge_index):
        z = self.encoder(x, edge_index)
        adj_pred = self.decoder(z)
        return adj_pred, z

In [24]:
from math import sqrt
import torch
from torch_geometric.data import Data
from random import randint
from sys import float_info

instances = {}
for k in range(0, 1000):
    nodes = {}
    for i in range(0, 50):
        lat_i = randint(0, 100)
        lon_i = randint(0, 100)
        node_i = (lat_i, lon_i)
        lat_j = randint(0, 100)
        lon_j = randint(0, 100)
        node_j = (lat_j, lon_j)
        nodes[i + 1] = node_i
        nodes[i + 51] = node_j

    dist = {}
    pairs = {}
    for i in range(1, 101):
        for j in range(1, 101):
            if i != j:
                dist[i,j] = sqrt( (nodes[i][0] - nodes[j][0])**2 + (nodes[i][1] - nodes[j][1])**2 )
            else:
                dist[i,j] = float_info.max
    for i in range(1, 101):
        for j in range(1, 101):
            if i not in pairs:
                pairs[i] = j
            if i != j:
                if dist[i,j] < dist[i,pairs[i]]:
                    pairs[i] = j

    nodes[0] = (0,0)
    for i in range(1,101):
        dist[0,i] = sqrt( (nodes[0][0] - nodes[i][0])**2 + (nodes[0][1] - nodes[i][1])**2 )
        dist[i,0] = dist[0,i]
    y = [[0 for _ in range(101)] for _ in range(101)]
    for i in range(101):
        if i > 0:
            y[i][pairs[i]] = 1
                
    instances[k] = {"nodes": nodes, "dist": dist, "y": y}

from torch_geometric.nn import knn_graph
data_list = []
for instance_name in instances:
    y = torch.tensor(instances[instance_name]["y"], dtype=torch.float)
    x = torch.tensor([instances[instance_name]["nodes"][i] for i in range(0, 101)], dtype=torch.float)
    pos = []
    for i in range(101):
        pos.append(instances[instance_name]["nodes"][i])
    pos = torch.tensor(pos, dtype=torch.double)
    # ## filtering by TW, strict
    # complete_graph_list = []
    # for i in range(101):
    #     for j in range(101):
    #         if i!=j:
    #             try:
    #                 if instance_dict[instance_name][i][5] + instance_dict[instance_name][i][6] + loc_dict[i][j] < instance_dict[instance_name][i][5]:
    #                     complete_graph_list.append([i,j])
    #             except:
    #                 continue
    # edge_index = torch.tensor(complete_graph_list, dtype=torch.double).t().contiguous()
    ## end filtering
    data_list.append(Data(x=x, y=y, edge_index = knn_graph(x, 15), pos=pos))

In [88]:
from torch_geometric.loader import DataLoader

In [89]:
labeled_graphs[0]

(Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[654], edge_label_index=[2, 654]),
 Data(x=[101, 2], edge_index=[2, 1308], y=[101, 101], pos=[101, 2], edge_label=[76], edge_label_index=[2, 76]),
 Data(x=[101, 2], edge_index=[2, 1384], y=[101, 101], pos=[101, 2], edge_label=[152], edge_label_index=[2, 152]))

In [90]:
from torch_geometric.data import Batch

def custom_collate(data_list):
    batch = Batch.from_data_list(data_list)

    # Manually handle edge_label and edge_label_index if they exist in the data_list
    if hasattr(data_list[0], 'edge_label_index'):
        edge_label_index_list = [data.edge_label_index for data in data_list]
        batch.edge_label_index = torch.cat(edge_label_index_list, dim=1)
        
    if hasattr(data_list[0], 'edge_label'):
        edge_label_list = [data.edge_label for data in data_list]
        batch.edge_label = torch.cat(edge_label_list, dim=0)
        
    return batch

In [91]:
train_size = [g[0] for g in labeled_graphs]
val_size = [g[1] for g in labeled_graphs]
test_size = [g[2] for g in labeled_graphs]

In [92]:

train_loader = DataLoader(train_size, batch_size=20, shuffle=True)
val_loader = DataLoader(val_size, batch_size=20, shuffle=False)
test_loader = DataLoader(test_size, batch_size=20, shuffle=False)

In [93]:
for batch in train_loader:
    print(batch)

DataBatch(x=[2020, 2], edge_index=[2, 25794], y=[2020, 101], pos=[2020, 2], edge_label=[12897], edge_label_index=[2, 12897], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25712], y=[2020, 101], pos=[2020, 2], edge_label=[12856], edge_label_index=[2, 12856], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25696], y=[2020, 101], pos=[2020, 2], edge_label=[12848], edge_label_index=[2, 12848], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25478], y=[2020, 101], pos=[2020, 2], edge_label=[12739], edge_label_index=[2, 12739], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25672], y=[2020, 101], pos=[2020, 2], edge_label=[12836], edge_label_index=[2, 12836], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25806], y=[2020, 101], pos=[2020, 2], edge_label=[12903], edge_label_index=[2, 12903], batch=[2020], ptr=[21])
DataBatch(x=[2020, 2], edge_index=[2, 25796], y=[2020, 101], pos=[2020, 2], edge_label=[12898], edge_label

In [29]:
import torch.optim as optim
from torch_geometric.utils import to_dense_adj

# Assume `data_list` contains multiple graphs with `x`, `edge_index` attributes.
# data_list = [...] 

input_dim = 2
hidden_dim = 64
output_dim = 32
model = GraphAutoencoder(input_dim, hidden_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.BCELoss()

for epoch in range(50):
    model.train()
    total_loss = 0
    for data in loader:
        # optimizer.zero_grad()
        adj_dense = to_dense_adj(data.edge_index, max_num_nodes=data.num_nodes)[0]
        adj_dense = adj_dense.view(-1)
        adj_pred, _ = model(data.x, data.edge_index)
        loss = criterion(adj_pred.view(-1), adj_dense)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch {epoch}, Loss: {total_loss / len(data_list)}')


Epoch 0, Loss: 1.9940592956542968
Epoch 1, Loss: 1.9940592956542968
Epoch 2, Loss: 1.9940592956542968


KeyboardInterrupt: 