In [1]:
import networkx as nx
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.utils import to_networkx
from copy import deepcopy
from scipy.sparse import coo_matrix
from sklearn.metrics import roc_auc_score
import itertools
import dgl

In [2]:
# Load the Planetoid 'Cora' dataset (cached under data/Planetoid) with the
# NormalizeFeatures transform attached to it.
cora = Planetoid(
    root='data/Planetoid',
    name='Cora',
    transform=NormalizeFeatures(),
)
# First entry of the dataset; this is the graph object used by all later cells.
data = cora[0]

In [3]:
# Create a list of positive and negative edges
u, v = data.edge_index.numpy()

# Build the adjacency matrix with an explicit (num_nodes, num_nodes) shape.
# Without `shape`, scipy infers the size from the largest index present, so the
# matrix would be too small whenever the highest-numbered nodes have no edges
# and the subtraction with np.eye(data.num_nodes) below would fail.
adj = coo_matrix(
    (np.ones(data.num_edges), (u, v)),
    shape=(data.num_nodes, data.num_nodes),
)

# 1 where there is neither an edge nor a diagonal entry, <= 0 everywhere else.
# NOTE: this materialises a dense num_nodes x num_nodes matrix.
adj_neg = 1 - adj.todense() - np.eye(data.num_nodes)

# Select strictly positive entries only: duplicated edges (summed by the COO
# conversion) and self-loops produce negative values here, and `!= 0` would
# wrongly report those as negative (non-existent) edges.
neg_u, neg_v = np.where(adj_neg > 0)

In [4]:
# Create train/test edge split
# A dedicated, seeded generator makes the split reproducible on
# Restart & Run All without touching NumPy's global random state.
split_rng = np.random.default_rng(0)

test_size = int(np.floor(data.num_edges * 0.1))  # hold out 10% of the edges
eids = split_rng.permutation(data.num_edges)  # Create a shuffled array of 'edge IDs'

train_pos_u, train_pos_v = data.edge_index[:, eids[test_size:]]
test_pos_u, test_pos_v   = data.edge_index[:, eids[:test_size]]

# NOTE(review): if edge_index stores every undirected edge in both directions
# (presumably the case for this dataset -- confirm), the reverse of a held-out
# test edge can still end up in the training split. Splitting on undirected
# edges would avoid that leakage.

# Sample an equal amount of negative edges from the graph, split into train/test.
# Sampling without replacement keeps the train and test negatives disjoint and
# free of duplicates; fall back to sampling with replacement only when there
# are fewer candidate negatives than requested.
neg_eids = split_rng.choice(
    len(neg_u),
    size=data.num_edges,
    replace=len(neg_u) < data.num_edges,
)
test_neg_u, test_neg_v = (
    neg_u[neg_eids[:test_size]],
    neg_v[neg_eids[:test_size]],
)
train_neg_u, train_neg_v = (
    neg_u[neg_eids[test_size:]],
    neg_v[neg_eids[test_size:]],
)

In [50]:
# Remove test edges from original graph

def remove_edges(G, edges):
    """Return a deep copy of ``G`` with ``edges`` removed.

    The input graph is left untouched; removal is delegated to the copy's
    ``remove_edges_from`` method.

    Args:
        G: Graph object exposing ``remove_edges_from``.
        edges: Iterable of edges to drop, passed through to ``remove_edges_from``.

    Returns:
        The pruned copy of ``G``.
    """
    pruned = deepcopy(G)
    pruned.remove_edges_from(edges)
    return pruned

# Convert the PyG graph to networkx, keeping all of its node attributes.
G = to_networkx(data, node_attrs=data.node_attrs())

# Held-out positive edges as rows of (source, target) pairs.
test_edges = np.column_stack([test_pos_u, test_pos_v])
G_train = remove_edges(G, test_edges)

# Carry over to DGL every attribute name found on node 0.
node_attr_names = list(G.nodes[0].keys())
train_g = dgl.from_networkx(G_train, node_attrs=node_attr_names)


In [51]:
# Show the repr of the DGL training graph (node/edge counts and feature schemes).
train_g

Graph(num_nodes=2708, num_edges=9501,
      ndata_schemes={'test_mask': Scheme(shape=(), dtype=torch.bool), 'y': Scheme(shape=(), dtype=torch.int64), 'x': Scheme(shape=(1433,), dtype=torch.float32), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [37]:
def _edge_graph(src, dst):
    """Build a DGL graph over all ``data.num_nodes`` nodes from the given edge endpoints."""
    return dgl.graph((src, dst), num_nodes=data.num_nodes)


# Positive / negative edge sets used as training targets.
train_pos_g = _edge_graph(train_pos_u, train_pos_v)
train_neg_g = _edge_graph(train_neg_u, train_neg_v)

# Positive / negative edge sets held out for evaluation.
test_pos_g = _edge_graph(test_pos_u, test_pos_v)
test_neg_g = _edge_graph(test_neg_u, test_neg_v)

In [52]:
from dgl.nn import SAGEConv
import dgl.function as fn

# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
class GraphSAGE(torch.nn.Module):
    """Two stacked ``SAGEConv`` layers using the "mean" aggregator.

    Args:
        in_feats: Size of the input node features.
        h_feats: Size used for both the hidden and the output representation.
    """

    def __init__(self, in_feats, h_feats):
        super().__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, "mean")
        self.conv2 = SAGEConv(h_feats, h_feats, "mean")

    def forward(self, g, in_feat):
        """Run both layers on graph ``g`` with a ReLU in between.

        Args:
            g: Graph passed to each ``SAGEConv`` layer.
            in_feat: Input node features.

        Returns:
            The output of the second layer (no activation applied to it).
        """
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)
    

class DotPredictor(torch.nn.Module):
    """Scores every edge of a graph by the dot product of its endpoint features."""

    def forward(self, g, h):
        """Return one score per edge of ``g``.

        Args:
            g: Graph whose edges are scored.
            h: Node features, stored temporarily on ``g`` as ``ndata["h"]``.

        Returns:
            The first (only) column of the per-edge 'score' feature.
        """
        # Presumably local_scope keeps the temporary 'h'/'score' fields from
        # persisting on g after the block exits -- confirm against DGL docs.
        with g.local_scope():
            g.ndata["h"] = h
            # Edge feature 'score' = dot product of the source node's 'h' and
            # the destination node's 'h'.
            g.apply_edges(fn.u_dot_v("h", "h", "score"))
            edge_scores = g.edata["score"]
            # u_dot_v yields a 1-element vector per edge; keep just that element.
            return edge_scores[:, 0]

In [53]:
# Encoder: GraphSAGE over the node features 'x', with 16 hidden units.
model = GraphSAGE(in_feats=train_g.ndata["x"].shape[1], h_feats=16)
# Edge scorer: plain dot product. An MLP-based predictor (e.g. MLPPredictor(16))
# could be swapped in instead, but none is defined in the visible cells.
pred = DotPredictor()


def compute_loss(pos_score, neg_score):
    """Binary cross-entropy (with logits) over positive and negative edge scores.

    Positive edges are labelled 1 and negative edges 0. The labels are created
    with ``ones_like`` / ``zeros_like`` so they share the device and dtype of
    the scores; building them with ``torch.ones(n)`` always allocates on the
    CPU, which breaks as soon as the scores live on another device.

    Args:
        pos_score: Raw (pre-sigmoid) scores of the positive edges.
        neg_score: Raw (pre-sigmoid) scores of the negative edges.

    Returns:
        Scalar tensor holding the mean loss over all edges.
    """
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
    return F.binary_cross_entropy_with_logits(scores, labels)


def compute_auc(pos_score, neg_score):
    """ROC AUC of the edge scores, with positive edges as class 1.

    The scores are detached and moved to the CPU before the NumPy conversion,
    because ``Tensor.numpy()`` raises for tensors that require grad or that
    live on a non-CPU device. For plain CPU tensors the result is unchanged.

    Args:
        pos_score: Scores of the positive edges (1-D tensor).
        neg_score: Scores of the negative edges (1-D tensor).

    Returns:
        The value returned by ``roc_auc_score(labels, scores)``.
    """
    scores = torch.cat([pos_score, neg_score]).detach().cpu().numpy()
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]
    ).numpy()
    return roc_auc_score(labels, scores)

In [54]:
# ----------- 3. set up loss and optimizer -------------- #
# in this case, the loss is computed inside the training loop
optimizer = torch.optim.Adam(
    itertools.chain(model.parameters(), pred.parameters()), lr=0.01
)

# ----------- 4. training -------------------------------- #
all_logits = []  # not used in this cell; kept in case other cells read it
for e in range(100):
    # forward: node embeddings from the training graph, then edge scores
    h = model(train_g, train_g.ndata["x"])
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % 5 == 0:
        print("In epoch {}, loss: {}".format(e, loss))

# ----------- 5. check results ------------------------ #
# Recompute the embeddings with the final parameters. The `h` left over from
# the loop was produced *before* the last optimizer.step(), so scoring the test
# edges with it would evaluate a model that is one update behind.
with torch.no_grad():
    final_h = model(train_g, train_g.ndata["x"])
    pos_score = pred(test_pos_g, final_h)
    neg_score = pred(test_neg_g, final_h)
    print("AUC", compute_auc(pos_score, neg_score))

In epoch 0, loss: 0.7107418179512024
In epoch 5, loss: 0.6896830797195435
In epoch 10, loss: 0.6702037453651428
In epoch 15, loss: 0.6157683730125427
In epoch 20, loss: 0.5559787750244141
In epoch 25, loss: 0.515147864818573
In epoch 30, loss: 0.4857601225376129
In epoch 35, loss: 0.4715117812156677
In epoch 40, loss: 0.4501242935657501
In epoch 45, loss: 0.43352076411247253
In epoch 50, loss: 0.4171629846096039
In epoch 55, loss: 0.4002867043018341
In epoch 60, loss: 0.3837142586708069
In epoch 65, loss: 0.36696240305900574
In epoch 70, loss: 0.3509295582771301
In epoch 75, loss: 0.3340570032596588
In epoch 80, loss: 0.31674373149871826
In epoch 85, loss: 0.2993151843547821
In epoch 90, loss: 0.28167659044265747
In epoch 95, loss: 0.26417118310928345
AUC 0.8575728307989489
