In [1]:
%load_ext autoreload
%autoreload 2
import gust  # library for loading graph data

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.distributions as dist
import time

torch.set_default_tensor_type('torch.cuda.FloatTensor')
%matplotlib inline
sns.set_style('whitegrid')

In [2]:
# Load the dataset using `gust` library
# graph.standardize() makes the graph unweighted, undirected and selects
# the largest connected component
# graph.unpack() returns the necessary vectors / matrices

%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, '../')
from utils import graph_util
A,  y = graph_util.load_dataset('cora')


# A - adjacency matrix 
# X - attribute matrix - not needed
# y - node labels

if (A != A.T).sum() > 0:
    raise RuntimeError("The graph must be undirected!")

if (A.data != 1).sum() > 0:
    raise RuntimeError("The graph must be unweighted!")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
num_nodes = A.shape[0]
num_edges = A.sum()

# Convert adjacency matrix to a CUDA Tensor
adj = torch.FloatTensor(A.toarray()).cuda()

adj.nonzero()
print(adj.shape)

torch.Size([2485, 2485])


In [4]:
torch.manual_seed(123)
# Define the embedding matrix
embedding_dim = 64
emb = nn.Parameter(torch.empty(num_nodes, embedding_dim).normal_(0.0, 1.0))

# Initialize the bias
# The bias is initialized in such a way that if the dot product between two embedding vectors is 0 
# (i.e. z_i^T z_j = 0), then their connection probability is sigmoid(b) equals to the 
# background edge probability in the graph. This significantly speeds up training
edge_proba = num_edges / (num_nodes**2 - num_nodes)
bias_init = np.log(edge_proba / (1 - edge_proba))
b = nn.Parameter(torch.Tensor([bias_init]))

# Regularize the embeddings but don't regularize the bias
# The value of weight_decay has a significant effect on the performance of the model (don't set too high!)
opt = torch.optim.Adam([
    {'params': [emb], 'weight_decay': 1e-7},
    {'params': [b]}],
    lr=1e-2)

In [5]:



def compute_loss_exp(adj, emb, b=0.0):
    prod= emb @ emb.t()
    term =1-torch.expm1(-prod) + 1e-5

    loss = F.binary_cross_entropy(term, adj, reduction='none')
    loss[np.diag_indices(adj.shape[0])] = 0.0
    # Since we consider graphs without self-loops, we don't want to compute loss
    # for the diagonal entries of the adjacency matrix.
    # This will kill the gradients on the diagonal.
    return loss.mean()
max_epochs = 1000
display_step = 250


RuntimeError: CUDA error: device-side assert triggered

In [None]:

compute_loss = compute_loss_exp

for epoch in range(max_epochs):
    opt.zero_grad()
    loss = compute_loss(adj, emb, b)
    loss.backward()
    opt.step()
    # Training loss is printed every display_step epochs
    if epoch % display_step == 0:
        print(f'Epoch {epoch:4d}, loss = {loss.item():.5f}')   