In [5]:
%load_ext autoreload
%autoreload 2
import gust  # library for loading graph data

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributions as dist
import time

torch.set_default_tensor_type('torch.cuda.FloatTensor')
%matplotlib inline
sns.set_style('whitegrid')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
A_cora, X_cora, _, y_cora = gust.load_dataset('cora').standardize().unpack()
A_cita, X_cita, _, y_cita = gust.load_dataset('citeseer').standardize().unpack()



In [11]:
num_nodes_co = A_cora.shape[0]
num_edges_co = A_cora.sum()

num_nodes_ci = A_cita.shape[0]
num_edges_ci = A_cita.sum()

# Convert adjacency matrix to a CUDA Tensor
adj_co = torch.FloatTensor(A_cora.toarray()).cuda()
adj_ci = torch.FloatTensor(A_cita.toarray()).cuda()

In [10]:
torch.manual_seed(123)

embedding_dim = 64
emb_co = nn.Parameter(torch.empty(num_nodes_co, embedding_dim).normal_(0.0, 1.0))

# Initialize the bias
# The bias is initialized in such a way that if the dot product between two embedding vectors is 0 
# (i.e. z_i^T z_j = 0), then their connection probability is sigmoid(b) equals to the 
# background edge probability in the graph. This significantly speeds up training
edge_proba = num_edges / (num_nodes_co**2 - num_nodes_co)
bias_init = np.log(edge_proba / (1 - edge_proba))
b = nn.Parameter(torch.Tensor([bias_init]))

# Regularize the embeddings but don't regularize the bias
# The value of weight_decay has a significant effect on the performance of the model (don't set too high!)
opt = torch.optim.Adam([
    {'params': [emb], 'weight_decay': 1e-7},
    {'params': [b]}],
    lr=1e-2)




NameError: name 'num_edges' is not defined

In [None]:
# There are many ways to compute the loss / negative log-likelihood of the model
def compute_loss_v1(adj, emb, b=0.0): 
    """Compute the negative log-likelihood of the Bernoulli model."""
    logits = emb @ emb.t() + b
    loss = F.binary_cross_entropy_with_logits(logits, adj, reduction='none')
    # Since we consider graphs without self-loops, we don't want to compute loss
    # for the diagonal entries of the adjacency matrix.
    # This will kill the gradients on the diagonal.
    loss[np.diag_indices(adj.shape[0])] = 0.0
    return loss.mean()

# This function uses the torch.distributions module
def compute_loss_v2(adj, emb, b=0.0): 
    """Compute the negative log-likelihood of the Bernoulli model."""
    logits = emb @ emb.t() + b
    distribution = dist.Bernoulli(logits=logits)
    log_probas = distribution.log_prob(adj)
    log_probas[np.diag_indices(adj.shape[0])] = 0.0
    loss = -log_probas.mean()
    return loss

# Here we compute the loss manually
def compute_loss_v3(adj, emb, b=0.0): 
    """Compute the negative log-likelihood of the Bernoulli model."""
    logits = emb @ emb.t() + b
    log_probas = adj * logits - F.softplus(logits)
    log_probas[np.diag_indices(adj.shape[0])] = 0.0
    loss = -log_probas.mean()
    return loss


# Approach 1: Naive apporach
def compute_loss_d1(adj, emb, b=0.0): 
    """Compute the rdf distance of the Bernoulli model."""
    # Initialization
    start_time = time.time()
    N,d=emb.shape
    squared_euclidian = torch.zeros(N,N).cuda()
    gamma= 0.1
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for initialization = {duration:.5f}')
    # Compute squared euclidian
    start_time = time.time()
    for index, embedding in enumerate(emb):
        sub =  embedding-emb + 10e-9
        squared_euclidian[index,:]= torch.sum(torch.pow(sub,2),1)
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for euclidian = {duration:.5f}')
    # Compute exponentianl
    start_time = time.time()
    radial_exp = torch.exp (-gamma * torch.sqrt(squared_euclidian))
    loss = F.binary_cross_entropy(radial_exp, adj, reduction='none')
    loss[np.diag_indices(adj.shape[0])] = 0.0
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for loss  = {duration:.5f}')
    return loss.mean()


# Approach 2: Use the squareform function
def compute_loss_d2(adj, emb, b=0.0): 
    # Initialization
    start_time = time.time()
    N,d=emb.shape
    squared_euclidian = torch.zeros(N,N).cuda()
    gamma= 0.1
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for initialization = {duration:.5f}')
    # Compute squared euclidian
    start_time = time.time()
    euclidian= torch.nn.functional.pdist(emb, p=2)
    euclidian_np= torch.from_numpy(euclidian.cpu().detach().numpy())
    matrix_euclidian_np = squareform(euclidian_np)
    matrix_euclidian = torch.from_numpy(matrix_euclidian_np).cuda()
    end_time= time.time()
    #print(f' Time for euclidian = {duration:.5f}')
    # Compute exponentianl
    start_time = time.time()
    radial_exp = torch.exp (-gamma * matrix_euclidian)
    loss = F.binary_cross_entropy(radial_exp, adj, reduction='none')
    loss[np.diag_indices(adj.shape[0])] = 0.0
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for loss  = {duration:.5f}')

    return loss.mean()



# Approach 3: Build the matrix from scratch
def compute_loss_d3(adj, emb, b=0.0): 
    # Initialization
    start_time = time.time()
    N,d=emb.shape
    squared_euclidian = torch.zeros(N,N).cuda()
    gamma= 0.1
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for initialization = {duration:.5f}')
    # Compute squared euclidian
    start_time = time.time()
    euclidian= torch.nn.functional.pdist(emb, p=2)
    start= 0
    for i in range(N):
        end = start + ( N - i -1)
        elem = len(euclidian[start:end])
        #print(f' start = {start:d}, end = {end:d},elements = {elem:d} ')
        squared_euclidian[i,i+1:] = euclidian[start:end] 
        squared_euclidian[i+1:,i] = euclidian[start:end] 
        start += (N-i-1) 
        end_time= time.time()
    #print(f' Time for euclidian = {duration:.5f}')
    # Compute exponentianl
    start_time = time.time()
    radial_exp = torch.exp (-gamma * squared_euclidian)
    loss = F.binary_cross_entropy(radial_exp, adj, reduction='none')
    end_time= time.time()
    duration= end_time -start_time
    #print(f' Time for loss  = {duration:.5f}')
    return loss.mean()

# Approach 4: Hold only a part of A 
def compute_loss_d4(adj, emb, b=0.1): 
    # Initialization
    N,d=emb.shape
    gamma= 0.1
    # Compute the rdf distance
    euclidian= torch.nn.functional.pdist(emb, p=2)
    radial_exp = torch.exp (-gamma * euclidian)
    # Extract the elements of the upper triangular matrix without the diagonal elements
    ind=torch.triu_indices(N,N,offset=1)
    labels = adj[ind[0].cpu().detach().numpy(),ind[1].cpu().detach().numpy()]
    # Compute the loss function
    loss = F.binary_cross_entropy(radial_exp, labels, reduction='none')
    return loss.mean()

# Approach 4: Hold only a part of A 
def compute_loss_d5(adj, emb, b=0.1): 
    # Initialization
    N,d=emb.shape
    gamma= 0.1
    # Compute the rdf distance
    euclidian= torch.nn.functional.pdist(emb, p=2).cpu()
    radial_exp = torch.exp (-gamma * euclidian).cpu()
    # Extract the elements of the upper triangular matrix without the diagonal elements
    ind=torch.triu_indices(N,N,offset=1).cpu()
    labels = adj[ind[0],ind[1]]
    # Compute the loss function
    loss = F.binary_cross_entropy(radial_exp.cuda(), labels.cuda(), reduction='none')
    return loss.mean()

def compute_loss_KL(adj, emb, b=0.0):
    degree= torch.from_numpy(A.sum(axis=1))
    inv_degree=torch.diagflat(1/degree).cuda()
    P = inv_degree.mm(adj) 
    loss = -(P*torch.log( 10e-9+ F.softmax(emb.mm(emb.t() ),dim=1,dtype=torch.float)))
    return loss.mean()

compute_loss = compute_loss_KL


def compute_loss_ber_exp1(adj, emb, b=0.1):
    #Init
    N,d=emb.shape
    gamma=0.001
    
    #get indices of upper triangular matrix
    ind=torch.triu_indices(N,N,offset=1)
    
    #compute f(z_i,z_j) = exp(-gamma||z_i-z_j||^2)
    dist=F.pdist(emb, p=2)
    #put distances into upper triangular matrix
    dist_matrix=torch.zeros(N,N,requires_grad=True)
    dist_matrix[ind[0],ind[1]] = dist
    
    logits=torch.exp(-gamma * dist_matrix**2)
    logits=logits[ind[0],ind[1]]

    labels = adj[ind[0],ind[1]]
    print('labels: ',labels,labels.size())
    print('logits: ',logits,logits.size())
    #compute loss
    loss = F.binary_cross_entropy_with_logits(10e-9+ logits , labels, reduction='mean')


# In general, it's very important to compute all the losses in a numerically stable way
# (e.g. using the log-sum-exp trick) or use existing library functions


def compute_loss_ber_exp1(adj, emb, b=0.1):
    #Init
    N,d=emb.shape
    gamma=0.001
    
    #get indices of upper triangular matrix
    ind=torch.triu_indices(N,N,offset=1)
    
    #compute f(z_i,z_j) = exp(-gamma||z_i-z_j||^2)
    dist=F.pdist(emb, p=2)
    #put distances into upper triangular matrix
    dist_matrix=torch.zeros(N,N,requires_grad=True)
    dist_matrix[ind[0],ind[1]] = dist
    
    logits=torch.exp(-gamma * dist_matrix**2)
    logits=logits[ind[0],ind[1]]

    labels = adj[ind[0],ind[1]]
    print('labels: ',labels,labels.size())
    print('logits: ',logits,logits.size())
    #compute loss
    loss = F.binary_cross_entropy_with_logits(10e-9+ logits , labels, reduction='mean')

