In [11]:
import gust
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from numpy import matrix
import scipy
import scipy.sparse as sp
import torch.distributions as dist
from time import time

from sklearn.model_selection import StratifiedShuffleSplit


from scipy.spatial.distance import squareform

In [13]:
# Load the dataset using `gust` library
# graph.standardize() makes the graph unweighted, undirected and selects
# the largest connected component
# graph.unpack() returns the necessary vectors / matrices

A, X, _, y = gust.load_dataset('cora').standardize().unpack()

sss = StratifiedShuffleSplit(n_splits=5, test_size=0.8, random_state=0)
train_index, test_index = sss.split(self, emb, Y, groups=None)
train_X = A[train_index]
test_X = A[test_index]
train_y = y[train_index]
test_y = y[test_index]

NameError: name 'self' is not defined

In [2]:
torch.set_default_tensor_type('torch.cuda.FloatTensor')

A, X, _, z = gust.load_dataset('cora_ml').standardize().unpack()

sss = StratifiedShuffleSplit(n_splits=5, test_size=0.8, random_state=0)

#adj = torch.FloatTensor(A.toarray()).cuda()
#A = A[0:4,0:4]

In [3]:
def laplacian(A):
    #Transition Matrix P=D-A
    num_nodes = A.shape[0]
    D = np.ravel(A.sum(1))
    L = sp.diags(D) - A
    return L


def sym_normalized_laplacian(A):
    #Symmetric, Normalized Laplacian P=D^(−1/2)AD^(−1/2)
    num_nodes = A.shape[0]
    D = np.ravel(A.sum(1))
    #D[D == 0] = 1  # avoid division by 0 error
    D_sqrt = np.sqrt(D)
    a=np.ones(D_sqrt.shape[0])
    D_sqrt_inv = np.divide(a, D_sqrt, out=np.zeros_like(a), where=D!=0) 
    L = sp.diags(D_sqrt_inv) * A * sp.diags(D_sqrt_inv)
    #L = A / D_sqrt[:, None] / D_sqrt[None, :]
    return L

def Transition(A):
    #Laplacian P=D^−1A
    num_nodes = A.shape[0]
    D = np.ravel(A.sum(1))
    #D[D == 0] = 1  # avoid division by 0 error
    a=np.ones(D.shape[0])
    D_inv = np.divide(a, D, out=np.zeros_like(a), where=D!=0)
    L = sp.diags(D_inv) * A
    return L

def PPR(A):
    #Personalized PageRank Matrix as described in https://openreview.net/pdf?id=H1gL-2A9Ym with the there used hyperparameter alpha=0.1
    #P=alpha(I-(1-alpha)*D^-1/2(A+I)D^-1/2)^-1
    print(A.toarray())
    alpha = 0.1  
    num_nodes = A.shape[0]
    D = np.ravel(A.sum(1))
    #D[D == 0] = 1  # avoid division by 0 error
    D_sqrt = np.sqrt(D)
    a=np.ones(D_sqrt.shape[0])
    D_sqrt_inv = np.divide(a, D_sqrt, out=np.zeros_like(a), where=D!=0)
    A_tilde = sp.diags(D_sqrt_inv) * (A + sp.identity(A.shape[0])) * sp.diags(D_sqrt_inv)
    print('A_tilde: ', A_tilde.toarray())
    L_inv = (sp.identity(A.shape[0]) - (1-alpha) * A_tilde)
    print('L_inv: ', L_inv.toarray())
    L = alpha * np.linalg.pinv(L_inv.toarray())
    print(L)
    return L

def NetMF(A):
    eps=1e-5
    #volume of the graph, usually for weighted graphs, here weight 1
    vol = A.sum()
    
    #b is the number of negative samples, hyperparameter
    b = 3
    
    #T is the window size, as a small window size algorithm is used, set T=10, which showed the best results in the paper
    T=10
    
    #Transition Matrix P=D^-1A
    num_nodes = A.shape[0]
    D = np.ravel(A.sum(1))
    #D[D == 0] = 1  # avoid division by 0 error
    a=np.ones(D.shape[0])
    D_inv = np.divide(a, D, out=np.zeros_like(a), where=D!=0)
    P = np.diag(D_inv) * A.todense()
    
    #Compute M = vol(G)/bT (sum_r=1^T P^r)D^-1
    sum_np=0
    for r in range(1,T+1):
        sum_np+=np.linalg.matrix_power(P,r)
    M = sum_np * np.diag(D_inv) * vol / (b*T)
    M_max = np.maximum(M,np.ones(M.shape[0]))

    #Compute SVD of M
    u, s, vh = np.linalg.svd(np.log(M_max), full_matrices=True)

    #Compute L
    L = u*np.diag(np.sqrt(s+eps))
    print(L.sum(axis=1))
    return L

def simrank_quick(A, C = 0.8, acc = 0.1):
    #https://link.springer.com/chapter/10.1007/978-3-642-14246-8_29
    #Algorithm 2: PAUG-SimRank: Parallel Accelerative SimRank for Undirected Graphs
    #Step 1: Spectral Predecomposition
    A = A.todense()
    print(torch.tensor(A))
    eigvalues, eigvectors = torch.eig(torch.tensor(A), eigenvectors=True)
    eigvalues = eigvalues[:,0]
    
    #Step 2: Iterative Elementwise Matrix Multiplication
    #for i in range(eigvalues.shape[0]):
        
        
    
    return 

def simrank(A, C = 0.8, acc = 1e-10):
    #https://link.springer.com/chapter/10.1007/978-3-642-14246-8_29
    #Algorithm 1: AUG-SimRank: Accelerative SimRank for Undirected Graphs
    A_torch = torch.tensor(A.todense())
    
    #Calculate Transition Probability Q
    Q_torch = A_torch / A_torch.sum(1, keepdims=True)
    Q = np.squeeze(np.asarray((A / np.sum(A,axis = 1))))
    
    
    #Decompose Q
    eigvalues_t, eigvectors_t = torch.eig(Q_torch, eigenvectors=True)
    eigvalues_np, eigvectors_np = np.linalg.eig(Q)
    #for undirected graphs all eigenvalues are real
    eigvectors_np=np.real(eigvectors_np)
    eigvalues_np=np.real(eigvalues_np)
    
    eigvalues_t_real = eigvalues_t[:,0]

    #Initialize
    #S_old = torch.eye(Q.shape[0])
    S_old_np = np.identity(Q.shape[0])
    S_old_t = torch.eye(Q_torch.shape[0])
    M_np = C * np.diag(eigvalues_np) @ np.transpose(np.diag(eigvalues_np))  
    M_t = C * torch.diag(eigvalues_t_real) @ torch.diag(eigvalues_t_real).T
 
    #Converge
    while True:
        S_new_np = np.maximum(np.multiply(M_np, S_old_np), np.identity(M_np.shape[0]))
        
        if (np.absolute(S_new_np-S_old_np)).max()<acc:
            break
        S_old_np = S_new_np
    
    #L = eigvectors @ S_new @ np.linalg.inv(eigvectors)
    print('S_new_np: ', S_new_np)
    L_np = np.dot(eigvectors_np, np.dot(S_new_np, np.linalg.inv(eigvectors_np)))
        
    #Converge
    while True:
        S_new_t = torch.max(M_t*S_old_t,torch.eye(M_t.shape[0]))
        
        if torch.max(torch.abs(S_new_t-S_old_t))<acc:
            break
        S_old_t = S_new_t
    print('S_new_t: ', S_new_t)
    L_t = eigvectors_t @ S_new_t @ torch.inverse(eigvectors_t)
    
        
    
    
    return L_np, L_t


L = laplacian(A)

In [4]:
N = A.shape[0]
D = 32

Z = nn.Parameter(torch.empty(N, D).normal_(std=0.1))
x = nn.Parameter(torch.empty(N, D).normal_(std=0.1))

opt = torch.optim.Adam([Z], lr=1e-2)
e1, e2 = A.nonzero()

In [5]:
def sig(Z, b=0.1, eps=1e-8): 
    dist = torch.matmul(Z,Z.T) +b
    sigdist = 1/(1+torch.exp(dist+eps)+eps)
    logsigdist = torch.log(sigdist+eps)
    pos_term = logsigdist[e1,e2]
    neg_term = torch.log(1-sigdist)
    neg_term[np.diag_indices(N)] = 0.0
    
    return -(pos_term.sum() + neg_term.sum()) / Z.shape[0]**2

In [6]:
def dist(Z, eps=1e-5):
    gamma = 0.1
    dist = ((Z[:, None] - Z[None, :]).pow(2.0).sum(-1) + eps).sqrt()
    neg_term = torch.log(-torch.expm1(-dist)*gamma + eps)
    neg_term[np.diag_indices(N)] = 0.0
    pos_term = -dist[e1, e2]   
    neg_term[e1, e2] = 0.0
    
    return -(pos_term.sum() + neg_term.sum()) / Z.shape[0]**2

In [7]:
def exp(Z, eps=1e-8):
    #e1, e2 = similarity_measure.nonzero()
    emb_abs = torch.FloatTensor.abs(Z)
    dist = -torch.matmul(emb_abs, emb_abs.T)
    neg_term = dist
    neg_term[np.diag_indices(Z.shape[0])] = 0.0
    expdist = torch.exp(dist)
    embedding = 1 - expdist
    logdist = torch.log(embedding + eps)
    pos_term = logdist[e1, e2]
    size=Z.shape[0]
    return -(pos_term.sum() + neg_term.sum()) / Z.shape[0]**2

In [8]:
def kl(L, Z, eps=1e-8):
    #P=softmax(ZZ^T)
    dist=torch.matmul(Z,Z.T)
    sigdist = 1/(1+torch.exp(dist+eps)+eps)
    logsigdist = torch.log(sigdist+eps)
    losses = T*logsigdist
    return losses.sum()

In [9]:
for epoch in range(500):
    opt.zero_grad()
    loss = dist(Z)
    loss.backward()
    opt.step()
    print(loss.item())

2.9060537815093994
2.8549840450286865
2.8081750869750977
2.765533924102783
2.726851224899292
2.6918540000915527
2.6602418422698975
2.631711721420288
2.605971336364746
2.582746744155884
2.561783790588379
2.5428497791290283
2.5257346630096436
2.510248899459839
2.496222734451294
2.483503818511963
2.4719555377960205
2.461456537246704
2.4518988132476807
2.443185567855835
2.4352309703826904
2.4279584884643555
2.421299457550049
2.415193796157837
2.409585952758789
2.404428720474243
2.3996782302856445
2.3952958583831787
2.391246795654297
2.387500047683716
2.3840279579162598
2.380805492401123
2.3778109550476074
2.3750228881835938
2.3724238872528076
2.3699984550476074
2.3677303791046143
2.365607261657715
2.363616704940796
2.3617475032806396
2.3599910736083984
2.358337163925171
2.356778621673584
2.3553075790405273
2.3539175987243652
2.352602243423462
2.351356029510498
2.35017466545105
2.3490521907806396
2.3479855060577393
2.3469698429107666
2.3460028171539307
2.3450798988342285
2.3441991806030273


2.3107728958129883
2.3107664585113525
2.310760259628296
2.3107540607452393
2.3107478618621826
2.310742139816284
2.3107357025146484
2.310729742050171
2.3107235431671143
2.3107175827026367
2.31071138381958
2.3107054233551025
2.310699701309204
2.3106937408447266
2.310688018798828
2.3106820583343506
2.310676336288452
2.3106703758239746
2.3106648921966553
2.310659408569336
2.3106529712677
2.310647487640381
2.3106417655944824
2.310636520385742
2.3106307983398438
2.3106250762939453
2.310619831085205
2.3106138706207275
2.310608148574829
2.310603141784668
2.3105976581573486
2.31059193611145
2.31058669090271
2.3105812072753906
2.3105759620666504
2.31057071685791
2.310565233230591
2.3105599880218506
2.3105545043945312
2.31054949760437
2.310544013977051
2.3105390071868896
2.3105337619781494
2.310528516769409
2.310523271560669
2.310518264770508
2.3105132579803467
2.3105080127716064
2.310502767562866
2.310497999191284
2.310492992401123
2.310487747192383
2.310482978820801
2.3104779720306396
2.3104732

In [10]:
for train_index, test_index in sss.split(X, y):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

NameError: name 'y' is not defined