## Benchmarks
This demo shows the performance of other SOTA graph embedding methods and their limitations:
- they do not take attributes into account (they can only handle discrete attributes)
- they are highly dependent on the re-initialization of the random walks and on minor graph changes

In [5]:
import os,sys
import random
import numpy as np
import networkx as nx

sys.path.append(os.path.realpath('lib'))
from lib.data_loader import load_local_data
from benchmarks.sub2vec import Sub2vec

### sub2vec demo

In [6]:
# Load the 'aids' dataset from the local data directory, with the
# `attributes` and `use_node_deg` options both switched off.
dataset_n = 'aids'
path = 'data/'
X, y = load_local_data(
    path,
    dataset_n,
    attributes=False,
    use_node_deg=False,
)

# Configure sub2vec. `d=128` matches the second dimension of the printed
# embedding shape; the remaining settings are passed through as-is
# (NOTE(review): their exact semantics live in benchmarks.sub2vec — confirm there).
sub2vec = Sub2vec(
    property='s',
    walkLength=100,
    output='aids_walk',
    d=128,
    iter=100,
    windowSize=2,
    p=0.5,
    model='dm',
)

# First round: generate the walks for X, then compute the embeddings from them.
sub2vec.obtainRandomWalks(X)
embeddings = sub2vec.calculateEmbeddings()
print(embeddings.shape)

Total vects  2000
(2000, 128)


In [None]:
# Re-calculate the embeddings to demonstrate that cosine similarity is not meaningful across separate rounds

In [8]:
# Second round on the very same input X: regenerate the walks and recompute
# the embeddings, keeping the result under a separate name so it can be
# compared against `embeddings` from the first round.
sub2vec.obtainRandomWalks(X)
embeddings2 = sub2vec.calculateEmbeddings()


Total vects  2000


In [10]:
# Display the cosine similarity between the two embedding rounds for
# (up to) 10 randomly chosen graphs.
from sklearn.metrics.pairwise import cosine_similarity

num_graphs, d = embeddings.shape

# Sample distinct graph indices: `replace=False` guarantees the same graph is
# never reported twice, and the size is capped so that datasets with fewer
# than 10 graphs do not raise.
random_graphs = np.random.choice(num_graphs, size=min(10, num_graphs), replace=False)

for i in random_graphs:
    # cosine_similarity works on 2-D inputs, so each embedding vector is
    # reshaped into a single-row matrix; the result is a 1x1 array.
    similarity = cosine_similarity(
        embeddings[i, :].reshape(1, -1),
        embeddings2[i, :].reshape(1, -1),
    )
    print(f"Similarity of two graphs within rounds is {similarity}")


Similarity of two graphs within rounds is [[-0.02972129]]
Similarity of two graphs within rounds is [[-0.00695749]]
Similarity of two graphs within rounds is [[0.18097776]]
Similarity of two graphs within rounds is [[-0.0468096]]
Similarity of two graphs within rounds is [[-0.0196318]]
Similarity of two graphs within rounds is [[0.0975268]]
Similarity of two graphs within rounds is [[-0.0731004]]
Similarity of two graphs within rounds is [[0.06960186]]
Similarity of two graphs within rounds is [[0.00319865]]
Similarity of two graphs within rounds is [[-0.16390699]]


In [None]:
# Now calculate the embeddings for two copies of the dataset in a single run and check whether the copies of the same graph get similar embeddings

In [None]:
# Join the dataset with itself so that both copies are embedded in one run.
# NOTE(review): assumes X is a 1-D collection of graphs, in which case
# np.hstack yields every graph twice (copy k of graph i at i and i + len(X)) — TODO confirm.
X_double = np.hstack((X,X))
# Regenerate the walks on the doubled dataset; this reuses the same `sub2vec`
# object that produced the earlier rounds.
sub2vec.obtainRandomWalks(X_double)
embeddings1_2 = sub2vec.calculateEmbeddings()
