In [1]:
import pandas as pd
from scipy.sparse import csr_matrix, diags
import numpy as np
from sklearn.metrics.cluster import normalized_mutual_info_score as nmi
import time
import matplotlib.pyplot as plt
from os import listdir

import sys
# Add the local 'Package/' folder to the module search path; the project
# imports below are presumably resolved from there (TODO confirm layout).
sys.path += ['Package/']  

from node2vec.model import Node2Vec

# NOTE(review): star imports hide where names come from. NodeEmbedding,
# Node2VecNS and computeScore, used later in this notebook, are presumably
# provided by one of these two modules -- confirm and import them explicitly.
from node_embedding import *
from dcsbm import *

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Folder prefix for the input files read by RunSimulation:
# '<name>.csv' (edge list) and '<name>_label.csv' (node labels).
directory = 'dataset/'

In [2]:
def _next_run_version(perf_dir):
    """Return the version number to use for the next pair of result files.

    Result files are named ``v_<k>_.csv``. Entries of ``perf_dir`` whose name
    does not carry an integer in the second ``_``-separated field are ignored,
    so a stray file can no longer reset the numbering to 1 and cause an
    earlier result to be overwritten.
    """
    versions = []
    for fname in listdir(perf_dir):
        try:
            versions.append(int(fname.split('_')[1]))
        except (IndexError, ValueError):
            # Not a result file: skip it.
            continue
    return max(versions) + 1 if versions else 1


def RunSimulation():
    """Run one benchmark pass over the real-graph datasets and save the results.

    For every dataset the edge list (``<name>.csv``, columns ``id1``/``id2``)
    and the node labels (``<name>_label.csv``, columns ``node``/``label``) are
    read from ``directory``. The graph is embedded once with ``NodeEmbedding``
    and once with ``Node2VecNS``; each embedding is scored against the labels
    with ``computeScore`` and the wall-clock time (seconds) of each embedding
    call is recorded.

    Two CSV files with the same name ``v_<k>_.csv`` are written, one in
    ``saved_files/real_graphs/perf/`` (scores) and one in
    ``saved_files/real_graphs/time/`` (timings). Each has one column per
    dataset; row 0 holds the ``NodeEmbedding`` values and row 1 the
    ``Node2VecNS`` values. ``k`` is one more than the largest version already
    present in the ``perf`` folder (1 if there is none).

    Returns
    -------
    None
    """
    # Local import keeps this block self-contained (the file only imports
    # ``listdir`` from ``os``).
    from os import makedirs

    datasets = ['amazon', 'dblp', 'livejournal', 'youtube']
    perf_dir = 'saved_files/real_graphs/perf/'
    time_dir = 'saved_files/real_graphs/time/'

    # Loop-invariant embedding settings.
    dim = 128
    f_func = lambda x: np.sqrt(x)

    tv, tnsv = [], []   # embedding times: NodeEmbedding / Node2VecNS
    sv, snsv = [], []   # scores:          NodeEmbedding / Node2VecNS

    for name in datasets:

        EL = pd.read_csv(directory + name + '.csv')[['id1', 'id2']]
        n = len(pd.concat([EL.id1, EL.id2]).unique())

        # Labels are re-ordered by node id; ids are used directly as matrix
        # indices below, so they must lie in 0..n-1.
        ℓtrue = pd.read_csv(directory + name + '_label.csv').set_index('node')
        ℓtrue = ℓtrue.loc[np.arange(n)].label.values

        A = csr_matrix((np.ones(len(EL)), (EL.id1, EL.id2)), shape = (n,n))

        t0 = time.time()
        Φ = NodeEmbedding(A, dim, f_func = f_func, η = .85, n_epochs = 30, n_prod = 1,
                          verbose = True, cov_type = 'full')
        t = time.time() - t0
        s = computeScore(Φ, ℓtrue, n_trials = 1, norm_bool = True)

        t0 = time.time()
        X = Node2VecNS(A, dim, verbose = True)
        tns = time.time() - t0
        sns = computeScore(X, ℓtrue, n_trials = 1)

        tv.append(t)
        tnsv.append(tns)
        sv.append(s)
        snsv.append(sns)

    # Build both tables in one shot. ``DataFrame.append`` was removed in
    # pandas 2.0, and growing a frame row by row is unnecessary here.
    df = pd.DataFrame([dict(zip(datasets, sv)), dict(zip(datasets, snsv))],
                      columns = datasets)
    df_time = pd.DataFrame([dict(zip(datasets, tv)), dict(zip(datasets, tnsv))],
                           columns = datasets)

    # save the result
    # Create the output folders if needed so a long run is not lost at the
    # very last step, then pick the next free version number explicitly
    # instead of relying on a catch-all ``except`` that rewrote ``v_1_.csv``.
    makedirs(perf_dir, exist_ok = True)
    makedirs(time_dir, exist_ok = True)

    out_name = 'v_' + str(_next_run_version(perf_dir)) + '_.csv'
    df.to_csv(perf_dir + out_name, index = False)
    df_time.to_csv(time_dir + out_name, index = False)

    return

In [3]:
# Number of independent repetitions of the whole benchmark.
n_sim = 5

# Each call performs one full pass over the datasets.
for run_idx in range(n_sim):
    RunSimulation()

Running the optimization for k = 1
Preprocessing progress: 0%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 94.45%cessing progress: 81.57%81.57%78.35%
Preprocessing transition probabilities: done!
Walking progress: 96.6%%5.13%%ress: 00%%0%0%0%0%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: 000%%0%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 95.16%cessing progress: 94.66%94.6694.66%%ress: 89.7189.71%89.71%Preprocessing progress: 
Preprocessing transition probabilities: done!
Walking progress: 99.12%6.17%%ress: 000%%%00
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1



Preprocessing progress: 4.186%%essing progress: 0%00% the Skip-Gram model...
Preprocessing progress: 98.6%%97.67%g progress: 95.3495.34%%ng progress: 
Preprocessing transition probabilities: done!
Walking progress: 99.22%g progress: 86.8286.82%%progress: 0%0%
Simulating random walks: done!
Learning embeddings: done!




Running the optimization for k = 1
Preprocessing progress: 0%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.77%79.46%g progress: 27.85%27.85%
Preprocessing transition probabilities: done!
Walking progress: 96.54%g progress: 0%0%0%g progress: Walking progress: 0%0%0%0%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Learning embeddings using the Skip-Gram model...
Preprocessing progress: 94.45%85.86%g progress: 39.71%39.71%
Preprocessing transition probabilities: done!
Walking progress: 96.6%%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing proLearning embeddings using the Skip-Gram model...
Preprocessing progress: 95.32%4.5%7%g progress: 93.0193.0193.01%%%ress: 91.86%91.8691.86%%
Preprocessing transition probabilities: done!
Walking progress: 99.12%9.56%%ress: 21.4821.48%%rogress: 0%%00%%0%0%0%: Walking progress: 
Simulating random walks: done!
Learning e



Preprocessing progress: 00%%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.75%cessing progress: 98.29%98.29%96.43%
Preprocessing transition probabilities: done!
Walking progress: 99.22%g progress: 69.7669.76%%
Simulating random walks: done!
Learning embeddings: done!




Running the optimization for k = 1
Preprocessing progress: 0000%%%%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.77%96.17%g progress: 90.23%90.23%
Preprocessing transition probabilities: done!
Walking progress: 96.54%king progress: 0%0%0%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: 95.52%cessing progress: 89.0889.08%%
Preprocessing transition probabilities: done!
Walking progress: 96.6%%king progress: 0%0%Walking progress: 0%0%0%0%
Simulating random walks: done!
Learning embeddings using the Skip-Gram model...
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: 0000%%%0%%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 95.32%95.32%91.36%ress: 91.19%91.19%85.41%ress: 85.41%85.41%
Preprocessing transition probabilities: done!
Walking progress: 99.12%1.39%%ress: Walking progress: Walking progress: 
Simulating random walks: done!
L



Preprocessing progress: 0%0Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.91%3.64%%Preprocessing progress: 89.92%
Preprocessing transition probabilities: done!
Walking progress: 99.22%g progress: 34.11%34.11%
Simulating random walks: done!
Learning embeddings: done!




Running the optimization for k = 1
Preprocessing progress: Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.4%98.4%%%g progress: 97.66%97.66%
Preprocessing transition probabilities: done!
Walking progress: 96.54%king progress: %0%g progress: Walking progress: Walking progress: 000%%00%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: 32.2%ocessing progress: 31.1331.13%%ip-Gram model...
Preprocessing progress: Preprocessing progress: 94.45%94.45%
Preprocessing transition probabilities: done!
Walking progress: 96.6%%ng progress: %00%0%ng progress: 
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: Preprocessing progress: 00%%Learning embeddings using the Skip-Gram model...
Preprocessing progress: Preprocessing progress: 95.3295.32%%g progress: 94.17%94.1794.17%%
Preprocessing transition probabilities: done!
Walking progress: 97.



Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.75%cessing progress: 96.89%96.89%ess: 78.91%
Preprocessing transition probabilities: done!
Walking progress: 99.22%3.95%%
Simulating random walks: done!
Learning embeddings: done!




Running the optimization for k = 1
Preprocessing progress: 10.03%%0%0%%g progress: Preprocessing progress: 
Preprocessing progress: Preprocessing progress: 98.7798.77%%
Preprocessing transition probabilities: done!
Walking progress: 96.54%g progress: 37.1337.13%%ress: Walking progress: 0%000%%%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: Preprocessing progress: 00%0%%Learning embeddings using the Skip-Gram model...
Preprocessing progress: Preprocessing progress: 94.45%94.45%
Preprocessing transition probabilities: done!
Walking progress: 96.6%%0% progress: 0%0%
Simulating random walks: done!
Learning embeddings: done!
Running the optimization for k = 1
Preprocessing progress: 00%%Learning embeddings using the Skip-Gram model...
95.32Preproce progress: 95.16%95.16%g progress: 94.17%94.17%Preprocessing progress: 85.74%ssing progress: %95.32%95.32%
Preprocessing transition probabilities: done!
Walking progress: 99.12



Preprocessing progress: 00%Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.75%8.13%%g progress: 93.64%93.64%93.17%sing progress: 34.57%
Preprocessing transition probabilities: done!
Walking progress: 97.67%g progress: 37.2137.21%%
Simulating random walks: done!
Learning embeddings: done!




Running the optimization for k = 1
Preprocessing progress: 1Learning embeddings using the Skip-Gram model...
Preprocessing progress: 98.4%%cessing progress: 97.29%97.29%
Preprocessing transition probabilities: done!
Walking progress: 96.54%18.57%ress: 00%00%
Simulating random walks: done!
Learning embeddings: done!


| Dataset     | NMI (ours, f) | NMI (Node2Vec NS) | time ours (s) | time Node2Vec NS (s) |
| -------     | ------------- | ----------------- | ------------- | -------------------- |
| amazon      | 0.93  | 0.93   | 1    | 16    |
| dblp        | 0.52  | 0.51   | 9    | 116   |
| livejournal | 0.92  | 0.91   | 13   | 137   |
| youtube     | 0.60  | 0.63   | 4    | 54    |


In [30]:
# Aggregate the per-run result files: every file in the 'perf' folder is
# expected to have a same-named counterpart in the 'time' folder.
perf_dir = 'saved_files/real_graphs/perf/'
time_dir = 'saved_files/real_graphs/time/'

# Keep only CSV files so that stray entries in the folder (hidden files,
# sub-folders, ...) are not passed to read_csv.
files = [f for f in listdir(perf_dir) if f.endswith('.csv')]

# Stack all runs. reset_index() exposes the row number inside each file as
# the 'index' column: rows sharing an index come from the same row position
# across runs.
df_list_perf = pd.concat([pd.read_csv(perf_dir + f) for f in files]).reset_index()
df_list_time = pd.concat([pd.read_csv(time_dir + f) for f in files]).reset_index()

# Average over runs, separately for each row position.
print(df_list_perf.groupby('index').mean())
print(df_list_time.groupby('index').mean())

         amazon      dblp  livejournal   youtube
index                                           
0      0.924100  0.517563     0.915837  0.602270
1      0.934434  0.509894     0.912073  0.652542
          amazon        dblp  livejournal    youtube
index                                               
0       3.688894   25.905197    33.536647  11.452375
1      16.448200  179.075804   195.456344  71.530425
