In [25]:
%load_ext autoreload
%autoreload 2

import sys
import time
import os
sys.path.insert(0, '../')

# import embedding methods
from embedding.bernoulli import Bernoulli
from embedding.kl import KL
from embedding.matrix_factorization import MatrixFactorization

# import evaluation methods
from evaluation import evaluate_link_prediction
from evaluation import evaluate_node_classification
from evaluation import evaluate_node_clustering
# 
# import utils
from utils import graph_util
from utils import plot_util
from utils import model_util

# visualization
%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Experiment 1

In [26]:
# Experiment 1

# Experiment configuration. The whole dict is handed to
# plot_util.setup_folders_and_summary_files, and individual entries are
# forwarded to the evaluation routines in the loop further down.
exp = dict(
    # --- training ---
    max_epochs=1000,                    # forwarded as train_epochs to every evaluation
    learning_rate=1e-2,                 # Adam
    weight_decay=1e-7,

    # --- link prediction ---
    link_prediction=True,               # toggles the link prediction evaluation
    link_pred_num_rounds=10,
    link_pred_train_ratio=0.8,
    link_pred_eval_every_n_steps=20,    # forwarded as eval_epochs
    link_pred_edge_emb_method="average",

    # --- node classification ---
    node_classification=True,           # toggles the node classification evaluation
    node_class_num_rounds=10,
    node_class_train_ratio=0.8,
    node_class_eval_every_n_steps=50,   # forwarded as eval_epochs

    # --- node clustering ---
    node_clustering=True,               # toggles the node clustering evaluation
    node_clustering_num_rounds=10,
    node_clustering_eval_epochs=2,      # forwarded as eval_epochs
)


# Datasets to evaluate; currently disabled candidates: "citeseer", "polblogs", "hvr"
datasets = ["cora"]

# TODO:
# Laplacian (no-brainer): Till, done
# NetMF: Till
# PPR: Till, done(?)
# Sum_Power_Tran: Jan
# Sim_Rank: Jan
# Big task: what to do with W / U epsilon: Jan
# Possible: Forest Fire / Stochastic Optimization


# Constructor arguments shared within each model family. All models use a
# 64-dimensional embedding; the families differ in decoder / embedding option.
_bernoulli_args = dict(embedding_dimension=64)
_kl_args = dict(embedding_dimension=64, decoder='softmax')
_mf_option1_args = dict(embedding_dimension=64, embedding_option=1)
_mf_option2_args = dict(embedding_dimension=64, embedding_option=2)


# --- Bernoulli models ---
model_01 = Bernoulli(decoder='sigmoid', **_bernoulli_args)  # works!
model_02 = Bernoulli(decoder='sigmoid', W_enabled=True, **_bernoulli_args)
model_03 = Bernoulli(decoder='gaussian', **_bernoulli_args)  # works!
model_04 = Bernoulli(decoder='exponential', **_bernoulli_args)  # works!
# model_05 = Bernoulli, exponential with W (not defined yet)


# --- KL models ---
# KL(similarity_measure {needs to be row stochastic} | softmax(ZZ^T))
model_06 = KL(similarity_measure='ppr', **_kl_args)
model_07 = KL(similarity_measure='sum_power_tran', **_kl_args)
model_08 = KL(similarity_measure='sim_rank', **_kl_args)
model_09 = KL(similarity_measure='transition', **_kl_args)
model_10 = KL(similarity_measure='ppr', W_enabled=True, **_kl_args)
model_11 = KL(similarity_measure='sum_power_tran', W_enabled=True, **_kl_args)
model_12 = KL(similarity_measure='sim_rank', W_enabled=True, **_kl_args)
model_13 = KL(similarity_measure='transition', W_enabled=True, **_kl_args)


# --- Matrix factorization models, embedding_option=1 ---
model_14 = MatrixFactorization(similarity_measure='adjacency', **_mf_option1_args)  # works!
model_15 = MatrixFactorization(similarity_measure='laplacian', **_mf_option1_args)
model_16 = MatrixFactorization(similarity_measure='transition', **_mf_option1_args)
model_17 = MatrixFactorization(similarity_measure='sym_normalized_laplacian', **_mf_option1_args)  # works!
model_18 = MatrixFactorization(similarity_measure='NetMF', **_mf_option1_args)
model_19 = MatrixFactorization(similarity_measure='ppr', **_mf_option1_args)
model_20 = MatrixFactorization(similarity_measure='sum_power_tran', **_mf_option1_args)
model_21 = MatrixFactorization(similarity_measure='sim_rank', **_mf_option1_args)

# --- Matrix factorization models, embedding_option=2 ---
model_22 = MatrixFactorization(similarity_measure='adjacency', **_mf_option2_args)  # works!
model_23 = MatrixFactorization(similarity_measure='laplacian', **_mf_option2_args)
model_24 = MatrixFactorization(similarity_measure='transition', **_mf_option2_args)
model_25 = MatrixFactorization(similarity_measure='sym_normalized_laplacian', **_mf_option2_args)  # works!
model_26 = MatrixFactorization(similarity_measure='NetMF', **_mf_option2_args)
model_27 = MatrixFactorization(similarity_measure='ppr', **_mf_option2_args)
model_28 = MatrixFactorization(similarity_measure='sum_power_tran', **_mf_option2_args)
model_29 = MatrixFactorization(similarity_measure='sim_rank', **_mf_option2_args)

# Models evaluated in this run, in the order their results are collected
# (and therefore the order the plot labels in `headers` must follow).
embedding_methods = [
    model_01,  # Bernoulli, sigmoid decoder
    model_03,  # Bernoulli, gaussian decoder
    model_06,  # KL, ppr
    model_07,  # KL, sum_power_tran
    model_09,  # KL, transition
    model_14,  # MatrixFactorization, adjacency, embedding_option=1
    model_15,  # MatrixFactorization, laplacian, embedding_option=1
    model_16,  # MatrixFactorization, transition, embedding_option=1
    model_17,  # MatrixFactorization, sym_normalized_laplacian, embedding_option=1
    model_18,  # MatrixFactorization, NetMF, embedding_option=1
    model_19,  # MatrixFactorization, ppr, embedding_option=1
    model_20,  # MatrixFactorization, sum_power_tran, embedding_option=1
]


# Result collections: one entry per dataset, each entry holding one score
# list per evaluated model (filled by the evaluation loop).
dset_NMI, dset_AUC, dset_ACC = [], [], []

# Create the folders and summary files describing this experiment run.
result_folder = plot_util.setup_folders_and_summary_files(
    exp,
    datasets,
    embedding_methods,
)
print(f'The results of the current experiment are stored at experiments/{result_folder}')

def _save_scores(directory, task, model, metric, scores):
    """Write `scores` one per line to
    <directory>/<task>/<model._similarity_measure>_<metric>_results.txt,
    creating the task folder (and any missing parents) first."""
    task_dir = directory + '/' + task
    os.makedirs(task_dir, exist_ok=True)
    out_path = task_dir + '/' + str(model._similarity_measure) + '_' + metric + '_results.txt'
    with open(out_path, 'w') as f:
        for score in scores:
            f.write('%s\n' % score)


# Run every enabled evaluation task for every model on every dataset.
# Per-model scores are appended to total_* and written to text files under
# results_plot/<dataset>/<method name>/<task>/; the per-dataset lists are
# collected in dset_NMI / dset_AUC / dset_ACC for plotting.
for dataset in datasets:

    # per-dataset accumulators: one score list per model
    total_NMI = []
    total_AUC = []
    total_ACC = []

    # load dataset
    A, y = graph_util.load_dataset(dataset)

    for model in embedding_methods:

        print(model.get_method_summary())
        start = time.time()

        # makedirs (instead of mkdir) also creates results_plot/<dataset>
        # when it is missing and does not fail if the folder already exists.
        # NOTE(review): output files are keyed only by _method_name and
        # _similarity_measure; models that share both would overwrite each
        # other's files -- confirm these two attributes identify a model.
        directory = 'results_plot/' + dataset + '/' + str(model._method_name)
        os.makedirs(directory, exist_ok=True)

        # do link prediction
        if exp["link_prediction"]:
            link_prediction_folder = result_folder + "/link_prediction"
            AUC = evaluate_link_prediction.expLP(
                A, dataset, model, exp["link_pred_num_rounds"],
                link_prediction_folder,
                train_ratio=exp["link_pred_train_ratio"],
                edge_emb_method=exp["link_pred_edge_emb_method"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["link_pred_eval_every_n_steps"],
                undirected=True)
            total_AUC.append(AUC)
            _save_scores(directory, 'link_prediction', model, 'AUC', AUC)

        # do node classification
        if exp["node_classification"]:
            node_classification_folder = result_folder + "/node_classification"
            ACC = evaluate_node_classification.expNC(
                A, y, dataset, model, exp["node_class_num_rounds"],
                node_classification_folder,
                train_ratio=exp["node_class_train_ratio"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_class_eval_every_n_steps"],
                undirected=True)
            total_ACC.append(ACC)
            _save_scores(directory, 'node_classification', model, 'ACC', ACC)

        # do node clustering
        if exp["node_clustering"]:
            node_clustering_folder = result_folder + "/node_clustering"
            NMI = evaluate_node_clustering.exp_Node_Clustering(
                A, y, dataset, model, exp["node_clustering_num_rounds"],
                node_clustering_folder,
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_clustering_eval_epochs"],
                undirected=True)
            total_NMI.append(NMI)
            _save_scores(directory, 'node_clustering', model, 'NMI', NMI)

        end = time.time()
        print(f'Model evaluation took: {end-start} seconds')

    dset_NMI.append(total_NMI)
    dset_AUC.append(total_AUC)
    dset_ACC.append(total_ACC)
    
    
# Short x-axis labels for the box plots, one per entry of embedding_methods
# and in the same order. The fifth entry labels model_09 (KL with the
# 'transition' similarity); it was previously 'sr' (sim_rank), which is
# model_08 and is not part of embedding_methods.
headers = [
    'sig', 'gau',                                       # Bernoulli: sigmoid, gaussian
    'ppr', 'spt', 'tr',                                 # KL: ppr, sum_power_tran, transition
    'adj', 'lap', 'ftr', 'nl', 'nf', 'fppr', 'fspt',    # matrix factorization models
]


import pickle

# Persist the collected result lists with pickle (binary content despite the
# .txt extension).
# NOTE(review): the target paths are hard-coded to the cora folder even
# though the lists cover every entry of `datasets` -- revisit when more
# datasets are enabled.
for pickle_path, results in (
    ('results_plot/cora/cora_dset_NMI.txt', dset_NMI),
    ('results_plot/cora/cora_dset_AUC.txt', dset_AUC),
    ('results_plot/cora/cora_dset_ACC.txt', dset_ACC),
):
    with open(pickle_path, 'wb') as f:
        pickle.dump(results, f)

# One box plot per evaluation task and dataset, saved to
# plots/<dataset>/<TAG>_<dataset>.png.
# Each tuple: (file tag, y-axis label, task named in the title, results).
# The accuracy plot shows node classification results; its title previously
# said "link prediction".
# NOTE(review): sns.boxplot receives the per-model score lists as `y` and the
# labels as `x`; confirm the installed seaborn version accepts that shape.
for metric_tag, y_label, task_name, per_dataset_scores in (
    ('NMI', 'NMI', 'node clustering', dset_NMI),
    ('AUC', 'AUC', 'link prediction', dset_AUC),
    ('ACC', 'accuracy', 'node classification', dset_ACC),
):
    for ds, scores in zip(datasets, per_dataset_scores):
        # savefig does not create missing folders, so make sure it exists
        os.makedirs('plots/' + ds, exist_ok=True)

        fig, ax = plt.subplots()
        sns_plot = sns.boxplot(x=headers, y=scores, ax=ax)
        ax.set_title("Comparison for " + task_name + " on " + ds)
        ax.set_xlabel("model")
        ax.set_ylabel(y_label)
        ax.set_ylim([0., 1.])  # all three metrics are plotted on a 0-1 axis
        fig.savefig('plots/' + ds + '/' + metric_tag + '_' + ds + '.png')

The results of the current experiment are stored at experiments/results/2020_01_18_21_38
Bernoulli_sigmoid_sigmoid_64_False

Link prediction evaluation has started...


Round: 1

Epoch    0, loss = 0.98493

Round: 2

Epoch    0, loss = 0.98581

Round: 3

Epoch    0, loss = 0.97058

Round: 4

Epoch    0, loss = 0.97763

Round: 5

Epoch    0, loss = 0.99128

Round: 6

Epoch    0, loss = 0.97413

Round: 7

Epoch    0, loss = 0.98027

Round: 8

Epoch    0, loss = 0.97227

Round: 9

Epoch    0, loss = 0.97990

Round: 10

Epoch    0, loss = 0.96352

=> mean auc score: 0.5720428789841625

Node classification evaluation has started...

Epoch    0, loss = 1.02647
Epoch   25, loss = 0.34604
Epoch   50, loss = 0.11754
Epoch   75, loss = 0.05486
Epoch  100, loss = 0.03418
Epoch  125, loss = 0.02509
Epoch  150, loss = 0.02010
Epoch  175, loss = 0.01689
Epoch  200, loss = 0.01459
Epoch  225, loss = 0.01280
Epoch  250, loss = 0.01135
Epoch  275, loss = 0.01013
Epoch  300, loss = 0.00910
Epoch  325, l

Epoch 1925, loss = 0.00120
Epoch 1950, loss = 0.00118
Epoch 1975, loss = 0.00117
Epoch 2000, loss = 0.00116
Epoch 2025, loss = 0.00115
Epoch 2050, loss = 0.00114
Epoch 2075, loss = 0.00113
Epoch 2100, loss = 0.00112
Epoch 2125, loss = 0.00111
Epoch 2150, loss = 0.00110
Epoch 2175, loss = 0.00109
Epoch 2200, loss = 0.00108
Epoch 2225, loss = 0.00107
Epoch 2250, loss = 0.00106
Epoch 2275, loss = 0.00106
Epoch 2300, loss = 0.00105
Epoch 2325, loss = 0.00104
Epoch 2350, loss = 0.00103
Epoch 2375, loss = 0.00102
Epoch 2400, loss = 0.00102
Epoch 2425, loss = 0.00101
Epoch 2450, loss = 0.00100
Epoch 2475, loss = 0.00099
Epoch 2500, loss = 0.00099
Epoch 2525, loss = 0.00098
Epoch 2550, loss = 0.00097
Epoch 2575, loss = 0.00097
Epoch 2600, loss = 0.00096
Epoch 2625, loss = 0.00095
Epoch 2650, loss = 0.00095
Epoch 2675, loss = 0.00094
Epoch 2700, loss = 0.00093
Epoch 2725, loss = 0.00093
Epoch 2750, loss = 0.00092
Epoch 2775, loss = 0.00092
Epoch 2800, loss = 0.00091
Epoch 2825, loss = 0.00090
E

Epoch 3800, loss = 0.00455
Epoch 3825, loss = 0.00455
Epoch 3850, loss = 0.00455
Epoch 3875, loss = 0.00455
Epoch 3900, loss = 0.00455
Epoch 3925, loss = 0.00455
Epoch 3950, loss = 0.00455
Epoch 3975, loss = 0.00455
Epoch 4000, loss = 0.00455
Epoch 4025, loss = 0.00455
Epoch 4050, loss = 0.00455
Epoch 4075, loss = 0.00455
Epoch 4100, loss = 0.00455
Epoch 4125, loss = 0.00455
Epoch 4150, loss = 0.00455
Epoch 4175, loss = 0.00455
Epoch 4200, loss = 0.00455
Epoch 4225, loss = 0.00455
Epoch 4250, loss = 0.00455
Epoch 4275, loss = 0.00455
Epoch 4300, loss = 0.00455
Epoch 4325, loss = 0.00455
Epoch 4350, loss = 0.00455
Epoch 4375, loss = 0.00455
Epoch 4400, loss = 0.00455
Epoch 4425, loss = 0.00455
Epoch 4450, loss = 0.00455
Epoch 4475, loss = 0.00455
Epoch 4500, loss = 0.00455
Epoch 4525, loss = 0.00455
Epoch 4550, loss = 0.00455
Epoch 4575, loss = 0.00455
Epoch 4600, loss = 0.00455
Epoch 4625, loss = 0.00455
Epoch 4650, loss = 0.00454
Epoch 4675, loss = 0.00454
Epoch 4700, loss = 0.00454
E

KeyboardInterrupt: 

In [16]:

import pickle

# Dump each collected result list to its own pickle file (binary content
# despite the .txt extension).
# NOTE(review): paths are hard-coded to the cora folder -- revisit when more
# datasets are enabled.
_pickle_targets = [
    ('results_plot/cora/cora_dset_NMI.txt', dset_NMI),
    ('results_plot/cora/cora_dset_AUC.txt', dset_AUC),
    ('results_plot/cora/cora_dset_ACC.txt', dset_ACC),
]
for _target_path, _results in _pickle_targets:
    with open(_target_path, 'wb') as f:
        pickle.dump(_results, f)
    
    
    
    
    
#with open('a.txt', 'rb') as f:
#   my_list = pickle.load(f)

In [20]:

    
# Short x-axis labels for the box plots, one per entry of embedding_methods
# and in the same order. The fifth entry labels model_09 (KL with the
# 'transition' similarity); it was previously 'sr' (sim_rank), which is
# model_08 and is not part of embedding_methods.
headers = [
    'sig', 'gau',                                       # Bernoulli: sigmoid, gaussian
    'ppr', 'spt', 'tr',                                 # KL: ppr, sum_power_tran, transition
    'adj', 'lap', 'ftr', 'nl', 'nf', 'fppr', 'fspt',    # matrix factorization models
]




In [None]:

# Node clustering: one box plot of NMI scores per dataset, saved to
# plots/<dataset>/NMI_<dataset>.png.
for ds, NMI in zip(datasets, dset_NMI):
    fig, ax = plt.subplots()
    sns_plot = sns.boxplot(x=headers, y=NMI, ax=ax)
    ax.set_title("Comparison for node clustering on "+ds)
    ax.set_xlabel("model")
    ax.set_ylabel("NMI")
    ax.set_ylim([0.,1.])
    fig.savefig('plots/'+ds+'/NMI_'+ds+'.png')

In [None]:
# Link prediction: one box plot of AUC scores per dataset, saved to
# plots/<dataset>/AUC_<dataset>.png.
for ds, AUC in zip(datasets, dset_AUC):
    fig, ax = plt.subplots()
    sns_plot = sns.boxplot(x=headers, y=AUC, ax=ax)
    ax.set_title("Comparison for link prediction on "+ds)
    ax.set_xlabel("model")
    ax.set_ylabel("AUC")
    ax.set_ylim([0.,1.])
    fig.savefig('plots/'+ds+'/AUC_'+ds+'.png')

In [None]:
# Node classification: one box plot of accuracy scores per dataset, saved to
# plots/<dataset>/ACC_<dataset>.png.
# The title previously said "link prediction", but dset_ACC holds the node
# classification results.
for ds, ACC in zip(datasets, dset_ACC):
    fig, ax = plt.subplots()
    sns_plot = sns.boxplot(x=headers, y=ACC, ax=ax)
    ax.set_title("Comparison for node classification on "+ds)
    ax.set_xlabel("model")
    ax.set_ylabel("accuracy")
    ax.set_ylim([0.,1.])
    fig.savefig('plots/'+ds+'/ACC_'+ds+'.png')