In [1]:
%load_ext autoreload
%autoreload 2

import sys
import time
# Put the parent directory first on the module search path so the
# project-local imports below (embedding, evaluation, utils) can be found.
# NOTE(review): '../' is resolved against the kernel's working directory,
# not against this notebook's location — confirm the notebook is always
# started from its own folder.
sys.path.insert(0, '../')

# import embedding methods
from embedding.bernoulli import Bernoulli
from embedding.kl import KL
from embedding.matrix_factorization import MatrixFactorization

# import evaluation methods
from evaluation import evaluate_link_prediction
from evaluation import evaluate_node_classification
from evaluation import evaluate_node_clustering
# 
# import utils
from utils import graph_util
from utils import plot_util
from utils import model_util

# visualization
%matplotlib inline
import seaborn as sns
# Use seaborn's 'whitegrid' style for the figures drawn in this notebook.
sns.set_style('whitegrid')

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter (all categories, all modules), so
# deprecation and numerical warnings are hidden as well — consider narrowing it.
warnings.filterwarnings('ignore')

## Experiment 1

In [5]:
# Experiment 1 -- configuration
#
# `exp` holds every tunable setting of the run; it is handed to the result
# folder setup and read by the evaluation loop further down.
exp = dict(
    # --- training ---
    max_epochs=100,
    learning_rate=1e-2,  # Adam
    weight_decay=1e-7,

    # --- link prediction ---
    link_prediction=True,
    link_pred_num_rounds=1,
    link_pred_train_ratio=0.8,
    link_pred_eval_every_n_steps=5,
    link_pred_edge_emb_method="average",

    # --- node classification ---
    node_classification=False,
    node_class_num_rounds=2,
    node_class_train_ratio=0.8,
    node_class_eval_every_n_steps=5,

    # --- node clustering ---
    node_clustering=False,
    node_clustering_num_rounds=5,
    node_clustering_eval_epochs=1,
)

# Datasets to evaluate on.
# Other names that appear in earlier selections / notes:
#   "citeseer", "polblogs", parliament, purdue_facebook
# Previously used selections:
#   ["cora", "citeseer", "polblogs"]
#   ["cora", "hvr", "polblogs"]
datasets = ["cora", "hvr"]

# TODO:
# - Laplacian (no-brainer): Till -- done
# - NetMF: Till
# - PPR: Till -- done(?)
# - Sum_Power_Tran: Jan
# - Sim_Rank: Jan
# - Big task: what to do with W / U Epsilon: Jan
# - Possible: Forest Fire / Stochastic Optimization


# Embedding dimension shared by every model below. Change it here to run the
# whole experiment at a different embedding size.
EMBEDDING_DIM = 64

# --- Bernoulli models ---
model_01 = Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder='sigmoid')  # works!
# model_02 = Bernoulli, sigmoid with W
model_03 = Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder='gaussian')  # works!
model_04 = Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder='exponential')  # works!
# model_05 = Bernoulli, exponential with W


# --- KL models ---
# KL(similarity_measure || softmax(ZZ^T)); the similarity measure needs to be
# row stochastic.
# NOTE(review): the similarity names here are capitalised ('PPR', 'Sim_Rank')
# while the MatrixFactorization models use lower case ('ppr', 'sim_rank') --
# confirm that each class expects exactly these spellings.
model_06 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='PPR')
model_07 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='Sum_Power_Tran')
model_08 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='Sim_Rank')
model_09 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='Transition')
# model_10 = KL, PPR, with W
# model_11 = KL, Sum_Power_Tran, with W
# model_12 = KL, Sim_Rank, with W
# model_13 = KL, Transition, with W


# --- Matrix factorization models ---
model_14 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='adjacency')  # works!
model_15 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='laplacian')
model_16 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='transition')
model_17 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='sym_normalized_laplacian')  # works!
model_18 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='NetMF')
model_19 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='ppr')
model_20 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='sum_power_tran')
model_21 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='sim_rank')
# model_22 = MF, adjacency, with W
# model_23 = MF, laplacian, with W
# model_24 = MF, Transition, with W
# model_25 = MF, sym_normalized_laplacian, with W
# model_26 = MF, NetMF, with W
# model_27 = MF, PPR, with W
# model_28 = MF, Sum_Power_Tran, with W
# model_29 = MF, Sim_Rank, with W

# Models that are actually evaluated in this run.
# Earlier selections: model_14, model_15, model_16 / [model_15]
embedding_methods = [model_20]

# Run every selected embedding method on every selected dataset and perform
# the evaluations that are switched on in `exp`.

# Normalized mutual information values returned by the node-clustering
# evaluation, one entry per (dataset, model) pair that was clustered.
total_NMI = []

# Set up folders to store the experiment setup summary and the results.
result_folder = plot_util.setup_folders_and_summary_files(exp, datasets, embedding_methods)
print(f'The results of the current experiment are stored at experiments/{result_folder}')

for dataset in datasets:

    # Load the dataset: A and y are passed on to the evaluation routines
    # (y only to node classification and node clustering).
    A, y = graph_util.load_dataset(dataset)

    for model in embedding_methods:
        # Identify the model in the log. Not every model is constructed with a
        # similarity measure (the Bernoulli ones are not), so fall back to the
        # class name instead of failing on a missing attribute.
        print(getattr(model, '_similarity_measure', type(model).__name__))

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start = time.perf_counter()

        # do link prediction
        if exp["link_prediction"]:
            link_prediction_folder = result_folder + "/link_prediction"
            evaluate_link_prediction.expLP(
                A, dataset, model, exp["link_pred_num_rounds"],
                link_prediction_folder,
                train_ratio=exp["link_pred_train_ratio"],
                edge_emb_method=exp["link_pred_edge_emb_method"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["link_pred_eval_every_n_steps"],
                undirected=True,
            )

        # do node classification
        if exp["node_classification"]:
            node_classification_folder = result_folder + "/node_classification"
            evaluate_node_classification.expNC(
                A, y, dataset, model, exp["node_class_num_rounds"],
                node_classification_folder,
                train_ratio=exp["node_class_train_ratio"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_class_eval_every_n_steps"],
                undirected=True,
            )

        # do node clustering
        # NOTE(review): unlike the two calls above, no train_epochs is passed
        # here, so exp["max_epochs"] is not applied -- confirm this is intended.
        if exp["node_clustering"]:
            node_clustering_folder = result_folder + "/node_clustering"
            NMI = evaluate_node_clustering.exp_Node_Clustering(
                A, y, dataset, model, exp["node_clustering_num_rounds"],
                node_clustering_folder,
                eval_epochs=exp["node_clustering_eval_epochs"],
                undirected=True,
            )
            total_NMI.append(NMI)

        end = time.perf_counter()
        print(f'Model evaluation took: {end-start} seconds')

The results of the current experiment are stored at experiments/results/2020_01_13_14_32
sum_power_tran
	Link prediction evaluation has started...
Model evaluation took: 112.22607588768005 seconds
sum_power_tran
	Link prediction evaluation has started...
Model evaluation took: 8.984415769577026 seconds


In [None]:
# Box plot of the normalized mutual information (NMI) values collected in
# `total_NMI` by the experiment cell above.
# `total_NMI` is only filled when exp["node_clustering"] is True, so guard
# against drawing an empty figure.
if total_NMI:
    ax = sns.boxplot(data=total_NMI)
    ax.set(title='Node clustering: normalized mutual information', ylabel='NMI')
else:
    print('No NMI scores to plot: enable exp["node_clustering"] and re-run the experiment cell.')