In [3]:
%load_ext autoreload
%autoreload 2

import sys
import time
import os
sys.path.insert(0, '../')

# import embedding methods
from embedding.bernoulli import Bernoulli
from embedding.kl import KL
from embedding.matrix_factorization import MatrixFactorization

# import evaluation methods
from evaluation import evaluate_link_prediction
from evaluation import evaluate_node_classification
from evaluation import evaluate_node_clustering
# 
# import utils
from utils import graph_util
from utils import plot_util

# visualization
%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Experiment 1

In [None]:
# Experiment 1 -- hyper-parameters and evaluation settings.
# Built with dict(...) so every option reads like a keyword argument; the keys,
# their order and their values are what the rest of the cell looks up in `exp`.
exp = dict(
    # optimisation
    max_epochs=10_000,
    learning_rate=0.01,  # Adam
    weight_decay=1e-7,

    # link prediction
    link_prediction=True,
    link_pred_num_rounds=10,
    link_pred_train_ratio=0.8,
    link_pred_eval_every_n_steps=50,
    link_pred_edge_emb_method="average",

    # node classification
    node_classification=True,
    node_class_num_rounds=10,
    node_class_train_ratio=0.2,
    node_class_eval_every_n_steps=50,

    # node clustering
    node_clustering=True,
    node_clustering_num_rounds=10,
    node_clustering_eval_epochs=50,
)


# Datasets the experiment iterates over.
datasets = ["cora", "hvr", "citeseer", "polblogs"]

# Every model uses a 64-dimensional embedding. The generator expressions below
# construct the models in exactly the order of their numbering, and tuple
# unpacking binds them to the same model_XX names.

# Bernoulli: one model per decoder.
model_01, model_02, model_03, model_04 = (
    Bernoulli(embedding_dimension=64, decoder=decoder_name)
    for decoder_name in ('sigmoid', 'gaussian', 'exponential', 'dist2')
)

# KL: softmax decoder, one model per similarity measure.
model_05, model_06, model_07 = (
    KL(embedding_dimension=64, decoder='softmax', similarity_measure=measure)
    for measure in ('ppr', 'sum_power_tran', 'transition')
)

# Matrix Factorization: the same seven similarity measures, first with
# embedding_option=1 (model_08..model_14), then with embedding_option=2
# (model_15..model_21).
(model_08, model_09, model_10, model_11, model_12, model_13, model_14,
 model_15, model_16, model_17, model_18, model_19, model_20, model_21) = (
    MatrixFactorization(embedding_dimension=64, similarity_measure=measure, embedding_option=option)
    for option in (1, 2)
    for measure in ('adjacency', 'laplacian', 'transition', 'sym_normalized_laplacian',
                    'NetMF', 'ppr', 'sum_power_tran')
)

# Models evaluated in this run. The Bernoulli (model_01..model_04) and KL
# (model_05..model_07) models are constructed above but left out of the list.
embedding_methods = [
    model_08, model_09, model_10, model_11, model_12, model_13, model_14,  # Matrix Factorization 1
    model_15, model_16, model_17, model_18, model_19, model_20, model_21,  # Matrix Factorization 2
]

# Set up folders to store the experiment setup summary and results.
result_folder = plot_util.setup_folders_and_summary_files(exp, datasets, embedding_methods)
print(f'The results of the current experiment are stored at experiments/{result_folder}')

# The per-task output folders depend only on result_folder, so build them once
# instead of on every (dataset, model) iteration.
link_prediction_folder = result_folder + "/link_prediction"
node_classification_folder = result_folder + "/node_classification"
node_clustering_folder = result_folder + "/node_clustering"

for dataset in datasets:

    # Load the dataset. A is handed to all three evaluations; y only to node
    # classification and node clustering.
    # NOTE(review): presumably A is the adjacency matrix and y the node labels --
    # confirm against graph_util.load_dataset.
    A, y = graph_util.load_dataset(dataset)

    for model in embedding_methods:

        print(model.get_method_summary())
        # perf_counter() is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments the way a time.time() difference can.
        start = time.perf_counter()

        # link prediction
        if exp["link_prediction"]:
            evaluate_link_prediction.expLP(
                A, dataset, model, exp["link_pred_num_rounds"],
                link_prediction_folder,
                train_ratio=exp["link_pred_train_ratio"],
                edge_emb_method=exp["link_pred_edge_emb_method"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["link_pred_eval_every_n_steps"],
                undirected=True,
            )

        # node classification
        if exp["node_classification"]:
            evaluate_node_classification.expNC(
                A, y, dataset, model, exp["node_class_num_rounds"],
                node_classification_folder,
                train_ratio=exp["node_class_train_ratio"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_class_eval_every_n_steps"],
                undirected=True,
            )

        # node clustering
        if exp["node_clustering"]:
            evaluate_node_clustering.exp_Node_Clustering(
                A, y, dataset, model, exp["node_clustering_num_rounds"],
                node_clustering_folder,
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_clustering_eval_epochs"],
                undirected=True,
            )

        end = time.perf_counter()
        print(f'Model evaluation took: {end-start} seconds')

The results of the current experiment are stored at experiments/results/2020_01_27_15_43
Matrix_Fatorization_adjacency_64_1

Link prediction evaluation has started...


Node classification evaluation has started...


Node clustering evaluation has started...

Model evaluation took: 62.72730350494385 seconds
Matrix_Fatorization_laplacian_64_1

Link prediction evaluation has started...


Node classification evaluation has started...


Node clustering evaluation has started...

Model evaluation took: 58.14901328086853 seconds
Matrix_Fatorization_transition_64_1

Link prediction evaluation has started...


Node classification evaluation has started...


Node clustering evaluation has started...

Model evaluation took: 54.35699820518494 seconds
Matrix_Fatorization_sym_normalized_laplacian_64_1

Link prediction evaluation has started...


Node classification evaluation has started...


Node clustering evaluation has started...

Model evaluation took: 83.72420239448547 seconds
Matrix_Fatoriza