In [3]:
# Notebook setup: auto-reload of project modules, import path, project and
# plotting imports, and global warning configuration.

# Re-import edited modules before each cell runs, so changes to the project
# sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
import time
import os
# NOTE(review): `os` is not referenced in the cells visible here.
# Put the parent directory first on the import path. The project packages
# imported below (embedding, evaluation, utils) are presumably located there,
# so this line has to run before those imports — TODO confirm layout.
sys.path.insert(0, '../')

# import embedding methods
from embedding.bernoulli import Bernoulli
from embedding.kl import KL
from embedding.matrix_factorization import MatrixFactorization

# import evaluation methods
from evaluation import evaluate_link_prediction
from evaluation import evaluate_node_classification
from evaluation import evaluate_node_clustering

# import utils
from utils import graph_util
from utils import plot_util

# visualization: render matplotlib figures inline in the notebook output
%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')
import matplotlib.pyplot as plt

import warnings
# Suppresses every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries used during training; consider narrowing the filter.
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


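## Experiment 1

Runs 21 embedding models (Bernoulli, KL and matrix-factorization variants, all with 64-dimensional embeddings) on four datasets (cora, hvr, citeseer, polblogs) and evaluates each of them on link prediction, node classification and node clustering.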

In [None]:
# Experiment 1 -- configuration
#
# The whole dict is handed to plot_util.setup_folders_and_summary_files, and
# the evaluation loop reads the individual entries by key.
exp = {
    # --- training ---
    "max_epochs": 10_000,                   # forwarded as train_epochs to every evaluation call
    # NOTE(review): learning_rate and weight_decay are not passed to any model
    # constructor or evaluation call in this cell; they only reach the summary
    # writer. Confirm the models actually train with these values.
    "learning_rate": 0.01,                  # original note: "Adam"
    "weight_decay": 1e-7,

    # --- link prediction ---
    "link_prediction": True,                # switch: run expLP for each model
    "link_pred_num_rounds": 10,             # rounds argument of expLP
    "link_pred_train_ratio": 0.8,           # train_ratio argument of expLP
    "link_pred_eval_every_n_steps": 50,     # eval_epochs argument of expLP
    "link_pred_edge_emb_method": "average", # edge_emb_method argument of expLP

    # --- node classification ---
    "node_classification": True,            # switch: run expNC for each model
    "node_class_num_rounds": 10,            # rounds argument of expNC
    "node_class_train_ratio": 0.2,          # train_ratio argument of expNC
    "node_class_eval_every_n_steps": 50,    # eval_epochs argument of expNC

    # --- node clustering ---
    "node_clustering": True,                # switch: run exp_Node_Clustering for each model
    "node_clustering_num_rounds": 10,       # rounds argument of exp_Node_Clustering
    "node_clustering_eval_epochs": 50,      # eval_epochs argument of exp_Node_Clustering
}


# Datasets to evaluate on, processed in this order.
datasets = ["cora", "hvr", "citeseer", "polblogs"]

# Every model in this experiment uses the same embedding size.
EMBEDDING_DIM = 64

# Bernoulli: one model per decoder.
bernoulli_models = [
    Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder=decoder)
    for decoder in ('sigmoid', 'gaussian', 'exponential', 'dist2')
]
model_01, model_02, model_03, model_04 = bernoulli_models

# KL: softmax decoder, one model per similarity measure.
kl_models = [
    KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure=measure)
    for measure in ('ppr', 'sum_power_tran', 'transition')
]
model_05, model_06, model_07 = kl_models

# Matrix Factorization: the same seven similarity measures are used once with
# embedding_option=1 and once with embedding_option=2.
mf_similarity_measures = (
    'adjacency',
    'laplacian',
    'transition',
    'sym_normalized_laplacian',
    'NetMF',
    'ppr',
    'sum_power_tran',
)

mf_option_1_models = [
    MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure=measure, embedding_option=1)
    for measure in mf_similarity_measures
]
(model_08, model_09, model_10, model_11,
 model_12, model_13, model_14) = mf_option_1_models

mf_option_2_models = [
    MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure=measure, embedding_option=2)
    for measure in mf_similarity_measures
]
(model_15, model_16, model_17, model_18,
 model_19, model_20, model_21) = mf_option_2_models

# Full list in the original order: Bernoulli, KL, Matrix Factorization
# (option 1), Matrix Factorization (option 2).
embedding_methods = [
    *bernoulli_models,
    *kl_models,
    *mf_option_1_models,
    *mf_option_2_models,
]

# Set up folders to store the experiment setup summary and results.
result_folder = plot_util.setup_folders_and_summary_files(exp, datasets, embedding_methods)
print(f'The results of the current experiment are stored at experiments/{result_folder}')

# The per-task output folders depend only on result_folder, so build them once
# instead of on every (dataset, model) iteration.
link_prediction_folder = result_folder + "/link_prediction"
node_classification_folder = result_folder + "/node_classification"
node_clustering_folder = result_folder + "/node_clustering"

# Evaluate every model on every dataset. Each task is switched on or off by its
# flag in `exp`; all three tasks share exp["max_epochs"] as train_epochs.
# NOTE(review): the same model object is passed to every task and reused for
# every dataset; presumably each evaluation call re-initialises and retrains
# it — confirm that no trained state carries over between calls.
for dataset in datasets:
    print(f'##### {dataset} #####\n\n')

    # Load the dataset. `A` is passed to all three evaluations and `y` only to
    # node classification and node clustering (presumably the adjacency matrix
    # and the node labels — TODO confirm against graph_util.load_dataset).
    A, y = graph_util.load_dataset(dataset)

    for model in embedding_methods:

        print(model.get_method_summary())
        # perf_counter is monotonic, so the reported duration cannot be
        # distorted by system clock adjustments during these long runs.
        start = time.perf_counter()

        # link prediction
        if exp["link_prediction"]:
            evaluate_link_prediction.expLP(
                A, dataset, model, exp["link_pred_num_rounds"],
                link_prediction_folder,
                train_ratio=exp["link_pred_train_ratio"],
                edge_emb_method=exp["link_pred_edge_emb_method"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["link_pred_eval_every_n_steps"],
                undirected=True,
            )

        # node classification
        if exp["node_classification"]:
            evaluate_node_classification.expNC(
                A, y, dataset, model, exp["node_class_num_rounds"],
                node_classification_folder,
                train_ratio=exp["node_class_train_ratio"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_class_eval_every_n_steps"],
                undirected=True,
            )

        # node clustering
        if exp["node_clustering"]:
            evaluate_node_clustering.exp_Node_Clustering(
                A, y, dataset, model, exp["node_clustering_num_rounds"],
                node_clustering_folder,
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_clustering_eval_epochs"],
                undirected=True,
            )

        end = time.perf_counter()
        print(f'Model evaluation took: {end-start} seconds')

The results of the current experiment are stored at experiments/results/2020_01_28_10_52
##### cora #####


Bernoulli_sigmoid_adjacency_64

Link prediction evaluation has started...

Epoch    0, loss = 0.97254
Epoch   25, loss = 0.31790
Epoch   50, loss = 0.10532
Epoch   75, loss = 0.04853
Epoch  100, loss = 0.02994
Epoch  125, loss = 0.02179
Epoch  150, loss = 0.01733
Epoch  175, loss = 0.01449
Epoch  200, loss = 0.01247
Epoch  225, loss = 0.01091
Epoch  250, loss = 0.00966
Epoch  275, loss = 0.00861
Epoch  300, loss = 0.00773
Epoch  325, loss = 0.00698
Epoch  350, loss = 0.00633
Epoch  375, loss = 0.00577
Epoch  400, loss = 0.00528
Epoch  425, loss = 0.00486
Epoch  450, loss = 0.00450
Epoch  475, loss = 0.00418
Epoch  500, loss = 0.00390
Epoch  525, loss = 0.00366
Epoch  550, loss = 0.00345
Epoch  575, loss = 0.00326
Epoch  600, loss = 0.00309
Epoch  625, loss = 0.00294
Epoch  650, loss = 0.00280
Epoch  675, loss = 0.00268
Epoch  700, loss = 0.00257
Epoch  725, loss = 0.00247
Epoch  

Epoch 3150, loss = 0.00073
Epoch 3175, loss = 0.00073
Epoch 3200, loss = 0.00072
Epoch 3225, loss = 0.00072
Epoch 3250, loss = 0.00072
Epoch 3275, loss = 0.00071
Epoch 3300, loss = 0.00071
Epoch 3325, loss = 0.00070
Epoch 3350, loss = 0.00070
Epoch 3375, loss = 0.00070
Epoch 3400, loss = 0.00069
Epoch 3425, loss = 0.00069
Epoch 3450, loss = 0.00069
Epoch 3475, loss = 0.00068
Epoch 3500, loss = 0.00068
Epoch 3525, loss = 0.00068
Epoch 3550, loss = 0.00067
Epoch 3575, loss = 0.00067
Epoch 3600, loss = 0.00067
Epoch 3625, loss = 0.00066
Epoch 3650, loss = 0.00066
Epoch 3675, loss = 0.00066
Epoch 3700, loss = 0.00066
Epoch 3725, loss = 0.00065
Epoch 3750, loss = 0.00065
Epoch 3775, loss = 0.00065
Epoch 3800, loss = 0.00064
Epoch 3825, loss = 0.00064
Epoch 3850, loss = 0.00064
Epoch 3875, loss = 0.00063
Epoch 3900, loss = 0.00063
Epoch 3925, loss = 0.00063
Epoch 3950, loss = 0.00063
Epoch 3975, loss = 0.00062
Epoch 4000, loss = 0.00062
Epoch 4025, loss = 0.00062
Epoch 4050, loss = 0.00062
E

Epoch 2175, loss = 0.00093
Epoch 2200, loss = 0.00092
Epoch 2225, loss = 0.00092
Epoch 2250, loss = 0.00091
Epoch 2275, loss = 0.00090
Epoch 2300, loss = 0.00090
Epoch 2325, loss = 0.00089
Epoch 2350, loss = 0.00088
Epoch 2375, loss = 0.00088
Epoch 2400, loss = 0.00087
Epoch 2425, loss = 0.00087
Epoch 2450, loss = 0.00086
Epoch 2475, loss = 0.00086
Epoch 2500, loss = 0.00085
Epoch 2525, loss = 0.00084
Epoch 2550, loss = 0.00084
Epoch 2575, loss = 0.00083
Epoch 2600, loss = 0.00083
Epoch 2625, loss = 0.00082
Epoch 2650, loss = 0.00082
Epoch 2675, loss = 0.00081
Epoch 2700, loss = 0.00081
Epoch 2725, loss = 0.00080
Epoch 2750, loss = 0.00080
Epoch 2775, loss = 0.00079
Epoch 2800, loss = 0.00079
Epoch 2825, loss = 0.00079
Epoch 2850, loss = 0.00078
Epoch 2875, loss = 0.00078
Epoch 2900, loss = 0.00077
Epoch 2925, loss = 0.00077
Epoch 2950, loss = 0.00076
Epoch 2975, loss = 0.00076
Epoch 3000, loss = 0.00076
Epoch 3025, loss = 0.00075
Epoch 3050, loss = 0.00075
Epoch 3075, loss = 0.00074
E

Epoch 1200, loss = 0.00149
