In [5]:
%load_ext autoreload
%autoreload 2

import sys
import time
# Put the parent directory first on the module search path; the project-local
# packages imported below (embedding, evaluation, utils) presumably live there.
sys.path.insert(0, '../')

# import embedding methods
from embedding.bernoulli import Bernoulli
from embedding.kl import KL
from embedding.matrix_factorization import MatrixFactorization

# import evaluation methods
from evaluation import evaluate_link_prediction
from evaluation import evaluate_node_classification
from evaluation import evaluate_node_clustering

# import utils
from utils import graph_util
from utils import plot_util
from utils import model_util

# visualization
%matplotlib inline
import seaborn as sns
# Apply seaborn's 'whitegrid' style to the figures drawn later in the notebook.
sns.set_style('whitegrid')

import warnings
# NOTE(review): with no category or module argument this filter hides every
# warning raised after this point (deprecations, numerical issues, ...).
# Consider narrowing it to the specific warnings that are known to be noise.
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Experiment 1

In [6]:
# Experiment 1: configuration
#
# `exp` is handed to plot_util.setup_folders_and_summary_files and read by the
# experiment loop further down in this cell.
# NOTE(review): "learning_rate" and "weight_decay" are not read by the visible
# loop; presumably they are only recorded in the summary file. Confirm.

exp = dict(
    # --- training ---
    max_epochs=500,        # passed to the evaluators as train_epochs
    learning_rate=1e-2,    # Adam (per the original note)
    weight_decay=1e-7,

    # --- link prediction ---
    link_prediction=True,  # switch: run this task at all?
    link_pred_num_rounds=2,
    link_pred_train_ratio=0.8,
    link_pred_eval_every_n_steps=5,  # passed as eval_epochs
    link_pred_edge_emb_method="average",

    # --- node classification ---
    node_classification=False,
    node_class_num_rounds=2,
    node_class_train_ratio=0.8,
    node_class_eval_every_n_steps=5,  # passed as eval_epochs

    # --- node clustering ---
    node_clustering=False,
    node_clustering_num_rounds=5,
    node_clustering_eval_epochs=1,
)

# Datasets to evaluate on.
# Other names noted by the authors: parliament, hvr, purdue_facebook
datasets = [
    "cora",
    "citeseer",
    "polblogs",
]

# TODO (owner in brackets):
# - Laplacian (no-brainer) [Till]: done
# - NetMF [Till]
# - PPR [Till]: done(?)
# - Sum_Power_Tran [Jan]
# - Sim_Rank [Jan]
# - Big task: what to do with W / U Epsilon [Jan]
# - Possible: Forest Fire / Stochastic Optimization


# Embedding size shared by every model below. It used to be repeated as a
# literal 64 in each constructor call; change it here to rerun the comparison
# at a different dimensionality.
EMBEDDING_DIM = 64

# --- Bernoulli ---
model_01 = Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder='sigmoid')  # works!
# model_02: placeholder for Bernoulli, sigmoid, with W
model_03 = Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder='gaussian')  # works!
model_04 = Bernoulli(embedding_dimension=EMBEDDING_DIM, decoder='exponential')  # works!
# model_05: placeholder for Bernoulli, exponential, with W


# --- KL ---
# KL(similarity_measure | softmax(ZZ^T)); the similarity measure needs to be
# row stochastic.
model_06 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='PPR')
model_07 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='Sum_Power_Tran')
model_08 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='Sim_Rank')
model_09 = KL(embedding_dimension=EMBEDDING_DIM, decoder='softmax', similarity_measure='Transition')
# model_10: placeholder for KL, PPR, with W
# model_11: placeholder for KL, Sum_Power_Tran, with W
# model_12: placeholder for KL, Sim_Rank, with W
# model_13: placeholder for KL, Transition, with W


# --- Matrix Factorization ---
model_14 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='adjacency')  # works!
model_15 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='laplacian')
model_16 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='Transition')
model_17 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='sym_normalized_laplacian')  # works!
model_18 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='NetMF')
model_19 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='PPR')
model_20 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='Sum_Power_Tran')
model_21 = MatrixFactorization(embedding_dimension=EMBEDDING_DIM, similarity_measure='Sim_Rank')
# model_22: placeholder for MF, adjacency, with W
# model_23: placeholder for MF, laplacian, with W
# model_24: placeholder for MF, Transition, with W
# model_25: placeholder for MF, sym_normalized_laplacian, with W
# model_26: placeholder for MF, NetMF, with W
# model_27: placeholder for MF, PPR, with W
# model_28: placeholder for MF, Sum_Power_Tran, with W
# model_29: placeholder for MF, Sim_Rank, with W


# Only the models listed here are handed to the experiment loop; the other
# instances above are constructed but not evaluated.
embedding_methods = [model_04]

# Normalized mutual information: one entry is appended per (dataset, model)
# pair whenever node clustering is enabled, otherwise this stays empty.
total_NMI = []

# setup folders to store experiment setup summary and results
result_folder = plot_util.setup_folders_and_summary_files(exp, datasets, embedding_methods)
print(f'The results of the current experiment are stored at experiments/{result_folder}')

for dataset in datasets:

    # load dataset
    A, y = graph_util.load_dataset(dataset)

    # NOTE(review): the same model objects are reused for every dataset;
    # presumably each evaluator re-initialises the model. Confirm.
    for model in embedding_methods:
        # perf_counter() is a monotonic clock meant for measuring intervals,
        # so the reported duration is immune to system clock adjustments.
        start = time.perf_counter()

        # do link prediction
        if exp["link_prediction"]:
            link_prediction_folder = result_folder + "/link_prediction"
            evaluate_link_prediction.expLP(
                A, dataset, model, exp["link_pred_num_rounds"],
                link_prediction_folder,
                train_ratio=exp["link_pred_train_ratio"],
                edge_emb_method=exp["link_pred_edge_emb_method"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["link_pred_eval_every_n_steps"],
                undirected=True,
            )

        # do node classification
        if exp["node_classification"]:
            node_classification_folder = result_folder + "/node_classification"
            evaluate_node_classification.expNC(
                A, y, dataset, model, exp["node_class_num_rounds"],
                node_classification_folder,
                train_ratio=exp["node_class_train_ratio"],
                train_epochs=exp["max_epochs"],
                eval_epochs=exp["node_class_eval_every_n_steps"],
                undirected=True,
            )

        # do node clustering and keep its NMI result for the box plot below
        if exp["node_clustering"]:
            node_clustering_folder = result_folder + "/node_clustering"
            NMI = evaluate_node_clustering.exp_Node_Clustering(
                A, y, dataset, model, exp["node_clustering_num_rounds"],
                node_clustering_folder,
                eval_epochs=exp["node_clustering_eval_epochs"],
                undirected=True,
            )
            total_NMI.append(NMI)

        end = time.perf_counter()
        print(f'Model evaluation took: {end-start} seconds')

The results of the current experiment are stored at experiments/results/2020_01_09_11_10
	Link prediction evaluation has started...
Epoch    0, loss = 157129312.00000
Epoch   10, loss = 155755712.00000
Epoch   10, loss = 155533712.00000
Epoch   20, loss = 152904896.00000
Epoch   20, loss = 152684352.00000
Epoch   30, loss = 151015584.00000
Epoch   30, loss = 150943760.00000
Epoch   40, loss = 150650624.00000
Epoch   40, loss = 150630944.00000
Epoch   50, loss = 150530032.00000
Epoch   50, loss = 150524288.00000
Epoch   60, loss = 150482048.00000
Epoch   60, loss = 150479840.00000
Epoch   70, loss = 150460944.00000
Epoch   70, loss = 150459648.00000
Epoch   80, loss = 150448176.00000
Epoch   80, loss = 150447200.00000
Epoch   90, loss = 150439120.00000
Epoch   90, loss = 150438464.00000
Epoch  100, loss = 150432208.00000
Epoch  100, loss = 150431712.00000
Epoch  110, loss = 150426496.00000
Epoch  110, loss = 150426048.00000
Epoch  120, loss = 150421728.00000
Epoch  120, loss = 150421392

Epoch  160, loss = 90700304.00000
Epoch  160, loss = 90700200.00000
Epoch  170, loss = 90699040.00000
Epoch  170, loss = 90698936.00000
Epoch  180, loss = 90697912.00000
Epoch  180, loss = 90697824.00000
Epoch  190, loss = 90696944.00000
Epoch  190, loss = 90696888.00000
Epoch  200, loss = 90696112.00000
Epoch  200, loss = 90696048.00000
Epoch  210, loss = 90695280.00000
Epoch  210, loss = 90695216.00000
Epoch  220, loss = 90694528.00000
Epoch  220, loss = 90694480.00000
Epoch  230, loss = 90693824.00000
Epoch  230, loss = 90693776.00000
Epoch  240, loss = 90693248.00000
Epoch  240, loss = 90693200.00000
Epoch  250, loss = 90692712.00000
Epoch  250, loss = 90692656.00000
Epoch  260, loss = 90692144.00000
Epoch  260, loss = 90692104.00000
Epoch  270, loss = 90691664.00000
Epoch  270, loss = 90691632.00000
Epoch  280, loss = 90691216.00000
Epoch  280, loss = 90691168.00000
Epoch  290, loss = 90690760.00000
Epoch  290, loss = 90690736.00000
Epoch  300, loss = 90690384.00000
Epoch  300, lo

Epoch  350, loss = 209703392.00000
Epoch  360, loss = 209701888.00000
Epoch  360, loss = 209701792.00000
Epoch  370, loss = 209700288.00000
Epoch  370, loss = 209700160.00000
Epoch  380, loss = 209698624.00000
Epoch  380, loss = 209698464.00000
Epoch  390, loss = 209697104.00000
Epoch  390, loss = 209696960.00000
Epoch  400, loss = 209695712.00000
Epoch  400, loss = 209695488.00000
Epoch  410, loss = 209693856.00000
Epoch  410, loss = 209693792.00000
Epoch  420, loss = 209692240.00000
Epoch  420, loss = 209692192.00000
Epoch  430, loss = 209691520.00000
Epoch  430, loss = 209691424.00000
Epoch  440, loss = 209690048.00000
Epoch  440, loss = 209690048.00000
Epoch  450, loss = 209689280.00000
Epoch  450, loss = 209689216.00000
Epoch  460, loss = 209688048.00000
Epoch  460, loss = 209687776.00000
Epoch  470, loss = 209687088.00000
Epoch  470, loss = 209686944.00000
Epoch  480, loss = 209686096.00000
Epoch  480, loss = 209686016.00000
Epoch  490, loss = 209684768.00000
Epoch  490, loss = 2

In [None]:
# Box plot of the NMI results gathered by the experiment loop.
# total_NMI is only filled when exp["node_clustering"] is enabled, so skip the
# plot instead of handing seaborn an empty list.
if total_NMI:
    ax = sns.boxplot(data=total_NMI)
    ax.set(title="Node clustering: normalized mutual information", ylabel="NMI")
else:
    print("No NMI results collected; enable exp['node_clustering'] to produce this plot.")