In [1]:
# import necessary stuff and python-wrapper of verse
import os
import pprint
import numpy as np
import json
import sys
import codecs
from scipy.sparse import csr_matrix

from verse.python.wrapper import VERSE
from multi_class_classification import MultiClassClassification
from multi_label_classification import MultiLabelClassification
from clustering import Clustering
from link_prediction import LinkPrediction
from experiment import Experiment

In [2]:
# pretty printer used for the nested result dicts further down:
# 4 spaces of indentation per level, nesting deeper than 8 levels is elided
pp = pprint.PrettyPrinter(depth=8, indent=4)

In [3]:
# Resolve the directory that holds the c++ implementation of verse (verse.cpp)
# and its compiled *.so files, then instantiate the verse algorithm object on it.
verse_cpp_path = os.path.realpath('verse/src/verse.cpp')
cpath = os.path.dirname(verse_cpp_path)
verse = VERSE(cpath=cpath)

In [4]:
# Example adjacency matrix in csr format, written down as one
# (row, col, weight) triplet per non-zero entry.
nonzero_entries = np.array([
    # row, col, weight
    [0, 0, 1],
    [0, 2, 2],
    [1, 2, 3],
    [2, 0, 4],
    [2, 1, 5],
    [2, 2, 6],
])
row, col, data = nonzero_entries.T

# 10 x 10 matrix: only the first three nodes carry edges, the rest stay isolated
csr_adjacency_matrix = csr_matrix((data, (row, col)), shape=(10, 10))

In [5]:
# Hyper-parameters for verse with personalized page rank as sim_G function.
# All of these are handed positionally to verse.verse_ppr in the next cell.

# input: the example adjacency matrix; no value is supplied for `w`
graph, w = csr_adjacency_matrix, None

# embedding size: the embedding matrix is later shaped (num_of_nodes, n_hidden)
n_hidden = 128

# similarity / optimisation settings
alpha = 0.85  # presumably the PPR damping factor -- confirm against the wrapper
n_neg_samples = 3
steps = 100000
lr = 0.0025

# execution settings
rng_seed = 0
n_threads = -1  # NOTE(review): -1 presumably means "use all available threads" -- confirm

In [6]:
# Compute the node embeddings with verse-ppr.
# ATTENTION: this call takes a while to terminate and cannot be stopped by restarting
# the kernel. To kill the c++ process, kill the terminal in which the jupyter notebook
# was started.
verse_ppr_embeddings = verse.verse_ppr(
    graph, w, n_hidden, alpha, steps,
    n_neg_samples, lr, rng_seed, n_threads
)

In [7]:
# or read *.bin file with precomputed embeddings
# NOTE: this replaces the verse_ppr_embeddings computed by verse.verse_ppr above.
embeddings_file_path = 'data/test_converter_verse_embeddings.bin'

# The file holds raw float32 values, so it is opened in binary mode; the
# with-block makes sure the handle is closed again right after reading.
with open(embeddings_file_path, "rb") as embeddings_file:
    embeddings_file_content = np.fromfile(embeddings_file, dtype=np.float32)

# The flat buffer is interpreted as num_of_nodes rows of n_hidden values each.
# Fail with a readable message if the file size does not match n_hidden
# (same exception type that reshape would raise for a mismatching size).
if embeddings_file_content.size % n_hidden != 0:
    raise ValueError(
        'embeddings file {} holds {} float32 values, which is not a multiple of n_hidden={}'
        .format(embeddings_file_path, embeddings_file_content.size, n_hidden))

num_of_nodes = embeddings_file_content.size // n_hidden
verse_ppr_embeddings = embeddings_file_content.reshape((num_of_nodes, n_hidden))

In [8]:
# sanity check: one row per node, n_hidden columns
verse_ppr_embeddings.shape

(10, 128)

In [9]:
# instantiate multi-class classification experiment on the verse-ppr embeddings:
# one class label per node, half of the data is used for training
verse_ppr_classification_experiment = MultiClassClassification(
    method_name='Verse-PPR',
    dataset_name='Test-Data',
    embeddings=verse_ppr_embeddings,
    node_labels=[0, 1, 1, 0, 0, 1, 1, 0, 0, 1],
    train_size=0.5,
    performance_function='both',
)

Initialize multi-class classification experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!


In [10]:
# train, predict and evaluate multi-class classification experiment
# The three steps are called in this order on the same experiment object; each
# one logs its progress (see the cell output).
# train(): its return value is kept as logistic_regression_model (presumably the fitted classifier)
logistic_regression_model = verse_ppr_classification_experiment.train()
# predict(): its return value is kept as node_label_predictions (this name is reused by later experiments)
node_label_predictions = verse_ppr_classification_experiment.predict()
# evaluate(): its return value is the results dict displayed in the next cell
verse_ppr_classification_experiment_results = verse_ppr_classification_experiment.evaluate()

Train multi-class classification experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!
convergence after 23 epochs took 0 seconds
Trained multi-class classification experiment in 0.0 sec.!
Predict multi-class classification experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!
Predicted multi-class classification experiment in 0.0 sec.!
Evaluate multi-class classification experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


In [11]:
# print results of multi-class classification experiment
# (displayed as cell output: a dict with a 'macro' and a 'micro' entry)
verse_ppr_classification_experiment_results

{'macro': 0.16666666666666666, 'micro': 0.20000000000000004}

In [12]:
# instantiate clustering experiment on the verse-ppr embeddings:
# three ground-truth labels (0, 1, 2) and three clusters to be found
verse_ppr_clustering_experiment = Clustering(
    method_name='Verse-PPR',
    dataset_name='Test-Data',
    embeddings=verse_ppr_embeddings,
    node_labels=[0, 0, 1, 1, 2, 2, 1, 0, 0, 2],
    n_clusters=3,
    performance_function='both',
)

Initialize clustering experiment with Verse-PPR on Test-Data evaluated through both!


In [13]:
# train, predict and evaluate clustering experiment
# The three steps are called in this order on the same experiment object.
# train(): its return value is kept as k_means (presumably the fitted clustering model)
k_means = verse_ppr_clustering_experiment.train()
# predict(): NOTE this overwrites node_label_predictions from the multi-class classification cell
node_label_predictions = verse_ppr_clustering_experiment.predict()
# evaluate(): its return value is the results dict displayed in the next cell
verse_ppr_clustering_experiment_results = verse_ppr_clustering_experiment.evaluate()

Train clustering experiment with Verse-PPR on Test-Data evaluated through both!
Trained clustering experiment in 0.2 sec.!
Predict clustering experiment with Verse-PPR on Test-Data evaluated through both!
Predicted clustering experiment in 0.0 sec.!
Evaluate clustering experiment with Verse-PPR on Test-Data evaluated through both!


In [14]:
# print results of clustering experiment
# (displayed as cell output: a dict with an 'nmi' and a 'silhouette' entry)
verse_ppr_clustering_experiment_results

{'nmi': 0.44270128334604997, 'silhouette': 0.22370306}

In [15]:
# instantiate multi-label classification experiment on the verse-ppr embeddings:
# every node carries a list of labels drawn from {0, 1, 2}
verse_ppr_multi_label_classification_experiment = MultiLabelClassification(
    method_name='Verse-PPR',
    dataset_name='Test-Data',
    embeddings=verse_ppr_embeddings,
    node_labels=[[0], [1, 2], [0, 2], [0, 1], [2], [1], [0, 1], [1], [2], [1, 0]],
    classifier='logistic_regression',
    n_neighbors=3,
    performance_function='both',
)

Initialize multi-label classification experiment with Verse-PPR on Test-Data evaluated through both on 30.0% train data!


In [16]:
# train, predict and evaluate multi-label classification experiment
# The three steps are called in this order on the same experiment object.
# train(): its return value is kept as multi_label_model
multi_label_model = verse_ppr_multi_label_classification_experiment.train()
# predict(): NOTE this overwrites node_label_predictions from the clustering cell
node_label_predictions = verse_ppr_multi_label_classification_experiment.predict()
# evaluate(): its return value is the results dict displayed in the next cell
verse_ppr_multi_label_classification_experiment_results = verse_ppr_multi_label_classification_experiment.evaluate()

Train multi-label classification experiment with Verse-PPR on Test-Data evaluated through both on 30.0% train data!
convergence after 45 epochs took 0 seconds


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


convergence after 36 epochs took 0 seconds
convergence after 40 epochs took 0 seconds


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


Trained multi-label classification experiment in 0.19 sec.!
Predict multi-label classification experiment with Verse-PPR on Test-Data evaluated through both on 30.0% train data!
Predicted multi-label classification experiment in 0.0 sec.!
Evaluate multi-label classification experiment with Verse-PPR on Test-Data evaluated through both on 30.0% train data!


In [17]:
# print results of multi-label classification experiment
# (displayed as cell output: a dict with a 'macro' and a 'micro' entry)
verse_ppr_multi_label_classification_experiment_results

{'macro': 0.57777777777777761, 'micro': 0.59999999999999998}

In [18]:
# instantiate link prediction experiment on the verse-ppr embeddings:
# seven new edges and seven negative edges, given as [node, node] pairs;
# edgewise features are built with the hadamard operator
verse_ppr_link_prediction_experiment = LinkPrediction(
    method_name='Verse-PPR',
    dataset_name='Test-Data',
    node_embeddings=verse_ppr_embeddings,
    new_edges=[[1, 3], [5, 6], [3, 6], [1, 5], [8, 7], [9, 4], [7, 2]],
    neg_edges=[[2, 3], [7, 6], [1, 6], [2, 5], [8, 9], [9, 3], [7, 5]],
    vector_operator='hadamard',
    performance_function='both',
)

Initialize link prediction experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!
Compute edgewise features based on hadamard operator!




In [19]:
# train, predict and evaluate link prediction experiment
# The three steps are called in this order on the same experiment object.
# train(): its return value is kept as link_prediction_model
link_prediction_model = verse_ppr_link_prediction_experiment.train()
# predict(): predictions are per edge here, hence the separate name edge_label_predictions
edge_label_predictions = verse_ppr_link_prediction_experiment.predict()
# evaluate(): its return value is the results dict displayed in the next cell
verse_ppr_link_prediction_experiment_results = verse_ppr_link_prediction_experiment.evaluate()

Train link prediction experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!
convergence after 18 epochs took 0 seconds
Trained link prediction experiment in 0.0 sec.!
Predict multi-class classification experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!
Predicted link prediction experiment in 0.0 sec.!
Evaluate link prediction experiment with Verse-PPR on Test-Data evaluated through both on 50.0% train data!


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


In [20]:
# print results of link prediction experiment
# (displayed as cell output: a dict with a 'macro' and a 'micro' entry)
verse_ppr_link_prediction_experiment_results

{'macro': 0.70833333333333326, 'micro': 0.7142857142857143}

In [21]:
# init clustering experiment on verse-ppr embeddings: the cross-product of
# experiment_params (here n_clusters = 2 and 3) is evaluated, each combination
# repeated 10 times. The embeddings are given via the *.bin file path instead of
# an in-memory matrix.
# NOTE(review): the keyword is spelled `node_embedings` (single "d"); it is kept
# unchanged here -- verify the spelling against the Experiment signature.
verse_ppr_experiment = Experiment(
    method_name='Verse-PPR',
    dataset_name='Test-Data',
    performance_function='nmi',
    node_labels=[0, 0, 1, 1, 2, 2, 1, 0, 0, 2],
    embeddings_file_path=embeddings_file_path,
    node_embedings=None,
    embedding_dimensionality=128,
    repetitions=10,
    experiment_params={'n_clusters': [2, 3]},
    results_file_path='results/test_verse_ppr_experiment_results.json',
)

In [22]:
# run experiment wrapper: train, predict and evaluate clustering with each param combination 10 times
# (n_clusters in [2, 3] and repetitions=10, as configured on the Experiment object;
# the returned results are pretty-printed in the next cell)
verse_ppr_experiment_results = verse_ppr_experiment.run()

Start clustering experiment on Test-Data data set with Verse-PPR embeddings
Repeated 10 times and evaluated through nmiperformance function(s)
Initialize clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Train clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Trained clustering experiment in 0.18 sec.!
Predict clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Predicted clustering experiment in 0.0 sec.!
Evaluate clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Initialize clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Train clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Trained clustering experiment in 0.22 sec.!
Predict clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Predicted clustering experiment in 0.0 sec.!
Evaluate clustering experiment with Verse-PPR on Test-Data evaluated through nmi!
Initialize clustering exper

In [23]:
# pretty-print prediction and evaluation results of each single run with all param-combinations
# (nested dict, see output: 'dataset', 'embedding_file', 'method' and 'parameterizations',
# where each parameterization holds its 'params' and a list of 'runs')
pp.pprint(verse_ppr_experiment_results)

{   'dataset': 'Test-Data',
    'embedding_file': 'data/test_converter_verse_embeddings.bin',
    'method': 'Verse-PPR',
    'parameterizations': [   {   'params': {'n_clusters': 2},
                                 'runs': [   {   'evaluation': {   'nmi': 0.039112374973780527},
                                                 'predictions': [   0,
                                                                    0,
                                                                    1,
                                                                    1,
                                                                    0,
                                                                    0,
                                                                    0,
                                                                    1,
                                                                    1,
                                                                    1],
         