In [2]:
# import necessary packages
import numpy as np
import pandas as pd
import json
import pprint
import matplotlib.pyplot as plt

In [3]:
# Shared pretty printer for inspecting nested result structures:
# 4 spaces of indentation per level, nesting shown up to 8 levels deep.
pp = pprint.PrettyPrinter(
    depth=8,
    indent=4,
)

In [4]:
# Locate the JSON result files of the four embedding methods (verse-ppr,
# node2vec, deepwalk, hete-verse) inside the shared results folder.
results_folder = 'results/coauthor/'
verse_results_path = results_folder + 'coauthor2_verse_ppr_conference_classification.json'
node2vec_results_path = results_folder + 'coauthor2_node2vec_conference_classification.json'
deepwalk_results_path = results_folder + 'coauthor2_deepwalk_conference_classification.json'
hete_verse_results_path = results_folder + 'coauthor2_hete_verse_conference_classification_v2.json'


def _load_results(results_path):
    """Parse one JSON results file and return the decoded object.

    The file is opened with an explicit UTF-8 encoding so that decoding
    does not depend on the platform's default locale encoding.

    Args:
        results_path: path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for that file.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    with open(results_path, 'r', encoding='utf-8') as results_file:
        return json.load(results_file)


# One decoded results object per embedding method.
verse_results = _load_results(verse_results_path)
node2vec_results = _load_results(node2vec_results_path)
deepwalk_results = _load_results(deepwalk_results_path)
hete_verse_results = _load_results(hete_verse_results_path)

In [5]:
# The metric names are taken from the evaluation entry of the very first run
# of the first verse parameterization; they are reused for every method below.
evaluation_metrics = list(
    verse_results['parameterizations'][0]['runs'][0]['evaluation']
)

In [6]:
# For every verse hyper-parameter setting keep its params together with,
# per evaluation metric, the list of scores obtained over all runs.
num_hyper_param_settings = len(verse_results['parameterizations'])
total_eval_results = [
    {
        'params': setting['params'],
        'eval': {
            metric: [run['evaluation'][metric] for run in setting['runs']]
            for metric in evaluation_metrics
        },
    }
    for setting in verse_results['parameterizations']
]

In [7]:
# Add a '<metric>_avg' entry next to each metric's score list, holding the
# mean of that metric over all runs of the hyper-parameter setting.
for setting_results in total_eval_results:
    scores = setting_results['eval']
    for metric in evaluation_metrics:
        scores[metric + '_avg'] = np.mean(scores[metric])

In [8]:
# Summarize the verse results file: method, dataset, embedding file and the
# number of hyper-parameter settings it contains.
for template, key in (
    ('Embedding-Method: {}', 'method'),
    ('Dataset: {}', 'dataset'),
    ('Used embedding: {}', 'embedding_file'),
):
    print(template.format(verse_results[key]))
print('Number of different hyper_parameter settings: {}'.format(len(verse_results['parameterizations'])))

Embedding-Method: Verse-PPR
Dataset: co-author
Used embedding: 
Number of different hyper_parameter settings: 10


In [9]:
# Report, for each train size of the verse results, the macro and micro
# averages rounded to four decimals (train size shown as a percentage).
for setting_results in total_eval_results:
    train_percent = setting_results['params']['train_size']*100
    macro_avg = round(setting_results['eval']['macro_avg'], 4)
    micro_avg = round(setting_results['eval']['micro_avg'], 4)
    print("Train size: {}%, Macro: {}, Micro: {}\n".format(train_percent, macro_avg, micro_avg))

Train size: 5.0%, Macro: 0.6805, Micro: 0.6808

Train size: 10.0%, Macro: 0.6951, Micro: 0.6956

Train size: 15.0%, Macro: 0.7178, Micro: 0.7182

Train size: 20.0%, Macro: 0.7317, Micro: 0.732

Train size: 25.0%, Macro: 0.7384, Micro: 0.7385

Train size: 30.0%, Macro: 0.7451, Micro: 0.7451

Train size: 35.0%, Macro: 0.7481, Micro: 0.7481

Train size: 40.0%, Macro: 0.7516, Micro: 0.7516

Train size: 45.0%, Macro: 0.7543, Micro: 0.7542

Train size: 50.0%, Macro: 0.7556, Micro: 0.7556



In [10]:
# For every node2vec hyper-parameter setting keep its params together with,
# per evaluation metric, the list of scores obtained over all runs.
# Note: this replaces the previous content of total_eval_results.
num_hyper_param_settings = len(node2vec_results['parameterizations'])
total_eval_results = [
    {
        'params': setting['params'],
        'eval': {
            metric: [run['evaluation'][metric] for run in setting['runs']]
            for metric in evaluation_metrics
        },
    }
    for setting in node2vec_results['parameterizations']
]

In [11]:
# Add a '<metric>_avg' entry next to each metric's score list, holding the
# mean of that metric over all runs of the hyper-parameter setting.
for setting_results in total_eval_results:
    scores = setting_results['eval']
    for metric in evaluation_metrics:
        scores[metric + '_avg'] = np.mean(scores[metric])

In [12]:
# Summarize the node2vec results file: method, dataset, embedding file and
# the number of hyper-parameter settings it contains.
for template, key in (
    ('Embedding-Method: {}', 'method'),
    ('Dataset: {}', 'dataset'),
    ('Used embedding: {}', 'embedding_file'),
):
    print(template.format(node2vec_results[key]))
print('Number of different hyper_parameter settings: {}'.format(len(node2vec_results['parameterizations'])))

Embedding-Method: node2vec
Dataset: co-author
Used embedding: 
Number of different hyper_parameter settings: 10


In [13]:
# Report, for each train size of the node2vec results, the macro and micro
# averages rounded to four decimals (train size shown as a percentage).
for setting_results in total_eval_results:
    train_percent = setting_results['params']['train_size']*100
    macro_avg = round(setting_results['eval']['macro_avg'], 4)
    micro_avg = round(setting_results['eval']['micro_avg'], 4)
    print("Train size: {}%, Macro: {}, Micro: {}\n".format(train_percent, macro_avg, micro_avg))

Train size: 5.0%, Macro: 0.6681, Micro: 0.6683

Train size: 10.0%, Macro: 0.6917, Micro: 0.6917

Train size: 15.0%, Macro: 0.709, Micro: 0.709

Train size: 20.0%, Macro: 0.7197, Micro: 0.7198

Train size: 25.0%, Macro: 0.7277, Micro: 0.7275

Train size: 30.0%, Macro: 0.7332, Micro: 0.7331

Train size: 35.0%, Macro: 0.7382, Micro: 0.7381

Train size: 40.0%, Macro: 0.7416, Micro: 0.7415

Train size: 45.0%, Macro: 0.7423, Micro: 0.7422

Train size: 50.0%, Macro: 0.7435, Micro: 0.7434



In [14]:
# For every deepwalk hyper-parameter setting keep its params together with,
# per evaluation metric, the list of scores obtained over all runs.
# Note: this replaces the previous content of total_eval_results.
num_hyper_param_settings = len(deepwalk_results['parameterizations'])
total_eval_results = [
    {
        'params': setting['params'],
        'eval': {
            metric: [run['evaluation'][metric] for run in setting['runs']]
            for metric in evaluation_metrics
        },
    }
    for setting in deepwalk_results['parameterizations']
]

In [15]:
# Add a '<metric>_avg' entry next to each metric's score list, holding the
# mean of that metric over all runs of the hyper-parameter setting.
for setting_results in total_eval_results:
    scores = setting_results['eval']
    for metric in evaluation_metrics:
        scores[metric + '_avg'] = np.mean(scores[metric])

In [16]:
# Summarize the deepwalk results file: method, dataset, embedding file and
# the number of hyper-parameter settings it contains.
for template, key in (
    ('Embedding-Method: {}', 'method'),
    ('Dataset: {}', 'dataset'),
    ('Used embedding: {}', 'embedding_file'),
):
    print(template.format(deepwalk_results[key]))
print('Number of different hyper_parameter settings: {}'.format(len(deepwalk_results['parameterizations'])))

Embedding-Method: deepwalk
Dataset: co-author
Used embedding: 
Number of different hyper_parameter settings: 10


In [17]:
# Report, for each train size of the deepwalk results, the macro and micro
# averages rounded to four decimals (train size shown as a percentage).
# (The earlier comment here said "node2vec"; total_eval_results was rebuilt
# from deepwalk_results before this cell.)
for setting_results in total_eval_results:
    train_percent = setting_results['params']['train_size']*100
    macro_avg = round(setting_results['eval']['macro_avg'], 4)
    micro_avg = round(setting_results['eval']['micro_avg'], 4)
    print("Train size: {}%, Macro: {}, Micro: {}\n".format(train_percent, macro_avg, micro_avg))

Train size: 5.0%, Macro: 0.659, Micro: 0.6628

Train size: 10.0%, Macro: 0.6845, Micro: 0.6871

Train size: 15.0%, Macro: 0.7044, Micro: 0.7069

Train size: 20.0%, Macro: 0.717, Micro: 0.7193

Train size: 25.0%, Macro: 0.725, Micro: 0.7269

Train size: 30.0%, Macro: 0.7312, Micro: 0.7329

Train size: 35.0%, Macro: 0.7331, Micro: 0.7348

Train size: 40.0%, Macro: 0.7355, Micro: 0.7372

Train size: 45.0%, Macro: 0.7369, Micro: 0.7385

Train size: 50.0%, Macro: 0.737, Micro: 0.7387



In [18]:
# For every hete-verse hyper-parameter setting keep its params together with,
# per evaluation metric, the list of scores obtained over all runs.
# Note: this replaces the previous content of total_eval_results.
num_hyper_param_settings = len(hete_verse_results['parameterizations'])
total_eval_results = [
    {
        'params': setting['params'],
        'eval': {
            metric: [run['evaluation'][metric] for run in setting['runs']]
            for metric in evaluation_metrics
        },
    }
    for setting in hete_verse_results['parameterizations']
]

In [19]:
# Add a '<metric>_avg' entry next to each metric's score list, holding the
# mean of that metric over all runs of the hyper-parameter setting.
for setting_results in total_eval_results:
    scores = setting_results['eval']
    for metric in evaluation_metrics:
        scores[metric + '_avg'] = np.mean(scores[metric])

In [20]:
# Summarize the hete-verse results file: method, dataset, embedding file and
# the number of hyper-parameter settings it contains.
for template, key in (
    ('Embedding-Method: {}', 'method'),
    ('Dataset: {}', 'dataset'),
    ('Used embedding: {}', 'embedding_file'),
):
    print(template.format(hete_verse_results[key]))
print('Number of different hyper_parameter settings: {}'.format(len(hete_verse_results['parameterizations'])))

Embedding-Method: hete-VERSE
Dataset: co-author
Used embedding: 
Number of different hyper_parameter settings: 10


In [21]:
# Report, for each train size of the hete-verse results, the macro and micro
# averages rounded to four decimals (train size shown as a percentage).
for setting_results in total_eval_results:
    train_percent = setting_results['params']['train_size']*100
    macro_avg = round(setting_results['eval']['macro_avg'], 4)
    micro_avg = round(setting_results['eval']['micro_avg'], 4)
    print("Train size: {}%, Macro: {}, Micro: {}\n".format(train_percent, macro_avg, micro_avg))

Train size: 5.0%, Macro: 0.4484, Micro: 0.4486

Train size: 10.0%, Macro: 0.496, Micro: 0.4958

Train size: 15.0%, Macro: 0.5251, Micro: 0.5245

Train size: 20.0%, Macro: 0.5425, Micro: 0.542

Train size: 25.0%, Macro: 0.5543, Micro: 0.5536

Train size: 30.0%, Macro: 0.5587, Micro: 0.558

Train size: 35.0%, Macro: 0.5641, Micro: 0.5635

Train size: 40.0%, Macro: 0.568, Micro: 0.5674

Train size: 45.0%, Macro: 0.5711, Micro: 0.5702

Train size: 50.0%, Macro: 0.5743, Micro: 0.5736

