In [1]:
!pip install tabulate
from tabulate import tabulate
import h5py
import numpy as np
import pandas as pd
from glob import glob
from collections import defaultdict



In [2]:
def extract_model_key(param, is_directed, dname):
    """Map a result-group name to the model label used in the summary table.

    Parameters
    ----------
    param : str
        Name of a parameter group in the results file. The model is detected
        by substring ('spectral', 'line', 'node2vec', '1st') and by the number
        of '_'-separated fields in the name.
    is_directed : bool
        Whether the caller is collecting results for the directed setting.
    dname : str
        Dataset name; 'cora' and 'pubmed' get field-count filtering in the
        undirected setting, every other dataset does not.

    Returns
    -------
    str or None
        One of 'Eigenmaps', 'LINE-1st', 'LINE-2nd', 'node2vec', or None when
        the group should be skipped.

    Notes
    -----
    The field counts (2 for LINE, 4 for node2vec) presumably separate runs
    trained on the directed graph from runs on the undirected one -- confirm
    against the code that writes these group names.
    """
    n_fields = len(param.split('_'))
    has_line = 'line' in param
    has_node2vec = 'node2vec' in param
    # Any LINE name that does not mention '1st' is treated as second order.
    line_key = 'LINE-1st' if '1st' in param else 'LINE-2nd'

    if is_directed:
        # Directed setting: only names with exactly the expected field count
        # qualify; 'spectral' is never reported here.
        if has_line:
            return line_key if n_fields == 2 else None
        if has_node2vec and n_fields == 4:
            return 'node2vec'
        return None

    # Undirected setting: 'spectral' takes precedence over the other markers.
    if 'spectral' in param:
        return 'Eigenmaps'

    # For cora/pubmed the names accepted by the directed branch are excluded;
    # for the remaining datasets every LINE/node2vec name is accepted.
    filter_by_fields = dname in ('cora', 'pubmed')
    if has_line:
        if filter_by_fields and n_fields == 2:
            return None
        return line_key
    if has_node2vec:
        if filter_by_fields and n_fields == 4:
            return None
        return 'node2vec'
    return None

In [3]:
# Root directory holding one sub-directory of result files per dataset.
data_parent_dir = './../../../data/'
# Accumulators filled by the cells below:
#   record_list -- one detailed row per (dataset, direction, model), written to the TSV;
#   table_data  -- one formatted row per (dataset, direction), rendered as the LaTeX table.
record_list, table_data = [], []

In [4]:
# Directed case
# For every directed dataset, scan all result files and keep, per model, the
# configuration with the highest mean micro-F1.
directed_dnames = ['cora', 'pubmed']
# Explicit model order so the table columns always line up with the headers.
directed_model_names = ['LINE-1st', 'LINE-2nd', 'node2vec']
for dname in directed_dnames:
    data_dir = data_parent_dir + dname + '/'
    # Placeholder entries; a model with no matching results keeps these values.
    best_save_dicts = defaultdict(dict)
    for model_name in directed_model_names:
        best_save_dicts[model_name]['best_f1_score'] = 0.
        best_save_dicts[model_name]['best_f1_scores_std'] = 0.
        best_save_dicts[model_name]['best_f1_score_param'] = ''
        best_save_dicts[model_name]['best_f1_score_incorrect_ids'] = []
    h5_file_path_list = glob('{}/*.h5'.format(data_dir))
    for h5_file_path in h5_file_path_list:
        # Spectral results are not reported for the directed setting.
        if 'spectral' in h5_file_path:
            continue

        # Open read-only and close the handle as soon as the file is scanned;
        # every value kept below is copied out with [:] before the file closes.
        with h5py.File(h5_file_path, 'r') as h5_file:
            # Only the first top-level group of each file is inspected.
            results = h5_file[list(h5_file.keys())[0]]
            for param in list(results.keys()):
                model_key = extract_model_key(param=param, is_directed=True, dname=dname)
                if not model_key:
                    continue

                for normalized in list(results[param].keys()):
                    micro_f1_scores = results[param][normalized]['micro_f1_scores'][:]
                    micro_f1_score = np.mean(micro_f1_scores)
                    # Strict comparison: the first configuration reaching a score wins ties.
                    if best_save_dicts[model_key]['best_f1_score'] < micro_f1_score:
                        best_save_dicts[model_key]['best_f1_score'] = micro_f1_score
                        best_save_dicts[model_key]['best_f1_scores_std'] = np.std(micro_f1_scores)
                        best_save_dicts[model_key]['best_f1_score_incorrect_ids'] = results[param][normalized]['incorrect_ids'][:]
                        best_save_dicts[model_key]['best_f1_score_param'] = param + ' ' + normalized

    # Display name used in the table and the TSV.
    if dname == 'pubmed':
        display_name = 'PubMed'
    else:
        display_name = dname.capitalize()
    # '---' fills the Eigenmaps column, which has no directed result.
    table_record = [display_name, 'Directed', '---']
    for model_name in directed_model_names:
        best_save_dict = best_save_dicts[model_name]
        record = [display_name,
                  'Directed',
                  model_name,
                  best_save_dict['best_f1_score_param'],
                  best_save_dict['best_f1_score'],
                  best_save_dict['best_f1_scores_std'],
                  ' '.join(map(str, np.sort(best_save_dict['best_f1_score_incorrect_ids'])))]
        record_list.append(record)
        # 'pm' is a placeholder that is swapped for '$\pm$' when the table is printed.
        table_record.append('{0:.3f} pm {1:.3f}'.format(best_save_dict['best_f1_score'],
                                                        best_save_dict['best_f1_scores_std']))
    table_data.append(table_record)

In [5]:
# Undirected case
# For every dataset, scan all result files and keep, per model, the
# configuration with the highest mean micro-F1.
undirected_dnames = ['cora', 'pubmed', 'blogcatalog', 'flickr']
# Explicit model order so the table columns always line up with the headers.
undirected_model_names = ['Eigenmaps', 'LINE-1st', 'LINE-2nd', 'node2vec']
# Display names used in the table and the TSV; datasets not listed are capitalized.
undirected_display_names = {'pubmed': 'uPubMed',
                            'blogcatalog': 'BlogCatalog',
                            'cora': 'uCora'}
for dname in undirected_dnames:
    data_dir = data_parent_dir + dname + '/'
    # Placeholder entries; a model with no matching results keeps these values
    # and therefore shows up as '0.000 pm 0.000' in the table.
    best_save_dicts = defaultdict(dict)
    for model_name in undirected_model_names:
        best_save_dicts[model_name]['best_f1_score'] = 0.
        best_save_dicts[model_name]['best_f1_scores_std'] = 0.
        best_save_dicts[model_name]['best_f1_score_param'] = ''
        best_save_dicts[model_name]['best_f1_score_incorrect_ids'] = []
    h5_file_path_list = glob('{}/*.h5'.format(data_dir))
    for h5_file_path in h5_file_path_list:
        # Open read-only and close the handle as soon as the file is scanned;
        # every value kept below is copied out with [:] before the file closes.
        with h5py.File(h5_file_path, 'r') as h5_file:
            # Only the first top-level group of each file is inspected.
            results = h5_file[list(h5_file.keys())[0]]
            for param in list(results.keys()):
                model_key = extract_model_key(param=param, is_directed=False, dname=dname)
                if not model_key:
                    continue

                for normalized in list(results[param].keys()):
                    micro_f1_scores = results[param][normalized]['micro_f1_scores'][:]
                    micro_f1_score = np.mean(micro_f1_scores)
                    # Strict comparison: the first configuration reaching a score wins ties.
                    if best_save_dicts[model_key]['best_f1_score'] < micro_f1_score:
                        best_save_dicts[model_key]['best_f1_score'] = micro_f1_score
                        best_save_dicts[model_key]['best_f1_scores_std'] = np.std(micro_f1_scores)
                        best_save_dicts[model_key]['best_f1_score_incorrect_ids'] = results[param][normalized]['incorrect_ids'][:]
                        best_save_dicts[model_key]['best_f1_score_param'] = param + ' ' + normalized

    display_name = undirected_display_names.get(dname, dname.capitalize())

    table_record = [display_name, 'Undirected']

    for model_name in undirected_model_names:
        best_save_dict = best_save_dicts[model_name]
        record = [display_name,
                  'Undirected',
                  model_name,
                  best_save_dict['best_f1_score_param'],
                  best_save_dict['best_f1_score'],
                  best_save_dict['best_f1_scores_std'],
                  ' '.join(map(str, np.sort(best_save_dict['best_f1_score_incorrect_ids'])))]

        record_list.append(record)
        # 'pm' is a placeholder that is swapped for '$\pm$' when the table is printed.
        table_record.append('{0:.3f} pm {1:.3f}'.format(best_save_dict['best_f1_score'],
                                                        best_save_dict['best_f1_scores_std']))
    table_data.append(table_record)


In [6]:
# Render the collected rows as a LaTeX booktabs table; the header order must
# match the order in which the per-model cells were appended to each row.
table = tabulate(
    table_data,
    tablefmt='latex_booktabs',
    headers=['Dataset', 'Edge', 'Eigenmaps', 'LINE-1st', 'LINE-2nd', 'node2vec'],
)

In [7]:
# Wrap the rendered tabular in a table* environment and print it.
# Backslashes are written as '\\' throughout: sequences such as '\c', '\l'
# and '\p' are invalid string escapes that Python only tolerates with a warning.
print('\\begin{table*}[t]\n'
      '  \\caption{Micro F1 score (averaged on 5 validation folds) of multi-class classification.\\label{tb:best-result}}\n'
      '  \\centering')
# Swap the 'pm' placeholder for the LaTeX plus-minus sign and centre all six
# columns. Note that str.replace rewrites every 'pm' occurrence in the table.
print(table.replace('pm', '$\\pm$').replace('llllll', 'c' * 6))
print('\\end{table*}')

\begin{table*}[t]
  \caption{Micro F1 score (averaged on 5 validation folds) of multi-class classification.\label{tb:best-result}}
  \centering
\begin{tabular}{cccccc}
\toprule
 Dataset     & Edge       & Eigenmaps      & LINE-1st       & LINE-2nd       & node2vec       \\
\midrule
 Cora        & Directed   & ---            & 0.805 $\pm$ 0.015 & 0.545 $\pm$ 0.023 & 0.357 $\pm$ 0.005 \\
 PubMed      & Directed   & ---            & 0.786 $\pm$ 0.004 & 0.618 $\pm$ 0.011 & 0.531 $\pm$ 0.008 \\
 uCora       & Undirected & 0.861 $\pm$ 0.016 & 0.818 $\pm$ 0.010 & 0.804 $\pm$ 0.014 & 0.837 $\pm$ 0.019 \\
 uPubMed     & Undirected & 0.818 $\pm$ 0.003 & 0.791 $\pm$ 0.006 & 0.785 $\pm$ 0.003 & 0.814 $\pm$ 0.009 \\
 BlogCatalog & Undirected & 0.390 $\pm$ 0.012 & 0.362 $\pm$ 0.009 & 0.354 $\pm$ 0.007 & 0.348 $\pm$ 0.008 \\
 Flickr      & Undirected & 0.000 $\pm$ 0.000 & 0.363 $\pm$ 0.002 & 0.360 $\pm$ 0.001 & 0.328 $\pm$ 0.001 \\
\bottomrule
\end{tabular}
\end{table*}


In [8]:
# Persist the detailed best-result records (one row per dataset/direction/model)
# as a tab-separated file next to the dataset directories.
df = pd.DataFrame(
    record_list,
    columns=['dataset', 'direction', 'model', 'param', 'micro f1', 'std', 'incorrect_ids'],
)
df.to_csv('{}best_result.tsv'.format(data_parent_dir), index=False, sep='\t')