In [1]:
import os
# Run the rest of the notebook from the parent of the current working directory.
# NOTE(review): the path is relative to the *current* working directory, so
# re-running this cell without restarting the kernel moves up one more level.
os.chdir('../')

In [2]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
#%matplotlib notebook

# Load the autoreload extension and use mode 2 so edited modules are
# re-imported without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
from copy import deepcopy
from typing import List, Tuple

from cycler import cycler
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.decomposition import PCA
import scipy.stats as stats
import torch
from torch import nn
import torch.nn.functional as F
import seml

import tqdm
# Activate tqdm's pandas integration through its `pandas()` hook.
tqdm.tqdm.pandas()
#plt.style.use('ggplot')

In [90]:
# Fetch the results of the 'rgnn_victims_cora' collection as a DataFrame,
# restricted to the listed fields.
df_experiments_1 = seml.get_results(
    'rgnn_victims_cora',
    fields=['batch_id', 'slurm', 'config', 'result'],
    to_data_frame=True,
)

HBox(children=(FloatProgress(value=0.0, max=90.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=90.0), HTML(value='')))




In [91]:
# Fetch the results of the 'rgnn_victims_citeseer' collection as a DataFrame,
# restricted to the listed fields.
df_experiments_2 = seml.get_results(
    'rgnn_victims_citeseer',
    fields=['batch_id', 'slurm', 'config', 'result'],
    to_data_frame=True,
)

HBox(children=(FloatProgress(value=0.0, max=181.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=181.0), HTML(value='')))




In [99]:
# Fetch the results of the 'rgnn_rpprgo_papers100M' collection as a DataFrame,
# restricted to the listed fields.
df_experiments_3 = seml.get_results(
    'rgnn_rpprgo_papers100M',
    fields=['batch_id', 'slurm', 'config', 'result'],
    to_data_frame=True,
)

HBox(children=(FloatProgress(value=0.0, max=234.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=234.0), HTML(value='')))




In [100]:
# Stack the three result frames into one table with a fresh 0..n-1 index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; a single
# pd.concat is the supported equivalent and avoids the intermediate copy.
df_experiments = pd.concat(
    [df_experiments_1, df_experiments_2, df_experiments_3],
    ignore_index=True,
)

In [101]:
# Show the distinct model labels present in the combined results.
df_experiments["config.model_params.label"].unique()

array(['Vanilla PPRGo', 'Soft Medoid RPPRGo (T=1.0)',
       'Soft Medoid RPPRGo (T=0.5)', 'Soft Medoid RPPRGo (T=0.2)',
       'Soft Median RPPRGo (T=1.0)', 'Soft Median RPPRGo (T=0.5)',
       'Soft Median RPPRGo (T=0.2)', 'Soft Medoid GDC (T=0.2)',
       'Vanilla GCN', 'Vanilla GDC', 'Soft Medoid GDC (T=1.0)',
       'Soft Medoid GDC (T=0.5)', 'Soft Median GDC (T=1.0)',
       'Soft Median GDC (T=0.5)', 'Soft Median GDC (T=0.2)', 'SVD GCN',
       'Jaccard GCN', 'RGCN', 'Soft Medoid PPRGo', 'Soft Median PPRGo',
       'Soft Median PPRGo (T=1.0)', 'Soft Median PPRGo (T=0.5)',
       'Soft Median PPRGo (T=0.2)', 'Soft Median PPRGo (T=5.0)',
       'Soft Median PPRGo (T=10.0)', 'Vanilla PPRGo Diffuse Embeddings'],
      dtype=object)

In [102]:
# Show the distinct dataset identifiers present in the combined results.
df_experiments["config.dataset"].unique()

array(['cora_ml', 'citeseer', 'ogbn-papers100M'], dtype=object)

In [103]:
# Maps each model-hyperparameter column of the results frame
# ("config.model_params.<key>") to the row name shown in the LaTeX table.
# Insertion order is the row order of the table.
model_params = {
    f"config.model_params.{config_key}": display_name
    for display_name, config_key in (
        ("Num. Layer", "nlayers"),
        ("Num. Filter", "n_filters"),
        ("Hidden Size", "hidden_size"),
        ("Dropout", "dropout"),
        ("Batch Norm", "batch_norm"),
        ("GDC Alpha", "gdc_params.alpha"),
        ("GDC K", "gdc_params.k"),
        ("SVD Rank", "svd_params.rank"),
        ("Jaccard Threshold", "jaccard_params.threshold"),
        ("Aggregation (Mean)", "mean"),
        ("Aggregation (Mean) topk", "mean_kwargs.k"),
        ("Aggregation (Mean) temp.", "mean_kwargs.temperature"),
        ("PPR Alpha", "alpha"),
        ("PPR Norm", "ppr_normalization"),
        ("PPR TopK", "topk"),
        ("PPR TopK eps", "eps"),
    )
}
model_params

{'config.model_params.nlayers': 'Num. Layer',
 'config.model_params.n_filters': 'Num. Filter',
 'config.model_params.hidden_size': 'Hidden Size',
 'config.model_params.dropout': 'Dropout',
 'config.model_params.batch_norm': 'Batch Norm',
 'config.model_params.gdc_params.alpha': 'GDC Alpha',
 'config.model_params.gdc_params.k': 'GDC K',
 'config.model_params.svd_params.rank': 'SVD Rank',
 'config.model_params.jaccard_params.threshold': 'Jaccard Threshold',
 'config.model_params.mean': 'Aggregation (Mean)',
 'config.model_params.mean_kwargs.k': 'Aggregation (Mean) topk',
 'config.model_params.mean_kwargs.temperature': 'Aggregation (Mean) temp.',
 'config.model_params.alpha': 'PPR Alpha',
 'config.model_params.ppr_normalization': 'PPR Norm',
 'config.model_params.topk': 'PPR TopK',
 'config.model_params.eps': 'PPR TopK eps'}

In [110]:
# Maps each training-hyperparameter column of the results frame
# ("config.train_params.<key>") to the row name shown in the LaTeX table.
# Insertion order is the row order of the table.
train_params = {
    f"config.train_params.{config_key}": display_name
    for display_name, config_key in (
        ("Learning Rate", "lr"),
        ("Weight Decay", "weight_decay"),
        ("Max Epochs", "max_epochs"),
        ("Patience", "patience"),
        ("Batch Size", "batch_size"),
        ("Annealing Scheduler", "use_annealing_scheduler"),
        # disabled: ("AS with Warm Restart", "scheduler_warm_restarts"),
    )
}
train_params

{'config.train_params.lr': 'Learning Rate',
 'config.train_params.weight_decay': 'Weight Decay',
 'config.train_params.max_epochs': 'Max Epochs',
 'config.train_params.patience': 'Patience',
 'config.train_params.batch_size': 'Batch Size',
 'config.train_params.use_annealing_scheduler': 'Annealing Scheduler'}

In [111]:
# Model labels selected for plotting; commented-out entries are excluded.
# NOTE(review): no other cell visible in this part of the notebook reads this
# list — confirm it is still used further down.
labels_to_plot = [
    'Vanilla GCN',
    'Vanilla GDC',
    'SVD GCN',
    'Jaccard GCN',
    'RGCN',
    'Soft Medoid GDC (T=0.5)',
    #'Soft Median GDC (T=1.0)',
    #'Soft Median GDC (T=0.5)',
    'Soft Median GDC (T=0.2)'
]

In [233]:
# Display name for every dataset identifier found in "config.dataset".
# The trailing comments keep the citation keys / alternative LaTeX spellings.
dataset_map = {
    "cora_ml": "Cora ML",  # '\rotatebox{90}{Cora ML~\citep{Bojchevski2018}}'
    "citeseer": "Citeseer",  # ~\citep{McCallum2000}
    "pubmed": "PubMed",  # ~\citep{Sen2008}
    "ogbn-arxiv": "arXiv",  # ~\citep{Hu2020}
    "ogbn-products": "Products",  # ~\citep{Hu2020}
    "ogbn-papers100M": "Papers 100M",
}
# Display names in declaration order.
dataset_order = list(dataset_map.values())

In [234]:
# Show the display names in the order they are declared in dataset_map.
dataset_order

['Cora ML', 'Citeseer', 'PubMed', 'arXiv', 'Products', 'Papers 100M']

In [235]:
# One group per (dataset, model label) combination.
groups_experiment = df_experiments.groupby(
    by=["config.dataset", "config.model_params.label"],
)

In [236]:
# Build the hyperparameter table: for every (dataset, label) group keep the
# hyperparameters of the run with the highest "result.accuracy".
hpt_param_cols = list(train_params.keys()) + list(model_params.keys())

# Select the accuracy column *before* idxmax so the reduction is not attempted
# on unrelated (possibly non-numeric) columns.
best_run_idx = groups_experiment["result.accuracy"].idxmax()

# idxmax yields index *labels*, so the lookup uses .loc rather than .iloc.
# Series.iteritems() was removed in pandas 2.0; .items() is the supported spelling.
# Rows are collected in a list and the frame is created once instead of being
# grown row by row; values therefore keep the type they have in df_experiments.
hpt_rows = [
    [label, dataset, []] + df_experiments.loc[run_idx, hpt_param_cols].tolist()
    for (dataset, label), run_idx in best_run_idx.items()
]
df_hpt = pd.DataFrame(hpt_rows, columns=["label", "dataset", "seeds"] + hpt_param_cols)

In [246]:
# Order rows by architecture then dataset, show missing hyperparameters as "-",
# and swap dataset identifiers for their display names.
df_hpt = (
    df_hpt
    .sort_values(["label", "dataset"])
    .fillna("-")
    .assign(dataset=lambda frame: frame["dataset"].replace(to_replace=dataset_map))
)

In [268]:
# Keep a copy of the full table; the row-selection cell below slices from it.
df_hpt_orig = df_hpt.copy()
# Number of rows (one per table column candidate) in the full table.
len(df_hpt_orig)

45

In [274]:
# Choose the rows (= columns of the LaTeX table) to render: the first 10 of the full table.
df_hpt = df_hpt_orig.head(10).copy()

In [275]:
# Bold captions for the three header rows.
architecture_c = r"\textbf{Architecture}"
dataset_c = r'\textbf{Dataset}'
parameter_c = r'\textbf{Parameter}'

# Opening of the table: two left-aligned caption columns followed by one
# centred column per row of df_hpt. Starts with an empty line and ends with a newline.
prefix = "\n".join([
    "",
    r"\centering",
    r"\label{tab:global_small}",
    r"\resizebox{\textwidth}{!}{",
    r"\begin{tabular}{ll" + "c" * len(df_hpt) + r"}",
    "",
])

# Closing of the table: starts with an empty line and ends with a blank line.
suffix = "\n".join([
    "",
    r"\bottomrule",
    r"\end{tabular}",
    r"}",
    "",
    "",
])

In [276]:
# Header of the LaTeX table: an architecture row, a dataset row and a
# (blank) parameter caption row.
header = r"\toprule" + "\n"

# Row 1: one \multicolumn per architecture, spanning as many columns as that
# architecture has rows in df_hpt.
# - Series.iteritems() was removed in pandas 2.0; .items() is the supported spelling.
# - sort=False keeps the groups in df_hpt row order, so the spans line up with
#   the dataset cells emitted for row 2 (rows of one label must be contiguous).
header += " & " + architecture_c
for label, count in df_hpt.groupby("label", sort=False)["dataset"].count().items():
    header += " & " + r"\multicolumn{" + str(count) + r"}{c}{\textbf{" + label + r"}}"
header += r"\\" + "\n"

# Row 2: one bold dataset name per row of df_hpt.
header += "&" + dataset_c
for dataset in df_hpt["dataset"]:
    header += "\n\t" + " & " + r"\textbf{" + dataset + r"}"
header += r"\\" + "\n"

# Row 3: the parameter caption followed by empty cells. The trailing newline
# keeps whatever follows (e.g. \midrule) on its own line instead of fusing it
# into "\\\midrule".
header += " & " + parameter_c
header += " & " * len(df_hpt) + r"\\" + "\n"

In [279]:
def _latex_param_rows(param_names, frame):
    r"""Render one LaTeX table row per hyperparameter column.

    Args:
        param_names: mapping from a column name of ``frame`` to the row name
            printed in the table; iteration order is the row order.
        frame: table whose rows become the value cells of each LaTeX row.

    Returns:
        The concatenated rows. Each row starts with a tab, holds the row name
        followed by one ``& value`` cell per row of ``frame``, and ends with
        ``\\`` and a newline.
    """
    rows = ""
    for column, row_name in param_names.items():
        rows += "\t" + r"& " + row_name
        for val in frame[column]:
            if isinstance(val, bool):
                # Both branches yield strings (the previous code assigned the
                # bool False here and relied on str() to rescue it).
                val = "True" if val else "False"
            rows += r"& " + str(val)
        rows += r"\\" + "\n"
    return rows


# Body of the LaTeX table: a block of training hyperparameters and a block of
# model hyperparameters, each labelled by a rotated \multirow cell.
body = r"\midrule" + "\n"

body += r"\multirow{" + str(len(train_params)) + r"}{*}{\rotatebox{90}{\textbf{Train Parameter}}}" + "\n"
body += _latex_param_rows(train_params, df_hpt)

# Horizontal rule across the two caption columns plus one column per df_hpt row.
body += r"\cline{1-" + str(len(df_hpt) + 2) + r"}" + "\n"

body += r"\multirow{" + str(len(model_params)) + r"}{*}{\rotatebox{90}{\textbf{Model Parameter}}}" + "\n"
body += _latex_param_rows(model_params, df_hpt)

In [278]:
# Emit the complete LaTeX table so it can be copied into the paper.
print("".join([prefix, header, body, suffix]))


\centering
\label{tab:global_small}
\resizebox{\textwidth}{!}{
\begin{tabular}{llcccccccccc}
\toprule
 & \textbf{Architecture} & \multicolumn{2}{c}{\textbf{Jaccard GCN}} & \multicolumn{2}{c}{\textbf{RGCN}} & \multicolumn{2}{c}{\textbf{SVD GCN}} & \multicolumn{2}{c}{\textbf{Soft Median GDC (T=0.2)}} & \multicolumn{2}{c}{\textbf{Soft Median GDC (T=0.5)}}\\
&\textbf{Dataset}
	 & \textbf{Citeseer}
	 & \textbf{Cora ML}
	 & \textbf{Citeseer}
	 & \textbf{Cora ML}
	 & \textbf{Citeseer}
	 & \textbf{Cora ML}
	 & \textbf{Citeseer}
	 & \textbf{Cora ML}
	 & \textbf{Citeseer}
	 & \textbf{Cora ML}\\
 & \textbf{Parameter} &  &  &  &  &  &  &  &  &  & \\\midrule
\multirow{6}{*}{\rotatebox{90}{\textbf{Train Parameter}}}
	& Learning Rate& 0.01& 0.01& 0.01& 0.01& 0.01& 0.01& 0.01& 0.01& 0.01& 0.01\\
	& Weight Decay& 0.0005& 0.0005& 0.0005& 0.0005& 0.0005& 0.0005& 0.0005& 0.0005& 0.0005& 0.0005\\
	& Max Epochs& 300& 300& 300& 300& 300& 300& 300& 300& 300& 300\\
	& Patience& 200& 200& 200& 200& 200& 200& 2

In [263]:
# Row count of the current table.
# NOTE(review): this cell's execution count is lower than that of the
# row-selection cell, so the recorded output may be stale — re-run to refresh.
len(df_hpt)

45

In [258]:
# Display the hyperparameter table.
# NOTE(review): this renders the whole frame; consider df_hpt.head() if it grows.
df_hpt

Unnamed: 0,label,dataset,seeds,config.train_params.lr,config.train_params.weight_decay,config.train_params.max_epochs,config.train_params.patience,config.train_params.batch_size,config.train_params.use_annealing_scheduler,config.model_params.nlayers,config.model_params.n_filters,config.model_params.hidden_size,config.model_params.dropout,config.model_params.batch_norm,config.model_params.gdc_params.alpha,config.model_params.gdc_params.k,config.model_params.svd_params.rank,config.model_params.jaccard_params.threshold,config.model_params.mean,config.model_params.mean_kwargs.k,config.model_params.mean_kwargs.temperature,config.model_params.alpha,config.model_params.ppr_normalization,config.model_params.topk,config.model_params.eps
0,Jaccard GCN,Citeseer,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,-,-,-,-,0.01,-,-,-,-,-,-,-
18,Jaccard GCN,Cora ML,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,0,-,-,-,0.01,-,-,-,-,-,-,-
1,RGCN,Citeseer,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,-,-,-,-,-,-,-,-,-,-,-,-
19,RGCN,Cora ML,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,0,-,-,-,-,-,-,-,-,-,-,-
2,SVD GCN,Citeseer,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,-,-,-,50,-,-,-,-,-,-,-,-
20,SVD GCN,Cora ML,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,0,-,-,50,-,-,-,-,-,-,-,-
3,Soft Median GDC (T=0.2),Citeseer,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,-,0.15,64,-,-,soft_median,-,0.2,-,-,-,-
21,Soft Median GDC (T=0.2),Cora ML,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,0,0.15,64,-,-,soft_median,-,0.2,-,-,-,-
4,Soft Median GDC (T=0.5),Citeseer,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,-,0.15,64,-,-,soft_median,-,0.5,-,-,-,-
22,Soft Median GDC (T=0.5),Cora ML,[],0.01,0.0005,300,200,-,-,-,64,-,0.5,0,0.15,64,-,-,soft_median,-,0.5,-,-,-,-
