In [1]:
import os
os.chdir('../')

In [2]:
%matplotlib inline
#%matplotlib notebook

%load_ext autoreload
%autoreload 2

In [3]:
from copy import deepcopy
from typing import List, Tuple

from cycler import cycler
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.decomposition import PCA
import scipy.stats as stats
import torch
from torch import nn
import torch.nn.functional as F
import seml

import tqdm
tqdm.tqdm.pandas()
#plt.style.use('ggplot')

In [4]:
from notebooks import mpl_latex

  self[key] = other[key]


In [5]:
mpl_latex.enable_production_mode()

In [6]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

In [7]:
labels_to_plot = [
    'Soft Median GDC (T=0.5)',
    'Soft Median GDC (T=0.2)',
    'Soft Median GDC (T=5.0)',
    'Soft Median RPPRGo (T=5.0)',
    'Soft Median RPPRGo (T=1.0)',
    'Soft Median PPRGo (T=1.0)',
    'Soft Median RPPRGo (T=0.5)',
    'Soft Median RPPRGo (T=0.2)',
    'Soft Median PPRGo (T=10.0)',
    'Soft Median PPRGo (T=20.0)',
    'Soft Median PPRGo (T=30.0)',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Soft Medoid GDC (T=0.5)',
    'Soft Median GDC (T=1.0)',
    'Soft Medoid RPPRGo (T=0.2)', 
    'Soft Medoid RPPRGo (T=0.5)',
    'Soft Medoid RPPRGo (T=1.0)',
    'SVD GCN',
    'SVD GCN (rank=50)',
    'SVD GCN (rank=100)',
    'Jaccard GCN',
    'RGCN',
    'Vanilla GAT',
    'Soft Median GAT (T=1.0)',
    'Soft Median GAT (T=0.5)',
    'Soft Median GAT (T=0.2)',
]

In [8]:
dataset_map = {
    'cora_ml': r'\textbf{Cora ML}',#'~\citep{Bojchevski2018}}', # '\rotatebox{90}{Cora ML~\citep{Bojchevski2018}}',
    'citeseer': r'\textbf{Citeseer}',#~\citep{McCallum2000}}',
    'pubmed': r'\textbf{PubMed}',#~\citep{Sen2008}}',
    'ogbn-arxiv': r'\textbf{arXiv}',#~\citep{Hu2020}}',
    'ogbn-products': r'\textbf{Products}',#~\citep{Hu2020}}',
    'ogbn-papers100M': r'\textbf{Papers 100M}',#~\citep{Hu2020}}'
}
dataset_order = [dataset_map[k] for k in dataset_map.keys()]

In [9]:
attack_map = {
    'DICE': r'\textbf{DICE}',
    #'GANG': r'\textbf{GANG (ours)}',
    'FGSM': r'\textbf{FGSM}',
    'GreedyRBCD': r'\underline{\textbf{GR-BCD}}',
    'PGD': r'\textbf{PGD}',
    'PRBCD': r'\underline{\textbf{PR-BCD}}'
}
attack_order = [attack_map[k] for k in attack_map.keys()]

In [10]:
attack_loss_map = {
    'FGSM': 'MCE',
    'GreedyRBCD': 'MCE',
    'PGD': 'tanhMargin',
    'PRBCD': 'tanhMargin'
}
attack_loss_map

{'FGSM': 'MCE',
 'GreedyRBCD': 'MCE',
 'PGD': 'tanhMargin',
 'PRBCD': 'tanhMargin'}

In [11]:
pm = r'\(\pm\)'
bpm = r'\(\boldsymbol{\pm}\)'

In [12]:
def make_max_bold(group):
    for col in list(group.columns):
        if not group[col].isna().all():
            idx = np.where(np.max(group[col].fillna("")) == group[col].fillna(""))[0]
            group.loc[group.index[idx], col] = rf'\textbf{{{group.loc[group.index[idx], col].iloc[0]}}}'\
                .replace(pm, bpm)
    return group

In [13]:
def make_second_best_underlined(group):
    for col in list(group.columns):
        if not group[col].isna().all() and np.unique(group[col].fillna("")).shape[0] > 1:
            second_max = np.unique(group[col].fillna("").apply(
                lambda v: v.replace(r'\textbf{', '').replace('}', '')
            ))[-2]
            idx = np.where(second_max == group[col].fillna(""))[0]
            group.loc[group.index[idx], col] = rf'\underline{{{group.loc[group.index[idx], col].iloc[0]}}}'
    return group

In [14]:
def calc_mean_and_error(values: pd.Series, seeds: pd.Series, with_error=True, decimal_places: int = 3): 
    values, seeds = values.values, seeds.values
    seeds = seeds[~np.isnan(values)]
    values = values[~np.isnan(values)]

    idx = np.unique(seeds, return_index=True)[1]
    values = values[idx]
    
    if with_error:
        return rf'{np.mean(values):.{decimal_places}f} $\pm$ {np.std(values)/len(values):.{decimal_places}f}'
    else:
        return rf'{np.mean(values):.{decimal_places}f}'

In [15]:
from functools import partial

def _mark_best_and_second_best(vector: np.ndarray, 
                               first_mark: str = r'\textbf',
                               second_mark: str = r'\underline',
                               is_higher_better : bool = True) -> np.ndarray:
    vector = vector.astype(object)
    values = np.unique(vector[vector == vector])
    if not len(values):
        return vector
    
    if first_mark:
        if is_higher_better:
            mask = vector == values[-1]
        else:
            mask = vector == values[0]
        for i in mask.nonzero()[0]:
            vector[i] = first_mark + '{' + vector[i] + '}'
    if second_mark:
        if is_higher_better:
            mask = vector == values[-2]
        else:
            mask = vector == values[1]
        for i in mask.nonzero()[0]:
            vector[i] = second_mark + '{' + vector[i] + '}'
    return vector
    

def mark_best_and_second_best(df: pd.DataFrame, 
                              first_mark: str = r'\textbf',
                              second_mark: str = r'\underline',
                              axis: int = 0,
                              is_higher_better : bool = True) -> pd.DataFrame:
    df[:] = np.apply_along_axis(
        partial(_mark_best_and_second_best, first_mark=first_mark, 
                second_mark=second_mark, is_higher_better=is_higher_better),
        axis=axis,
        arr=df.values
    )
    return df

In [16]:
df_experiments = seml.get_results('neurips21_global_attack_transfer',
                                  to_data_frame=True,
                                  fields=['batch_id', 'slurm', 'config', 'result'])
df_experiments = df_experiments[(df_experiments['config.attack'] != 'GANG')
                                & (df_experiments['batch_id'] >= 13)]

df_experiments_losses = seml.get_results('neurips21_global_attack_transfer_losses',
                                  to_data_frame=True,
                                  fields=['batch_id', 'slurm', 'config', 'result'])

df_experiments = pd.concat([df_experiments, df_experiments_losses], ignore_index=True)

df_experiments.tail()

  0%|          | 0/1208 [00:00<?, ?it/s]

  0%|          | 0/1208 [00:00<?, ?it/s]

  parsed = pd.io.json.json_normalize(parsed, sep='.')


  0%|          | 0/69 [00:00<?, ?it/s]

  0%|          | 0/69 [00:00<?, ?it/s]

  parsed = pd.io.json.json_normalize(parsed, sep='.')


Unnamed: 0,_id,batch_id,slurm.experiments_per_job,slurm.sbatch_options.time,slurm.sbatch_options.nodes,slurm.sbatch_options.cpus-per-task,slurm.sbatch_options.mem,slurm.sbatch_options.gres,config.overwrite,config.db_collection,config.dataset,config.data_dir,config.binary_attr,config.normalize,config.make_undirected,config.make_unweighted,config.normalize_attr,config.seed,config.attack,config.attack_params.loss_type,config.epsilons,config.artifact_dir,config.pert_adj_storage_type,config.pert_attr_storage_type,config.model_storage_type,config.model_label,config.surrogate_model_storage_type,config.surrogate_model_label,config.device,config.data_device,config.display_steps,result.results,config.attack_params.base_lr,config.attack_params.do_synchronize,config.attack_params.epochs,config.attack_params.fine_tune_epochs,config.attack_params.keep_heuristic,config.attack_params.search_space_size,slurm.sbatch_options.job-name,slurm.sbatch_options.array,slurm.sbatch_options.output,slurm.array_id,slurm.task_id,config.config_hash,config.debug_level,slurm.sbatch_options.partition,config.attack_params.lr_factor
801,125,7,1,0-08:00,1,4,16G,gpu:1,125,neurips21_global_attack_transfer_losses,ogbn-arxiv,datasets/,False,,True,,,0,PRBCD,tanhMarginMCE-0.95,"[0, 0.01, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,Vanilla GCN,pretrained,Vanilla GCN,0,cpu,,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,True,,,WeightOnly,10000000.0,rgnn_at_scale_attack_evasion_global_transfer_7,0-5,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6442003.0,1.0,,info,,
802,126,7,1,0-08:00,1,4,16G,gpu:1,126,neurips21_global_attack_transfer_losses,ogbn-arxiv,datasets/,False,,True,,,1,PRBCD,tanhMarginMCE-0.85,"[0, 0.01, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,Vanilla GCN,pretrained,Vanilla GCN,0,cpu,,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,True,,,WeightOnly,10000000.0,rgnn_at_scale_attack_evasion_global_transfer_7,0-5,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6442003.0,2.0,,info,,
803,127,7,1,0-08:00,1,4,16G,gpu:1,127,neurips21_global_attack_transfer_losses,ogbn-arxiv,datasets/,False,,True,,,1,PRBCD,tanhMarginMCE-0.95,"[0, 0.01, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,Vanilla GCN,pretrained,Vanilla GCN,0,cpu,,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,True,,,WeightOnly,10000000.0,rgnn_at_scale_attack_evasion_global_transfer_7,0-5,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6442003.0,3.0,,info,,
804,128,7,1,0-08:00,1,4,16G,gpu:1,128,neurips21_global_attack_transfer_losses,ogbn-arxiv,datasets/,False,,True,,,5,PRBCD,tanhMarginMCE-0.85,"[0, 0.01, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,Vanilla GCN,pretrained,Vanilla GCN,0,cpu,,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,True,,,WeightOnly,10000000.0,rgnn_at_scale_attack_evasion_global_transfer_7,0-5,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6442003.0,4.0,,info,,
805,129,7,1,0-08:00,1,4,16G,gpu:1,129,neurips21_global_attack_transfer_losses,ogbn-arxiv,datasets/,False,,True,,,5,PRBCD,tanhMarginMCE-0.95,"[0, 0.01, 0.05, 0.1, 0.25]",cache,evasion_global_transfer_adj,evasion_global_transfer_attr,pretrained,Vanilla GCN,pretrained,Vanilla GCN,0,cpu,,"[{'label': 'Vanilla GCN', 'epsilon': 0, 'accur...",,True,,,WeightOnly,10000000.0,rgnn_at_scale_attack_evasion_global_transfer_7,0-5,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6442003.0,5.0,,info,,


In [17]:
# df_experiments.to_csv('notebooks/kdd21_rgnn_at_scale_attack_evasion_transfer.csv')

In [18]:
df_experiments.columns

Index(['_id', 'batch_id', 'slurm.experiments_per_job',
       'slurm.sbatch_options.time', 'slurm.sbatch_options.nodes',
       'slurm.sbatch_options.cpus-per-task', 'slurm.sbatch_options.mem',
       'slurm.sbatch_options.gres', 'config.overwrite', 'config.db_collection',
       'config.dataset', 'config.data_dir', 'config.binary_attr',
       'config.normalize', 'config.make_undirected', 'config.make_unweighted',
       'config.normalize_attr', 'config.seed', 'config.attack',
       'config.attack_params.loss_type', 'config.epsilons',
       'config.artifact_dir', 'config.pert_adj_storage_type',
       'config.pert_attr_storage_type', 'config.model_storage_type',
       'config.model_label', 'config.surrogate_model_storage_type',
       'config.surrogate_model_label', 'config.device', 'config.data_device',
       'config.display_steps', 'result.results',
       'config.attack_params.base_lr', 'config.attack_params.do_synchronize',
       'config.attack_params.epochs', 'config.attack_

In [19]:
df_results = [
    pd.DataFrame(r) 
    for r in df_experiments['result.results']
]
for df_result, (_, experiment) in zip(df_results, df_experiments.iterrows()):
    df_result['dataset'] = experiment['config.dataset']
    df_result['attack'] = experiment['config.attack']
    df_result['seed'] = experiment['config.seed']
    df_result['batch_id'] = experiment['batch_id']
    df_result['novel_loss'] = (
        experiment['config.attack'] not in attack_loss_map
        or experiment['config.attack_params.loss_type'] == attack_loss_map[experiment['config.attack']]
    )
    df_result['lr'] = experiment['config.attack_params.base_lr']
    
df_results = pd.concat(df_results, ignore_index=True)
df_results = df_results.sort_values('batch_id')
df_results = df_results.drop_duplicates([
    c for c in df_results.columns if c != 'batch_id' and c != 'accuracy'
], keep='last')

df_results

Unnamed: 0,label,epsilon,accuracy,dataset,attack,seed,batch_id,novel_loss,lr
52898,Vanilla GCN,0.25,0.167973,ogbn-arxiv,PRBCD,1,1,True,
52938,Vanilla GCN,0.25,0.141802,ogbn-arxiv,PRBCD,5,1,True,
52858,Vanilla GCN,0.25,0.055532,ogbn-arxiv,PRBCD,0,1,True,
53108,Vanilla GCN,0.25,0.497729,pubmed,PRBCD,5,5,True,
53110,Vanilla GCN,0.01,0.782365,pubmed,PRBCD,5,5,False,
...,...,...,...,...,...,...,...,...,...
52414,Vanilla GAT,0.00,0.815020,cora_ml,PGD,0,36,False,0.1
52413,Vanilla GAT,1.00,0.250593,cora_ml,PGD,0,36,False,0.1
52412,Vanilla GAT,0.50,0.401186,cora_ml,PGD,0,36,False,0.1
52418,Vanilla GAT,0.25,0.573913,cora_ml,PGD,0,36,False,0.1


df_results[df_results['dataset'] == 'ogbn-arxiv'][[
    c for c in df_results.columns if c != 'batch_id' and c != 'accuracy'
]]

In [20]:
df_results.attack.unique()

array(['PRBCD', 'GreedyRBCD', 'DICE', 'PGD', 'FGSM'], dtype=object)

In [21]:
df_results.dataset.unique()

array(['ogbn-arxiv', 'pubmed', 'ogbn-products', 'citeseer', 'cora_ml'],
      dtype=object)

In [22]:
df_results[df_results.dataset == 'cora_ml'].label.unique()

array(['Soft Median GAT (T=1.0)', 'Soft Median GAT (T=0.5)',
       'Soft Median GAT (T=0.2)', 'Soft Median RPPRGo (T=0.2)',
       'Soft Median RPPRGo (T=0.5)', 'Soft Median RPPRGo (T=1.0)',
       'SVD GCN (rank=50)', 'SVD GCN (rank=100)', 'Jaccard GCN', 'RGCN',
       'Vanilla PPRGo', 'Soft Medoid RPPRGo (T=1.0)',
       'Soft Medoid RPPRGo (T=0.5)', 'Soft Medoid RPPRGo (T=0.2)',
       'Vanilla Dense GCN', 'Vanilla GDC', 'Soft Medoid GDC (T=0.5)',
       'Soft Median GDC (T=1.0)', 'Soft Median GDC (T=0.5)',
       'Soft Median GDC (T=0.2)', 'Vanilla GCN', 'Vanilla GAT'],
      dtype=object)

In [23]:
df_results[(df_results.dataset == 'ogbn-products')].label.unique()

array(['Soft Median RPPRGo (T=0.2)', 'Vanilla GCN',
       'Vanilla GCN (surrogate)', 'Vanilla GDC',
       'Soft Median GDC (T=5.0)', 'Soft Median GDC (T=10.0)',
       'Vanilla PPRGo', 'Soft Median PPRGo (T=1.0)',
       'Soft Median RPPRGo (T=1.0)', 'Soft Median PPRGo (T=5.0)',
       'Soft Median PPRGo (T=20.0)', 'Soft Median PPRGo (T=10.0)',
       'Soft Median PPRGo (T=30.0)'], dtype=object)

In [24]:
architecture_c = r'\textbf{Architecture}'
dataset_c = r'   '
attack_c = r'\textbf{Attack}'
epsilons_c = r'Frac. edges \(\boldsymbol{\epsilon}\)'
accuracy_c = r'\textbf{Acc.}'

epsilons = [0.01, 0.05, 0.1] #[0.01, 0.05, 0.1, 0.25]
best_attack_mark = r'\textbf'
best_defense_mark = r'\underline'

In [25]:
def transform_label(label: str):
    return (
        label.replace('\n', ' ')
        .replace('RPPRGo', 'PPRGo')
        .replace(' (rank=50)', '')
        #.replace(' (T=0.5)','')
        #.replace(' (T=0.2)','')
        #.replace(' (T=5.0)','')
    )

In [26]:
df_results['label'].apply(transform_label).unique()

array(['Vanilla GCN', 'Soft Median PPRGo (T=0.2)',
       'Soft Median PPRGo (T=0.5)', 'Soft Median PPRGo (T=1.0)',
       'Vanilla PPRGo', 'Soft Median GDC (T=5.0)', 'Vanilla GDC',
       'Soft Median GDC (T=0.5)', 'Soft Median GDC (T=0.2)',
       'Soft Medoid GDC (T=5.0)', 'Soft Median GDC (T=1.0)',
       'Soft Medoid GDC (T=0.5)', 'Vanilla GCN (surrogate)',
       'Soft Median GDC (T=10.0)', 'Soft Medoid GDC (T=0.2)',
       'Soft Medoid GDC (T=1.0)', 'Soft Median PPRGo (T=5.0)',
       'Soft Median PPRGo (T=20.0)', 'Soft Median PPRGo (T=10.0)',
       'Soft Median PPRGo (T=30.0)', 'Soft Median GAT (T=0.2)',
       'Soft Median GAT (T=0.5)', 'Soft Median GAT (T=1.0)', 'SVD GCN',
       'Vanilla Dense GCN', 'SVD GCN (rank=100)', 'RGCN',
       'Soft Medoid PPRGo (T=1.0)', 'Soft Medoid PPRGo (T=0.5)',
       'Soft Medoid PPRGo (T=0.2)', 'Jaccard GCN', 'Vanilla GAT'],
      dtype=object)

In [27]:
df = []

for (dataset, attack, label, epsilon), df_group in df_results[
    (df_results['novel_loss']
     | (df_results['attack'] == 'DICE')
     | (df_results['attack'] == 'GANG'))
    & df_results['label'].replace('PPRGo (T=1.0)', 'RPPRGo (T=1.0)').isin(labels_to_plot)
    & ((df_results.dataset != 'ogbn-products') | (df_results.label != 'Soft Median RPPRGo (T=1.0)'))
].groupby(['dataset', 'attack', 'label', 'epsilon']):
    if len(df_group.seed.unique()) != 3:
        print(f'For {dataset}-{attack}-{epsilon}-{label} collected runs for seed {df_group.seed.tolist()}')
    if label == 'Soft Median GDC (T=0.2)' and dataset == 'ogbn-products':
        continue
    if label == 'Soft Median GDC (T=5.0)' and dataset == 'ogbn-arxiv':
        continue

    values, seeds = df_group.accuracy.values, df_group.seed.values
    seeds = seeds[~np.isnan(values)]
    values = values[~np.isnan(values)]

    idx = np.unique(seeds, return_index=True)[1]
    accuracy = values[idx].mean()
    
    df.append({
        dataset_c: dataset_map[dataset],
        architecture_c: transform_label(label),
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        'accuracy': accuracy,
        'accuracy_str': calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=True, decimal_places=3)
    })
    
df = pd.DataFrame(df)

df = df[
    ~((df[attack_c] == attack_map['PGD']) & (df[dataset_c] == dataset_map['pubmed']))
    & ~((df[attack_c] == attack_map['FGSM']) & (df[dataset_c] == dataset_map['pubmed']))
    #& ~((df[architecture_c] == transform_label('Soft Medoid GDC (T=0.5)')) & (df[dataset_c] == dataset_map['pubmed']))
]

df

For ogbn-arxiv-GreedyRBCD-0.25-Soft Median GDC (T=0.2) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Median GDC (T=0.5) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Median GDC (T=1.0) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Median GDC (T=5.0) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Median RPPRGo (T=0.2) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Median RPPRGo (T=0.5) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Median RPPRGo (T=1.0) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Soft Medoid GDC (T=0.5) collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Vanilla GCN collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Vanilla GDC collected runs for seed [0, 5]
For ogbn-arxiv-GreedyRBCD-0.25-Vanilla PPRGo collected runs for seed [0, 5]
For ogbn-products-DICE-0.0-Soft Median RPPRGo (T=0.2) collected runs for seed [

Unnamed: 0,Unnamed: 1,\textbf{Architecture},\textbf{Attack},Frac. edges \(\boldsymbol{\epsilon}\),accuracy,accuracy_str
0,\textbf{Citeseer},Jaccard GCN,\textbf{DICE},0.00,0.721034,0.721 $\pm$ 0.005
1,\textbf{Citeseer},Jaccard GCN,\textbf{DICE},0.01,0.719608,0.720 $\pm$ 0.004
2,\textbf{Citeseer},Jaccard GCN,\textbf{DICE},0.05,0.715686,0.716 $\pm$ 0.005
3,\textbf{Citeseer},Jaccard GCN,\textbf{DICE},0.10,0.708021,0.708 $\pm$ 0.004
4,\textbf{Citeseer},Jaccard GCN,\textbf{DICE},0.25,0.685918,0.686 $\pm$ 0.004
...,...,...,...,...,...,...
1853,\textbf{PubMed},Vanilla GDC,\underline{\textbf{PR-BCD}},0.10,0.645065,0.645 $\pm$ 0.005
1854,\textbf{PubMed},Vanilla PPRGo,\underline{\textbf{PR-BCD}},0.00,0.765117,0.765 $\pm$ 0.008
1855,\textbf{PubMed},Vanilla PPRGo,\underline{\textbf{PR-BCD}},0.01,0.737035,0.737 $\pm$ 0.007
1856,\textbf{PubMed},Vanilla PPRGo,\underline{\textbf{PR-BCD}},0.05,0.704496,0.704 $\pm$ 0.007


In [28]:
df_accuracy = df[(df[epsilons_c] == 0) & (df[attack_c] == attack_map['PRBCD'])]
df_accuracy = df_accuracy.drop(columns=[attack_c, epsilons_c, 'accuracy_str'])
df_accuracy[dataset_c] = df_accuracy[dataset_c].apply(lambda dataset: rf'\rotatebox{{90}}{{{dataset}}}')
df_accuracy = df_accuracy.set_index([dataset_c, architecture_c])
df_accuracy.columns = pd.MultiIndex.from_product([[accuracy_c], [' ']])

df_accuracy_str = df[(df[epsilons_c] == 0) & (df[attack_c] == attack_map['PRBCD'])]
df_accuracy_str = df_accuracy_str.drop(columns=[attack_c, epsilons_c, 'accuracy'])
df_accuracy_str[dataset_c] = df_accuracy_str[dataset_c].apply(lambda dataset: rf'\rotatebox{{90}}{{{dataset}}}')
df_accuracy_str = df_accuracy_str.set_index([dataset_c, architecture_c])
df_accuracy_str.columns = pd.MultiIndex.from_product([[accuracy_c], [' ']])

In [29]:
df_accuracy_baseline = df_accuracy.reset_index()
df_accuracy_baseline = df_accuracy_baseline[df_accuracy_baseline[architecture_c] == 'Vanilla GCN']
df_accuracy_baseline = df_accuracy_baseline.set_index(dataset_c)
df_accuracy_baseline = df_accuracy_baseline.iloc[:, df_accuracy_baseline.columns.get_level_values(0)==accuracy_c]
df_accuracy_baseline

Unnamed: 0,\textbf{Acc.}
,
,
\rotatebox{90}{\textbf{Citeseer}},0.715865
\rotatebox{90}{\textbf{Cora ML}},0.827404
\rotatebox{90}{\textbf{arXiv}},0.7065
\rotatebox{90}{\textbf{Products}},0.751484
\rotatebox{90}{\textbf{PubMed}},0.781055


In [30]:
df_tmp = df.copy()
df_tmp[architecture_c] = df_tmp[architecture_c].astype("category")
df_tmp[architecture_c].cat.set_categories(
    list(dict.fromkeys([transform_label(l) for l in labels_to_plot]).keys()),
    inplace=True
)
df_tmp[dataset_c] = df_tmp[dataset_c].apply(lambda cat: rf'\rotatebox{{90}}{{{cat}}}').astype("category")
df_tmp[dataset_c].cat.set_categories(
    [rf'\rotatebox{{90}}{{{cat}}}' for cat in dataset_order],
    inplace=True
)

#df = df.sort_values([dataset_c, architecture_c])

df_tmp = df_tmp[df_tmp[epsilons_c].isin(epsilons)].copy()

df_piv = pd.pivot_table(
    df_tmp, 
    index=[dataset_c, architecture_c], 
    columns=[attack_c, epsilons_c], #['type', attack_c, ' '], 
    values='accuracy_str',
    aggfunc=lambda x: ' '.join(x)
)

df_piv = df_piv[[c for attack in attack_order for c in df_piv.columns if c[0] == attack]]

df_piv = pd.merge(df_piv, df_accuracy_str, on=['   ', architecture_c], how='left')

#df_piv = df_piv.apply(lambda row: row[-1] - row[:-1], axis=1).round(3)
#df_piv = df_piv.round(3)

#print(df_piv.to_markdown().replace(' ', ''))

best_defenses = []
for i in np.array(df_piv.index.get_level_values(0).unique()):
    df_slice = df_piv.loc[(i, slice(None))]
    best_defenses.append([df_slice[col].argsort()[-1] for col in df_slice])

df_piv = df_piv.applymap(lambda val: str(val) if val == val else val)
for epsilon in epsilons:
    current_columns = [c for c in df_piv.columns if c[1] == epsilon]
    df_piv[current_columns] = df_piv[current_columns].groupby(dataset_c).apply(
        partial(mark_best_and_second_best, first_mark=best_attack_mark, second_mark='', axis=1, is_higher_better=False)
    )

for i, bests in zip(np.array(df_piv.index.get_level_values(0).unique()), best_defenses):
    df_slice = df_piv.loc[(i, slice(None))]
    for col, best in zip(df_slice, bests):
        if best == -1:
            continue
        df_slice.loc[df_slice.index[best], col] = rf'{best_defense_mark}{{{df_slice.loc[df_slice.index[best], col]}}}'
    

#df_piv = pd.merge(df_piv, df_accuracy.round(2), on=['   ', architecture_c], how='left')
df_piv = df_piv.fillna('-')
df_piv

Unnamed: 0_level_0,\textbf{Attack},\textbf{DICE},\textbf{DICE},\textbf{DICE},\textbf{FGSM},\textbf{FGSM},\textbf{FGSM},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\textbf{PGD},\textbf{PGD},\textbf{PGD},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\textbf{Acc.}
Unnamed: 0_level_1,Frac. edges \(\boldsymbol{\epsilon}\),0.01,0.05,0.1,0.01,0.05,0.1,0.01,0.05,0.1,0.01,0.05,0.1,0.01,0.05,0.1,Unnamed: 17_level_1
Unnamed: 0_level_2,\textbf{Architecture},Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
\rotatebox{90}{\textbf{Cora ML}},Soft Median GDC (T=0.5),0.832 $\pm$ 0.001,0.826 $\pm$ 0.001,\underline{0.820 $\pm$ 0.001},\underline{0.817 $\pm$ 0.001},\textbf{0.769 $\pm$ 0.001},0.743 $\pm$ 0.002,0.816 $\pm$ 0.001,0.770 $\pm$ 0.001,0.739 $\pm$ 0.002,\textbf{0.812 $\pm$ 0.001},0.770 $\pm$ 0.002,0.735 $\pm$ 0.002,0.813 $\pm$ 0.000,0.770 $\pm$ 0.000,\textbf{0.732 $\pm$ 0.003},0.834 $\pm$ 0.001
\rotatebox{90}{\textbf{Cora ML}},Soft Median GDC (T=0.2),0.822 $\pm$ 0.002,0.816 $\pm$ 0.002,0.813 $\pm$ 0.002,0.811 $\pm$ 0.002,0.784 $\pm$ 0.001,0.769 $\pm$ 0.002,0.811 $\pm$ 0.002,0.783 $\pm$ 0.001,0.765 $\pm$ 0.001,\textbf{0.809 $\pm$ 0.001},0.779 $\pm$ 0.002,0.758 $\pm$ 0.002,0.813 $\pm$ 0.001,\textbf{0.777 $\pm$ 0.000},\textbf{0.752 $\pm$ 0.002},0.824 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},Soft Median PPRGo (T=1.0),0.803 $\pm$ 0.002,0.797 $\pm$ 0.002,0.787 $\pm$ 0.001,\textbf{0.796 $\pm$ 0.002},\textbf{0.775 $\pm$ 0.001},0.761 $\pm$ 0.001,0.797 $\pm$ 0.002,\textbf{0.775 $\pm$ 0.001},0.765 $\pm$ 0.001,\textbf{0.796 $\pm$ 0.002},0.777 $\pm$ 0.002,0.763 $\pm$ 0.001,0.798 $\pm$ 0.002,0.775 $\pm$ 0.002,\textbf{0.758 $\pm$ 0.001},0.804 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},Soft Median PPRGo (T=0.5),0.819 $\pm$ 0.001,0.814 $\pm$ 0.002,0.804 $\pm$ 0.001,0.813 $\pm$ 0.001,\underline{0.792 $\pm$ 0.000},\underline{0.778 $\pm$ 0.001},0.813 $\pm$ 0.001,\underline{0.793 $\pm$ 0.002},\underline{0.781 $\pm$ 0.002},\textbf{0.811 $\pm$ 0.001},\textbf{0.787 $\pm$ 0.001},\underline{\textbf{0.769 $\pm$ 0.001}},0.813 $\pm$ 0.001,0.787 $\pm$ 0.002,\underline{0.770 $\pm$ 0.001},0.821 $\pm$ 0.001
\rotatebox{90}{\textbf{Cora ML}},Soft Median PPRGo (T=0.2),0.806 $\pm$ 0.001,0.799 $\pm$ 0.002,0.789 $\pm$ 0.001,\textbf{0.802 $\pm$ 0.001},\textbf{0.783 $\pm$ 0.001},0.768 $\pm$ 0.001,\textbf{0.802 $\pm$ 0.001},0.783 $\pm$ 0.002,0.772 $\pm$ 0.001,\textbf{0.802 $\pm$ 0.001},0.785 $\pm$ 0.001,0.768 $\pm$ 0.001,\textbf{0.802 $\pm$ 0.001},0.784 $\pm$ 0.002,\textbf{0.766 $\pm$ 0.001},0.809 $\pm$ 0.001
\rotatebox{90}{\textbf{Cora ML}},Vanilla GCN,0.825 $\pm$ 0.004,0.817 $\pm$ 0.003,0.809 $\pm$ 0.004,0.795 $\pm$ 0.004,0.713 $\pm$ 0.003,0.641 $\pm$ 0.003,\textbf{0.790 $\pm$ 0.003},\textbf{0.697 $\pm$ 0.003},\textbf{0.622 $\pm$ 0.003},0.797 $\pm$ 0.003,0.726 $\pm$ 0.004,0.662 $\pm$ 0.003,0.794 $\pm$ 0.004,0.713 $\pm$ 0.003,0.645 $\pm$ 0.002,0.827 $\pm$ 0.003
\rotatebox{90}{\textbf{Cora ML}},Vanilla GDC,\underline{0.839 $\pm$ 0.003},\underline{0.830 $\pm$ 0.003},0.819 $\pm$ 0.002,0.811 $\pm$ 0.003,0.738 $\pm$ 0.004,\textbf{0.672 $\pm$ 0.005},0.811 $\pm$ 0.003,\textbf{0.737 $\pm$ 0.003},0.677 $\pm$ 0.005,\textbf{0.808 $\pm$ 0.002},0.740 $\pm$ 0.002,0.679 $\pm$ 0.002,\underline{0.814 $\pm$ 0.003},0.739 $\pm$ 0.003,0.674 $\pm$ 0.004,\underline{0.842 $\pm$ 0.003}
\rotatebox{90}{\textbf{Cora ML}},Vanilla PPRGo,0.824 $\pm$ 0.002,0.816 $\pm$ 0.002,0.807 $\pm$ 0.001,0.807 $\pm$ 0.001,0.754 $\pm$ 0.002,0.724 $\pm$ 0.003,0.809 $\pm$ 0.001,0.758 $\pm$ 0.002,0.726 $\pm$ 0.002,\textbf{0.800 $\pm$ 0.002},\textbf{0.748 $\pm$ 0.002},0.704 $\pm$ 0.001,0.802 $\pm$ 0.001,0.748 $\pm$ 0.003,\textbf{0.700 $\pm$ 0.002},0.826 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},Soft Medoid GDC (T=0.5),0.818 $\pm$ 0.002,0.814 $\pm$ 0.002,0.809 $\pm$ 0.002,\textbf{0.808 $\pm$ 0.002},0.784 $\pm$ 0.003,0.773 $\pm$ 0.005,\textbf{0.808 $\pm$ 0.002},0.786 $\pm$ 0.002,0.775 $\pm$ 0.003,0.810 $\pm$ 0.002,\textbf{0.782 $\pm$ 0.003},\textbf{0.759 $\pm$ 0.003},0.810 $\pm$ 0.002,0.783 $\pm$ 0.001,0.761 $\pm$ 0.003,0.819 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},Soft Median GDC (T=1.0),0.831 $\pm$ 0.001,0.824 $\pm$ 0.001,0.817 $\pm$ 0.001,0.812 $\pm$ 0.001,\textbf{0.754 $\pm$ 0.002},0.713 $\pm$ 0.000,0.812 $\pm$ 0.002,0.756 $\pm$ 0.001,0.716 $\pm$ 0.001,\textbf{0.808 $\pm$ 0.001},0.757 $\pm$ 0.001,0.713 $\pm$ 0.002,0.810 $\pm$ 0.001,0.757 $\pm$ 0.001,\textbf{0.710 $\pm$ 0.002},0.834 $\pm$ 0.001


In [31]:
labels_to_plot = [
    'Soft Median GDC (T=0.5)',
    'Soft Median GDC (T=0.2)',
    'Soft Median GDC (T=5.0)',
    'Soft Median RPPRGo (T=5.0)',
    'Soft Median RPPRGo (T=1.0)',
    'Soft Median PPRGo (T=1.0)',
    'Soft Median RPPRGo (T=0.5)',
    'Soft Median RPPRGo (T=0.2)',
    'Soft Median PPRGo (T=10.0)',
    'Soft Median PPRGo (T=20.0)',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Soft Medoid GDC (T=0.5)',
    'Soft Median GDC (T=1.0)',
    'Soft Medoid RPPRGo (T=0.2)', 
    'Soft Medoid RPPRGo (T=0.5)',
    'Soft Medoid RPPRGo (T=1.0)',
    'SVD GCN',
    'SVD GCN (rank=50)',
    'SVD GCN (rank=100)',
    'Jaccard GCN',
    'RGCN',
    'Vanilla GAT',
    'Soft Median GAT (T=1.0)',
    'Soft Median GAT (T=0.5)',
    'Soft Median GAT (T=0.2)',
]

labels_small = [
    'Soft Median GDC (T=0.2)',
    'Soft Median PPRGo (T=0.5)',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Vanilla GAT',
    'Soft Medoid GDC (T=0.5)',
    'SVD GCN',
    'Jaccard GCN',
    'RGCN',
]
labels_to_plot_per_dataset = {
    dataset_map['cora_ml']: labels_small,
    dataset_map['citeseer']: labels_small,
    dataset_map['pubmed']: [
        'Soft Median GDC (T=0.2)',
        'Soft Median PPRGo (T=5.0)',
        'Vanilla GCN',
        'Vanilla GDC',
        'Vanilla PPRGo',
        'Soft Medoid GDC (T=0.5)',
    ],
    dataset_map['ogbn-arxiv']: [
        'Soft Median GDC (T=0.2)',
        'Soft Median PPRGo (T=5.0)',
        'Vanilla GCN',
        'Vanilla GDC',
        'Vanilla PPRGo',
        'Soft Medoid GDC (T=0.5)',
    ],
    dataset_map['ogbn-products']: [
        'Soft Median GDC (T=5.0)',
        'Soft Median PPRGo (T=30.0)',
        'Vanilla GCN',
        'Vanilla GDC',
        'Vanilla PPRGo',
        'Soft Medoid GDC (T=0.5)',
    ]
}

label_order = [
    'Soft Median GDC',
    'Soft Median PPRGo',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Vanilla GAT',
    'Soft Medoid GDC',
    'SVD GCN',
    'Jaccard GCN',
    'RGCN',
]


labels_ours = [
    'Soft Median GDC',
    'Soft Median PPRGo'
]

def transform_label(label: str):
    label = (
        label.replace('\n', ' ')
        .replace(' (T=0.2)','')
        .replace(' (T=0.5)','')
        .replace(' (T=1.0)','')
        .replace(' (T=5.0)','')
        .replace(' (T=30.0)','')
    )
    if label in labels_ours:
        label = rf'\underline{{{label}}}'
    return label

best_attack_mark = r'\textit'
best_defense_mark = r'\textbf'

epsilons = [0.05, 0.1]

In [32]:
df[architecture_c].unique()

array(['Jaccard GCN', 'RGCN', 'SVD GCN (rank=100)', 'SVD GCN',
       'Soft Median GAT (T=0.2)', 'Soft Median GAT (T=0.5)',
       'Soft Median GAT (T=1.0)', 'Soft Median GDC (T=0.2)',
       'Soft Median GDC (T=0.5)', 'Soft Median GDC (T=1.0)',
       'Soft Median PPRGo (T=0.2)', 'Soft Median PPRGo (T=0.5)',
       'Soft Median PPRGo (T=1.0)', 'Soft Medoid GDC (T=0.5)',
       'Soft Medoid PPRGo (T=0.2)', 'Soft Medoid PPRGo (T=0.5)',
       'Soft Medoid PPRGo (T=1.0)', 'Vanilla GAT', 'Vanilla GCN',
       'Vanilla GDC', 'Vanilla PPRGo', 'Soft Median PPRGo (T=5.0)',
       'Soft Median GDC (T=5.0)', 'Soft Median PPRGo (T=10.0)',
       'Soft Median PPRGo (T=20.0)', 'Soft Median PPRGo (T=30.0)'],
      dtype=object)

In [33]:
df_tmp = df[df.apply(lambda row: row[architecture_c] in labels_to_plot_per_dataset[row[dataset_c]], axis=1)]
df_tmp[architecture_c] = df_tmp[architecture_c].apply(lambda label: transform_label(label)).astype("category")
df_tmp[architecture_c].cat.set_categories(
    list(dict.fromkeys([transform_label(l) for l in label_order]).keys()),
    inplace=True
)
df_tmp[dataset_c] = df_tmp[dataset_c].apply(lambda cat: rf'\rotatebox{{90}}{{{cat}}}').astype("category")
df_tmp[dataset_c].cat.set_categories(
    [rf'\rotatebox{{90}}{{{cat}}}' for cat in dataset_order],
    inplace=True
)

#df = df.sort_values([dataset_c, architecture_c])

df_tmp = df_tmp[df_tmp[epsilons_c].isin(epsilons)].copy()


df_piv = pd.pivot_table(
    df_tmp, 
    index=[dataset_c, architecture_c], 
    columns=[attack_c, epsilons_c], #['type', attack_c, ' '], 
    values='accuracy_str',
    aggfunc=lambda x: ' '.join(x)
)

df_piv = df_piv[[c for attack in attack_order for c in df_piv.columns if c[0] == attack]]

df_accuracy_tmp = df_accuracy_str.copy()
df_accuracy_tmp = df_accuracy_tmp[[
    i[1] in labels_to_plot_per_dataset[i[0].replace(r'\rotatebox{90}{', '')[:-1]] 
    for i in df_accuracy_tmp.index
]]
df_accuracy_tmp.index = pd.MultiIndex.from_tuples(
    [(i[0], transform_label(i[1])) for i in df_accuracy_tmp.index],
    names=df_accuracy.index.names
)
df_piv = pd.merge(df_piv, df_accuracy_tmp, on=['   ', architecture_c], how='left')

#df_piv = df_piv.apply(lambda row: row[-1] - row[:-1], axis=1).round(3)
#df_piv = df_piv.round(3)

#print(df_piv.to_markdown().replace(' ', ''))

best_defenses = []
for i in np.array(df_piv.index.get_level_values(0).unique()):
    df_slice = df_piv.loc[(i, slice(None))]
    best_defenses.append([df_slice[col].argsort()[-1] for col in df_slice])

df_piv = df_piv.applymap(lambda val: str(val) if val == val else val)
for epsilon in epsilons:
    current_columns = [c for c in df_piv.columns if c[1] == epsilon]
    df_piv[current_columns] = df_piv[current_columns].groupby(dataset_c).apply(
        partial(mark_best_and_second_best, first_mark=best_attack_mark, second_mark='', axis=1, is_higher_better=False)
    )

for i, bests in zip(np.array(df_piv.index.get_level_values(0).unique()), best_defenses):
    df_slice = df_piv.loc[(i, slice(None))]
    for col, best in zip(df_slice, bests):
        if best == -1:
            continue
        df_slice.loc[df_slice.index[best], col] = rf'{best_defense_mark}{{{df_slice.loc[df_slice.index[best], col]}}}'
    

#df_piv = pd.merge(df_piv, df_accuracy.round(2), on=['   ', architecture_c], how='left')
df_piv = df_piv.fillna('-')
df_piv

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tmp[architecture_c] = df_tmp[architecture_c].apply(lambda label: transform_label(label)).astype("category")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tmp[dataset_c] = df_tmp[dataset_c].apply(lambda cat: rf'\rotatebox{{90}}{{{cat}}}').astype("category")


Unnamed: 0_level_0,\textbf{Attack},\textbf{DICE},\textbf{DICE},\textbf{FGSM},\textbf{FGSM},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\textbf{PGD},\textbf{PGD},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\textbf{Acc.}
Unnamed: 0_level_1,Frac. edges \(\boldsymbol{\epsilon}\),0.05,0.1,0.05,0.1,0.05,0.1,0.05,0.1,0.05,0.1,Unnamed: 12_level_1
Unnamed: 0_level_2,\textbf{Architecture},Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
\rotatebox{90}{\textbf{Cora ML}},\underline{Soft Median GDC},0.816 $\pm$ 0.002,0.813 $\pm$ 0.002,0.784 $\pm$ 0.001,0.769 $\pm$ 0.002,0.783 $\pm$ 0.001,0.765 $\pm$ 0.001,0.779 $\pm$ 0.002,0.758 $\pm$ 0.002,\textit{0.777 $\pm$ 0.000},\textit{0.752 $\pm$ 0.002},0.824 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},\underline{Soft Median PPRGo},0.814 $\pm$ 0.002,0.804 $\pm$ 0.001,\textbf{0.792 $\pm$ 0.000},\textbf{0.778 $\pm$ 0.001},\textbf{0.793 $\pm$ 0.002},\textbf{0.781 $\pm$ 0.002},\textbf{\textit{0.787 $\pm$ 0.001}},\textbf{\textit{0.769 $\pm$ 0.001}},\textbf{0.787 $\pm$ 0.002},\textbf{0.770 $\pm$ 0.001},0.821 $\pm$ 0.001
\rotatebox{90}{\textbf{Cora ML}},Vanilla GCN,0.817 $\pm$ 0.003,0.809 $\pm$ 0.004,0.713 $\pm$ 0.003,0.641 $\pm$ 0.003,\textit{0.697 $\pm$ 0.003},\textit{0.622 $\pm$ 0.003},0.726 $\pm$ 0.004,0.662 $\pm$ 0.003,0.713 $\pm$ 0.003,0.645 $\pm$ 0.002,0.827 $\pm$ 0.003
\rotatebox{90}{\textbf{Cora ML}},Vanilla GDC,\textbf{0.830 $\pm$ 0.003},\textbf{0.819 $\pm$ 0.002},0.738 $\pm$ 0.004,\textit{0.672 $\pm$ 0.005},\textit{0.737 $\pm$ 0.003},0.677 $\pm$ 0.005,0.740 $\pm$ 0.002,0.679 $\pm$ 0.002,0.739 $\pm$ 0.003,0.674 $\pm$ 0.004,\textbf{0.842 $\pm$ 0.003}
\rotatebox{90}{\textbf{Cora ML}},Vanilla PPRGo,0.816 $\pm$ 0.002,0.807 $\pm$ 0.001,0.754 $\pm$ 0.002,0.724 $\pm$ 0.003,0.758 $\pm$ 0.002,0.726 $\pm$ 0.002,\textit{0.748 $\pm$ 0.002},0.704 $\pm$ 0.001,0.748 $\pm$ 0.003,\textit{0.700 $\pm$ 0.002},0.826 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},Vanilla GAT,0.788 $\pm$ 0.002,0.772 $\pm$ 0.003,\textit{0.732 $\pm$ 0.003},0.686 $\pm$ 0.002,0.735 $\pm$ 0.003,0.689 $\pm$ 0.003,0.737 $\pm$ 0.001,0.688 $\pm$ 0.001,0.740 $\pm$ 0.001,\textit{0.686 $\pm$ 0.001},0.809 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},Soft Medoid GDC,0.814 $\pm$ 0.002,0.809 $\pm$ 0.002,0.784 $\pm$ 0.003,0.773 $\pm$ 0.005,0.786 $\pm$ 0.002,0.775 $\pm$ 0.003,\textit{0.782 $\pm$ 0.003},\textit{0.759 $\pm$ 0.003},0.783 $\pm$ 0.001,0.761 $\pm$ 0.003,0.819 $\pm$ 0.002
\rotatebox{90}{\textbf{Cora ML}},SVD GCN,0.766 $\pm$ 0.005,0.752 $\pm$ 0.003,0.770 $\pm$ 0.006,0.751 $\pm$ 0.007,0.769 $\pm$ 0.004,0.755 $\pm$ 0.006,\textit{0.753 $\pm$ 0.004},\textit{0.719 $\pm$ 0.005},0.757 $\pm$ 0.004,0.724 $\pm$ 0.006,0.781 $\pm$ 0.005
\rotatebox{90}{\textbf{Cora ML}},Jaccard GCN,0.809 $\pm$ 0.003,0.803 $\pm$ 0.003,0.722 $\pm$ 0.002,\textit{0.661 $\pm$ 0.002},\textit{0.719 $\pm$ 0.001},0.664 $\pm$ 0.001,0.730 $\pm$ 0.003,0.673 $\pm$ 0.002,0.725 $\pm$ 0.001,0.667 $\pm$ 0.003,0.818 $\pm$ 0.003
\rotatebox{90}{\textbf{Cora ML}},RGCN,0.808 $\pm$ 0.002,0.796 $\pm$ 0.003,\textit{0.719 $\pm$ 0.004},\textit{0.654 $\pm$ 0.007},0.725 $\pm$ 0.002,0.665 $\pm$ 0.005,0.725 $\pm$ 0.005,0.671 $\pm$ 0.007,0.724 $\pm$ 0.003,0.664 $\pm$ 0.004,0.819 $\pm$ 0.002


In [34]:
print(df_piv\
    .sort_index(level=1, ascending=False, sort_remaining=False)\
    .sort_index(level=0, ascending=True, sort_remaining=True)\
    .to_latex(
        #float_format='%.2f',
        caption=r'Perturbed accuracy for the proposed attacks (see Sections~\ref{sec:attackkdd}-\ref{sec:prbcd}) and baselines on all datasets (see Table~\ref{tab:datasets}). \(\epsilon\) denotes the fraction of edges perturbed (relative to the clean graph). The last column contains the clean accuracy. As this a work-in-progress report, the experiments for the defenses on the large datasets are due and on Products we did not optimize the hyperparameters for GANG. For each architecture we italicize the strongest attack where \(\epsilon=0.05\), underline where \(\epsilon=0.1\), and embolden where \(\epsilon=0.25\). From an attack perspective, a lower perturbed accuracy is better. We rerun the experiments with three different seeds. For OGB we use the provided data splits and otherwise we use random split with 20 nodes per class.', 
        label='tab:global',
        escape=False,
        multirow=True,
        multicolumn=True,
        column_format='llcccccccccccccccc'
    )
)

\begin{table}
\centering
\caption{Perturbed accuracy for the proposed attacks (see Sections~\ref{sec:attackkdd}-\ref{sec:prbcd}) and baselines on all datasets (see Table~\ref{tab:datasets}). \(\epsilon\) denotes the fraction of edges perturbed (relative to the clean graph). The last column contains the clean accuracy. As this a work-in-progress report, the experiments for the defenses on the large datasets are due and on Products we did not optimize the hyperparameters for GANG. For each architecture we italicize the strongest attack where \(\epsilon=0.05\), underline where \(\epsilon=0.1\), and embolden where \(\epsilon=0.25\). From an attack perspective, a lower perturbed accuracy is better. We rerun the experiments with three different seeds. For OGB we use the provided data splits and otherwise we use random split with 20 nodes per class.}
\label{tab:global}
\begin{tabular}{llcccccccccccccccc}
\toprule
                                  & \textbf{Attack} & \multicolumn{2}{l}{\textbf

# Plot Results (joint)

In [35]:
epsilons_c = r'Frac. edges $\epsilon$'

In [36]:
df_results[(df_results.dataset == 'ogbn-products')].label.unique()

array(['Soft Median RPPRGo (T=0.2)', 'Vanilla GCN',
       'Vanilla GCN (surrogate)', 'Vanilla GDC',
       'Soft Median GDC (T=5.0)', 'Soft Median GDC (T=10.0)',
       'Vanilla PPRGo', 'Soft Median PPRGo (T=1.0)',
       'Soft Median RPPRGo (T=1.0)', 'Soft Median PPRGo (T=5.0)',
       'Soft Median PPRGo (T=20.0)', 'Soft Median PPRGo (T=10.0)',
       'Soft Median PPRGo (T=30.0)'], dtype=object)

In [37]:
labels_small = [
    'Soft Median GDC (T=0.2)',
    'Soft Median RPPRGo (T=0.5)',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Soft Medoid GDC (T=0.5)',
    'SVD GCN (rank=50)',
    #'Jaccard GCN',
    #'RGCN',
]

labels_big = [
    'Soft Median GDC (T=0.2)',
    'Soft Median GDC (T=5.0)',
    'Soft Median RPPRGo (T=5.0)',
    #'Soft Median PPRGo (T=1.0)',
    #'Soft Median PPRGo (T=10.0)',
    #'Soft Median PPRGo (T=20.0)',
    'Soft Median PPRGo (T=30.0)',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Soft Medoid GDC (T=0.5)',
    #'Jaccard GCN',
    #'RGCN',
]

attacks = ['FGSM', 'GreedyRBCD', 'PGD', 'PRBCD']

reference_attack = 'DICE'

reference_approaches = [
    #'Soft Median RPPRGo (T=0.5)',
    #'Soft Median RPPRGo (T=5.0)',
    #'Soft Median PPRGo (T=1.0)',
    #'Soft Median PPRGo (T=10.0)',
    #'Soft Median PPRGo (T=20.0)',
    'Vanilla GCN'
]

for label in labels:        
    if df_group[df_group.label == label].shape[0] == 0 or (dataset == 'ogbn-arxiv' and label == 'Soft Median GDC (T=5.0)'):
        continue
    print(label)
    print(df_group[df_group.label == label].groupby('epsilon').accuracy.agg(['mean', 'sem']).reset_index())

In [38]:
for (dataset, attack), df_group in df_results[
    (df_results['novel_loss']
     | (df_results['attack'] == 'DICE')
     | (df_results['attack'] == 'GANG'))
    #& ~((df_results['dataset'] == 'ogbn-products') & (df_results['label'] == 'Soft Median GDC (T=0.2)'))
    #& ~((df_results['dataset'] != 'ogbn-products') & (df_results['label'] == 'Soft Median GDC (T=0.5)'))
    #& ((df_results['dataset'] != 'ogbn-arxiv') | (df_results['label'] != 'Soft Median GDC (T=5.0)'))
    & df_results['attack'].isin(attacks)
    # Filter out the SVD outlier where it fails catastrophically
    #& ((df_results.dataset != 'citeseer') | (df_results.label != 'SVD GCN') | (df_results.accuracy > 0.1))
].groupby(['dataset', 'attack']):       
    print(dataset, attack)
    
    fig, ax = mpl_latex.newfig(width=0.25, ratio_yx=1)

    is_small = dataset in ['cora_ml', 'citeseer']
    labels = labels_small if is_small else labels_big
    if not is_small:
        df_group = df_group[df_group['epsilon'] <= 0.1]
    else:
        df_group = df_group[df_group['epsilon'] <= 0.25]
  
    for label in labels:        
        if df_group[df_group.label == label].shape[0] == 0 or (dataset == 'ogbn-arxiv' and label == 'Soft Median GDC (T=5.0)'):
            continue

        df_values = df_group[df_group.label == label].groupby('epsilon').accuracy.agg(['mean', 'sem']).reset_index()
        
        #if label.startswith('Soft Median'):
        #    label = fr'\underline{{{label}}}'
        
        err_line = plt.errorbar(
            df_values.epsilon,
            df_values['mean'],
            df_values['sem'],
            label=label.replace('RPPRGo', 'PPRGo').replace(' (T=5.0)', '').replace(' (T=0.5)', '')\
                .replace(' (T=0.2)', '').replace(' (rank=50)', '').replace(' (T=30.0)', '')
        )
        
        if label in reference_approaches:
            reference_line = df_results[
                (df_results.attack == reference_attack) & (df_results.dataset == dataset) & (df_results.label == label)
            ].groupby(['label', 'epsilon']).accuracy.agg(['mean', 'sem']).reset_index()
            if not is_small:
                reference_line = reference_line[reference_line['epsilon'] <= 0.1]
            else:
                reference_line = reference_line[reference_line['epsilon'] <= 0.25]
            plt.errorbar(reference_line.epsilon, reference_line['mean'], reference_line['sem'], color=err_line[0].get_color(), linestyle='--')
    
    df_range = df_group.groupby('epsilon').accuracy.agg(['min', 'max']).reset_index()
    df_range['dataset'] = dataset
    df_range['attack'] = attack    
    
    # For "DICE-area"
    #reference_area = df_results[
    #    (df_results.attack == reference_attack) & (df_results.dataset == dataset) & df_results.label.isin(labels)
    #].groupby(['label', 'epsilon']).accuracy.mean().reset_index()\
    #.groupby('epsilon').accuracy.agg(['min', 'max']).reset_index()
    #if not is_small:
    #    reference_area = reference_area[reference_area['epsilon'] <= 0.1]
    #plt.fill_between(reference_area.epsilon, reference_area['min'], reference_area['max'], 
    #                 zorder=0, color='grey', alpha=0.2)
    
    plt.xlabel(epsilons_c)
    plt.ylabel('Pert. accuracy')
    
    mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_pertaccuracy_no_legend', fig, close_fig=False)
    ax.legend()
    
    mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_pertaccuracy', fig, close_fig=False)
    mpl_latex.dedicated_legend_plot(
        f'latex/assets/global_transfer_{attack}_{dataset}_pertaccuracy_legend_1row',
        mod_label=lambda l: l,
        ncol=8 if is_small else 7
    )
    mpl_latex.dedicated_legend_plot(
        f'latex/assets/global_transfer_{attack}_{dataset}_pertaccuracy_legend',
        mod_label=lambda l: l,
        ncol=4 if is_small else 3
    )
    mpl_latex.dedicated_legend_plot(
        f'latex/assets/global_transfer_{attack}_{dataset}_pertaccuracy_legend_1col',
        mod_label=lambda l: l,
        ncol=1
    )

citeseer FGSM


  self[key] = other[key]


citeseer GreedyRBCD


  self[key] = other[key]


citeseer PGD


  self[key] = other[key]


citeseer PRBCD


  self[key] = other[key]


cora_ml FGSM


  self[key] = other[key]


cora_ml GreedyRBCD


  self[key] = other[key]


cora_ml PGD


  self[key] = other[key]


cora_ml PRBCD


  self[key] = other[key]


ogbn-arxiv GreedyRBCD


  self[key] = other[key]


ogbn-arxiv PRBCD


  self[key] = other[key]


ogbn-products GreedyRBCD


  self[key] = other[key]


ogbn-products PRBCD


  self[key] = other[key]


pubmed GreedyRBCD


  self[key] = other[key]


pubmed PRBCD


  self[key] = other[key]


## Split table for small datasets

In [39]:
small_datasets = [dataset_map['cora_ml'], dataset_map['citeseer']]
small_datasets

['\\textbf{Cora ML}', '\\textbf{Citeseer}']

In [40]:
labels_to_plot = [
    'Soft Median GDC (T=0.2)',
    'Soft Median PPRGo (T=0.5)',
    'Vanilla GCN',
    'Vanilla GDC',
    'Vanilla PPRGo',
    'Soft Medoid GDC (T=0.5)',
    'Soft Medoid RPPRGo (T=0.5)',
    'SVD GCN',
    'Jaccard GCN',
    'RGCN',
]

In [41]:
epsilons_c = r'Frac. edges \(\boldsymbol{\epsilon}\)'
epsilons = [0.1]
best_attack_mark = r'\underline'
best_defense_mark = r'\textbf'

architecture_c = r'\textbf{Architecture}'
dataset_c = r'   '
attack_c = r'\textbf{Attack}'
epsilons_c = r'Frac. edges \(\boldsymbol{\epsilon}\)'
accuracy_c = r'\textbf{Acc.}'

epsilons = [0.01, 0.05, 0.1] #[0.01, 0.05, 0.1, 0.25]
best_attack_mark = r'\textbf'
best_defense_mark = r'\underline'

In [42]:
labels_ours = [
    'Soft Median GDC',
    'Soft Median PPRGo'
]

def transform_labels(label: str) -> str:
    label = label.replace(' (T=0.5)', '').replace(' (T=0.2)', '')
    if label in labels_ours:
        label = rf'\underline{{{label}}}'
    return label

In [43]:
df_tmp = df[
    df[epsilons_c].isin(epsilons)
    & ~df[attack_c].isin([attack_map['DICE']])
    & df[architecture_c].isin(labels_to_plot)
    & df[dataset_c].isin(small_datasets)
]

df_tmp[architecture_c] = df_tmp[architecture_c].apply(transform_labels).astype("category")
df_tmp[architecture_c].cat.set_categories(
    [transform_labels(l) for l in labels_to_plot],
    inplace=True
)

df_small = pd.pivot_table(
    df_tmp,
    index=[dataset_c, architecture_c], 
    columns=[attack_c, epsilons_c], #['type', attack_c, ' '],
    values='accuracy_str',
    aggfunc=lambda x: ' '.join(x)
)

df_small = df_small[[c for attack in attack_order for c in df_small.columns if c[0] == attack]]

df_accuracy_tmp = df_accuracy_str.copy()
df_accuracy_tmp = df_accuracy_tmp[[i[1] in labels_to_plot for i in df_accuracy_tmp.index]]
df_accuracy_tmp.index = pd.MultiIndex.from_tuples(
    [(i[0], transform_labels(i[1])) for i in df_accuracy_tmp.index],
    names=df_accuracy.index.names
)
df_small = pd.merge(df_small, df_accuracy_tmp, on=[dataset_c, architecture_c], how='left')

#df_small = df_small.apply(lambda row: row[-1] - row[:-1], axis=1).round(3)
df_small = df_small.round(3)

print(df_small.to_markdown().replace(' ', ''))

best_defenses = []
for i in np.array(df_small.index.get_level_values(0).unique()):
    df_slice = df_small.loc[(i, slice(None))]
    best_defenses.append([df_slice[col].argsort()[-1] for col in df_slice])

df_small = df_small.applymap(lambda val: str(val) if val == val else val)
#for epsilon in epsilons:
#    current_columns = [c for c in df_small.columns if c[1] == epsilon]
#    df_small[current_columns] = df_small[current_columns].groupby(dataset_c).apply(
#        partial(mark_best_and_second_best, first_mark=best_attack_mark, second_mark='', axis=1, is_higher_better=False)
#    )

for i, bests in zip(np.array(df_small.index.get_level_values(0).unique()), best_defenses):
    df_slice = df_small.loc[(i, slice(None))]
    for col, best in zip(df_slice, bests):
        if best == -1:
            continue
        df_slice.loc[df_slice.index[best], col] = rf'{best_defense_mark}{{{df_slice.loc[df_slice.index[best], col]}}}'
    

#df_small = pd.merge(df_small, df_accuracy.round(2), on=['   ', architecture_c], how='left')
df_small = df_small.fillna('-')
df_small

||('\\textbf{FGSM}',0.1)|('\\underline{\\textbf{GR-BCD}}',0.1)|('\\textbf{PGD}',0.1)|('\\underline{\\textbf{PR-BCD}}',0.1)|('\\textbf{Acc.}','')|
|:---------------------------------------------------------|:--------------------------|:-----------------------------------------|:-------------------------|:-----------------------------------------|--------------------------:|
|('\\textbf{Citeseer}','\\underline{SoftMedianGDC}')|0.676$\pm$0.002|0.681$\pm$0.002|0.675$\pm$0.002|0.667$\pm$0.003|nan|
|('\\textbf{Citeseer}','\\underline{SoftMedianPPRGo}')|0.685$\pm$0.007|0.690$\pm$0.006|0.692$\pm$0.007|0.687$\pm$0.006|nan|
|('\\textbf{Citeseer}','VanillaGCN')|0.574$\pm$0.004|0.550$\pm$0.001|0.594$\pm$0.002|0.568$\pm$0.004|nan|
|('\\textbf{Citeseer}','VanillaGDC')|0.562$\pm$0.003|0.560$\pm$0.004|0.581$\pm$0.003|0.562$\pm$0.004|nan|
|('\\textbf{Citeseer}','VanillaPPRGo')|0.644$\pm$0.009|0.662$\pm$0.004|0.649$\pm$0.003|0.644$\pm$0.007|nan|
|('\\textbf{Citeseer}','SoftMedoidGDC')|0.682$\pm$0.003|0.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tmp[architecture_c] = df_tmp[architecture_c].apply(transform_labels).astype("category")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


Unnamed: 0_level_0,\textbf{Attack},\textbf{FGSM},\underline{\textbf{GR-BCD}},\textbf{PGD},\underline{\textbf{PR-BCD}},\textbf{Acc.}
Unnamed: 0_level_1,Frac. edges \(\boldsymbol{\epsilon}\),0.1,0.1,0.1,0.1,Unnamed: 6_level_1
Unnamed: 0_level_2,\textbf{Architecture},Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
\textbf{Citeseer},\underline{Soft Median GDC},0.676 $\pm$ 0.002,0.681 $\pm$ 0.002,0.675 $\pm$ 0.002,0.667 $\pm$ 0.003,-
\textbf{Citeseer},\underline{Soft Median PPRGo},\textbf{0.685 $\pm$ 0.007},\textbf{0.690 $\pm$ 0.006},\textbf{0.692 $\pm$ 0.007},\textbf{0.687 $\pm$ 0.006},-
\textbf{Citeseer},Vanilla GCN,0.574 $\pm$ 0.004,0.550 $\pm$ 0.001,0.594 $\pm$ 0.002,0.568 $\pm$ 0.004,-
\textbf{Citeseer},Vanilla GDC,0.562 $\pm$ 0.003,0.560 $\pm$ 0.004,0.581 $\pm$ 0.003,0.562 $\pm$ 0.004,-
\textbf{Citeseer},Vanilla PPRGo,0.644 $\pm$ 0.009,0.662 $\pm$ 0.004,0.649 $\pm$ 0.003,0.644 $\pm$ 0.007,-
\textbf{Citeseer},Soft Medoid GDC,0.682 $\pm$ 0.003,0.683 $\pm$ 0.002,0.677 $\pm$ 0.003,0.675 $\pm$ 0.003,-
\textbf{Citeseer},SVD GCN,0.617 $\pm$ 0.012,0.615 $\pm$ 0.011,0.599 $\pm$ 0.013,0.604 $\pm$ 0.009,-
\textbf{Citeseer},Jaccard GCN,0.622 $\pm$ 0.006,0.616 $\pm$ 0.003,0.630 $\pm$ 0.003,0.609 $\pm$ 0.005,-
\textbf{Citeseer},RGCN,0.568 $\pm$ 0.005,0.584 $\pm$ 0.004,0.589 $\pm$ 0.004,0.583 $\pm$ 0.006,-
\textbf{Cora ML},\underline{Soft Median GDC},0.769 $\pm$ 0.002,0.765 $\pm$ 0.001,0.758 $\pm$ 0.002,0.752 $\pm$ 0.002,-


df_small = df_piv.loc[(slice(*small_datasets), slice(None))].drop(columns=drop_attacks)
df_small

In [44]:
print(df_small\
    .sort_index(level=1, ascending=False, sort_remaining=False)\
    .sort_index(level=0, ascending=True, sort_remaining=True)\
    .to_latex(
        #float_format='%.2f',
        caption=r'{Comparing attacks and defenses via the perturbed accuracy. The last column contains the test accuracy. \(\epsilon\) denotes the fraction of edges perturbed. A lower perturbed accuracy implies a stronger attach and for a defense vice versa. As attack perform similarly, we only highlight the strongest defense. Our approaches are underlined.', 
        label='tab:global_small',
        escape=False,
        multirow=True,
        multicolumn=True,
        column_format='llccccc'
    )
)

\begin{table}
\centering
\caption{{Comparing attacks and defenses via the perturbed accuracy. The last column contains the test accuracy. \(\epsilon\) denotes the fraction of edges perturbed. A lower perturbed accuracy implies a stronger attach and for a defense vice versa. As attack perform similarly, we only highlight the strongest defense. Our approaches are underlined.}
\label{tab:global_small}
\begin{tabular}{llccccc}
\toprule
                 & \textbf{Attack} &               \textbf{FGSM} & \underline{\textbf{GR-BCD}} &                \textbf{PGD} & \underline{\textbf{PR-BCD}} & \textbf{Acc.} \\
                 & Frac. edges \(\boldsymbol{\epsilon}\) &                         0.1 &                         0.1 &                         0.1 & \multicolumn{2}{l}{0.1} \\
    & \textbf{Architecture} &                             &                             &                             &                             &               \\
\midrule
\multirow{9}{*}{\textbf{Citeseer}} & \

## Split table for large datasets

In [45]:
large_datasets = [rf'\rotatebox{{90}}{{{d}}}' for d in [dataset_map['pubmed'], dataset_map['ogbn-arxiv'], dataset_map['ogbn-products']]]
large_datasets

['\\rotatebox{90}{\\textbf{PubMed}}',
 '\\rotatebox{90}{\\textbf{arXiv}}',
 '\\rotatebox{90}{\\textbf{Products}}']

In [46]:
drop_attacks = [attack_map['FGSM'], attack_map['PGD']]
drop_attacks

['\\textbf{FGSM}', '\\textbf{PGD}']

In [47]:
df_large = df_piv.loc[(slice(large_datasets[0], large_datasets[-1]), slice(None))].drop(columns=drop_attacks)
df_large

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,\textbf{Attack},\textbf{DICE},\textbf{DICE},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\textbf{Acc.}
Unnamed: 0_level_1,Frac. edges \(\boldsymbol{\epsilon}\),0.05,0.1,0.05,0.1,0.05,0.1,Unnamed: 8_level_1
Unnamed: 0_level_2,\textbf{Architecture},Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
\rotatebox{90}{\textbf{PubMed}},\underline{Soft Median GDC},0.761 $\pm$ 0.002,0.752 $\pm$ 0.003,\textit{0.721 $\pm$ 0.004},\textit{0.693 $\pm$ 0.005},0.730 $\pm$ 0.005,0.708 $\pm$ 0.005,0.769 $\pm$ 0.002
\rotatebox{90}{\textbf{PubMed}},\underline{Soft Median PPRGo},0.764 $\pm$ 0.001,0.752 $\pm$ 0.002,\textbf{\textit{0.723 $\pm$ 0.000}},\textbf{0.694 $\pm$ 0.001},0.727 $\pm$ 0.000,\textit{0.692 $\pm$ 0.000},0.776 $\pm$ 0.002
\rotatebox{90}{\textbf{PubMed}},Vanilla GCN,0.766 $\pm$ 0.003,0.751 $\pm$ 0.002,\textit{0.661 $\pm$ 0.003},\textit{0.592 $\pm$ 0.004},0.686 $\pm$ 0.004,0.620 $\pm$ 0.003,\textbf{0.781 $\pm$ 0.003}
\rotatebox{90}{\textbf{PubMed}},Vanilla GDC,0.766 $\pm$ 0.003,0.748 $\pm$ 0.002,\textit{0.680 $\pm$ 0.004},\textit{0.620 $\pm$ 0.005},0.696 $\pm$ 0.004,0.645 $\pm$ 0.005,0.781 $\pm$ 0.002
\rotatebox{90}{\textbf{PubMed}},Vanilla PPRGo,0.717 $\pm$ 0.001,0.721 $\pm$ 0.007,0.714 $\pm$ 0.001,0.673 $\pm$ 0.002,\textit{0.704 $\pm$ 0.007},\textit{0.658 $\pm$ 0.004},0.765 $\pm$ 0.008
\rotatebox{90}{\textbf{PubMed}},Soft Medoid GDC,\textbf{0.766 $\pm$ 0.003},\textbf{0.756 $\pm$ 0.003},\textit{0.722 $\pm$ 0.004},\textit{0.693 $\pm$ 0.005},\textbf{0.732 $\pm$ 0.004},\textbf{0.711 $\pm$ 0.005},0.774 $\pm$ 0.003
\rotatebox{90}{\textbf{arXiv}},\underline{Soft Median GDC},0.645 $\pm$ 0.002,0.629 $\pm$ 0.002,0.504 $\pm$ 0.003,0.462 $\pm$ 0.001,\textit{0.479 $\pm$ 0.002},\textit{0.420 $\pm$ 0.005},0.666 $\pm$ 0.002
\rotatebox{90}{\textbf{arXiv}},\underline{Soft Median PPRGo},0.669 $\pm$ 0.001,0.654 $\pm$ 0.001,\textbf{0.606 $\pm$ 0.001},\textbf{0.589 $\pm$ 0.002},\textbf{\textit{0.598 $\pm$ 0.001}},\textbf{\textit{0.567 $\pm$ 0.002}},0.684 $\pm$ 0.001
\rotatebox{90}{\textbf{arXiv}},Vanilla GCN,\textbf{0.690 $\pm$ 0.004},\textbf{0.671 $\pm$ 0.004},0.361 $\pm$ 0.003,0.292 $\pm$ 0.005,\textit{0.351 $\pm$ 0.003},\textit{0.235 $\pm$ 0.006},\textbf{0.706 $\pm$ 0.004}
\rotatebox{90}{\textbf{arXiv}},Vanilla GDC,0.672 $\pm$ 0.001,0.648 $\pm$ 0.001,0.446 $\pm$ 0.001,0.390 $\pm$ 0.001,\textit{0.399 $\pm$ 0.001},\textit{0.297 $\pm$ 0.003},0.701 $\pm$ 0.001


In [48]:
print(df_large\
    .sort_index(level=1, ascending=False, sort_remaining=False)\
    .sort_index(level=0, ascending=True, sort_remaining=True)\
    .to_latex(
        #float_format='%.2f',
        caption=r'\todo{Sth}', 
        label='tab:global_small',
        escape=False,
        multirow=True,
        multicolumn=True,
        column_format='llcccccccccc'
    )
)

\begin{table}
\centering
\caption{\todo{Sth}}
\label{tab:global_small}
\begin{tabular}{llcccccccccc}
\toprule
                                  & \textbf{Attack} & \multicolumn{2}{l}{\textbf{DICE}} & \multicolumn{2}{l}{\underline{\textbf{GR-BCD}}} & \multicolumn{2}{l}{\underline{\textbf{PR-BCD}}} &               \textbf{Acc.} \\
                                  & Frac. edges \(\boldsymbol{\epsilon}\) &                        0.05 &                         0.1 &                                 0.05 &                         0.1 &                                 0.05 & \multicolumn{2}{l}{0.1} \\
    & \textbf{Architecture} &                             &                             &                                      &                             &                                      &                                      &                             \\
\midrule
\multirow{6}{*}{\rotatebox{90}{\textbf{PubMed}}} & \underline{Soft Median GDC} &           0.761 $\pm$ 0.002 &           

# Compare losses

In [49]:
architecture_c = r'\textbf{Architecture}'
dataset_c = r'   '
attack_c = r'\rotatebox{90}{\textbf{Attack}}'
epsilons_c = r'\makecell{\textbf{Frac.}\\\textbf{edges}\\\(\boldsymbol{\epsilon}\)}'

epsilons = [0.01, 0.05, 0.1, 0.25]
epsilon_marks = ['', r'\textit', r'\underline', r'\textbf']

In [50]:
loss_c = r'\textbf{Loss}'

loss_map = {
    'CE': 'CE',
    'Margin': 'Margin',
    'CW': 'CW',
    'SCE': 'NCE',
    'eluMargin': 'elu Margin',
    'MCE': r'\underline{MCE}',
    'tanhMargin': r'\underline{tanh Margin}',
    #'tanhMarginCW-0.25': '0.25 tanh Margin + 0.75 CW',
    #'tanhMarginCW-0.5': '0.5 tanh Margin + 0.5 CW',
    #'tanhMarginCW-0.625': '0.625 tanh Margin + 0.375 CW',
    #'tanhMarginCW-0.75': '0.75 tanh Margin + 0.25 CW',
    #'tanhMarginCW-0.9': '0.9 tanh Margin + 0.1 CW',
    #'tanhMarginCW-0.95': '0.95 tanh Margin + 0.05 CW',
    #'tanhMarginMCE-0.25': '0.25 tanh Margin + 0.75 MCE',
    'tanhMarginMCE-0.5': r'\makecell{\underline{0.5 tanh Margin}\\\underline{+ 0.5 MCE}}',
    #'tanhMarginMCE-0.625': '0.625 tanh Margin + 0.375 MCE',
    'tanhMarginMCE-0.75': r'\makecell{\underline{0.75 tanh Margin}\\\underline{+ 0.25 MCE}}',
    #'tanhMarginMCE-0.85': '0.85 tanh Margin + 0.15 MCE',
    #'tanhMarginMCE-0.9': '0.9 tanh Margin + 0.1 MCE',
    #'tanhMarginMCE-0.95': '0.95 tanh Margin + 0.05 MCE',
}


def loss_str(loss: str) -> str:
    if isinstance(loss, str):
        if loss in loss_map:
            return loss_map[loss]
        else:
            'NA'
    else:
        return loss_map['CE']

def transform_label(label: str) -> str:
    label = label.replace(' (T=0.5)', '').replace(' (T=0.2)', '').replace('RPPRGo', 'PPRGo')
    if label in labels_ours:
        label = r'\\'.join([rf'\underline{{{part}}}' for part in label.split(' ')])
    else:
        label = label.replace(' ', r'\\')
    return f'\makecell{{{label}}}'

n_losses = 8

In [51]:
df_results = [
    pd.DataFrame(r) 
    for r in df_experiments['result.results']
]
for df_result, (_, df_experiment) in zip(df_results, df_experiments.iterrows()):
    df_result['dataset'] = df_experiment['config.dataset']
    df_result['attack'] = df_experiment['config.attack']
    df_result['seed'] = df_experiment['config.seed']
    df_result['batch_id'] = df_experiment['batch_id']
    df_result['loss'] = loss_str(df_experiment['config.attack_params.loss_type'])
    df_result['lr'] = df_experiment['config.attack_params.base_lr']
    
df_results = pd.concat(df_results, ignore_index=True)
df_results = df_results.sort_values('batch_id')
df_results = df_results.drop_duplicates([
    c for c in df_results.columns if c != 'batch_id' and c != 'accuracy'
], keep='last')

df_results.label = df_results.label.apply(lambda label: label.replace('RPPRGo', 'PPRGo'))

df_results

Unnamed: 0,label,epsilon,accuracy,dataset,attack,seed,batch_id,loss,lr
52862,Vanilla GCN,0.10,0.314199,ogbn-arxiv,PRBCD,0,1,Margin,
52924,Vanilla GCN,0.00,0.689422,ogbn-arxiv,PRBCD,5,1,NCE,
52923,Vanilla GCN,0.25,0.202601,ogbn-arxiv,PRBCD,5,1,CW,
52922,Vanilla GCN,0.10,0.400613,ogbn-arxiv,PRBCD,5,1,CW,
52921,Vanilla GCN,0.05,0.439191,ogbn-arxiv,PRBCD,5,1,CW,
...,...,...,...,...,...,...,...,...,...
52414,Vanilla GAT,0.00,0.815020,cora_ml,PGD,0,36,elu Margin,0.1
52413,Vanilla GAT,1.00,0.250593,cora_ml,PGD,0,36,Margin,0.1
52412,Vanilla GAT,0.50,0.401186,cora_ml,PGD,0,36,Margin,0.1
52418,Vanilla GAT,0.25,0.573913,cora_ml,PGD,0,36,elu Margin,0.1


In [52]:
df = []

for (dataset, attack, label, epsilon, loss), df_group in df_results[
    df_results['attack'].isin(['FGSM', 'PGD'])
    #(df_results['attack'].isin(['FGSM', 'PGD']) 
    # | ((df_results['attack'] == 'PRBCD') & df_results['dataset'].isin(['pubmed', 'ogbn-arxiv'])))
    & df_results['label'].isin(labels_to_plot)
].groupby(['dataset', 'attack', 'label', 'epsilon', 'loss']):
    if len(df_group.seed.unique()) != 3:
        print(f'For {dataset}-{attack}-{epsilon} collected runs for seed {df_group.seed.tolist()}')

    accuracy = calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=True, decimal_places=3)
    df.append({
        dataset_c: dataset_map[dataset],
        architecture_c: transform_label(label),
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        'accuracy': accuracy,
        loss_c: loss
    })
    
df = pd.DataFrame(df)

df = df[df[epsilons_c].isin(epsilons)]

df[architecture_c] = df[architecture_c].astype("category")
df[architecture_c].cat.set_categories(
    [transform_label(l) for l in labels_to_plot],
    inplace=True
)
df[dataset_c] = df[dataset_c].astype("category")
df[dataset_c].cat.set_categories(
    dataset_order,
    inplace=True
)
df[attack_c] = df[attack_c].astype("category")
df[attack_c].cat.set_categories(
    list(attack_map.values()),
    inplace=True
)
df[loss_c] = df[loss_c].astype("category")
df[loss_c].cat.set_categories(
    list(loss_map.values()),
    inplace=True
)

#df[loss_c] = df[loss_c].cat.rename_categories([
#    rf'\underline{{{cat}}}' if cat in [loss_map[loss] for loss in losses_ours] else cat
#    for cat in df[loss_c].cat.categories
#])
df[attack_c] = df[attack_c].cat.rename_categories([rf'\rotatebox{{90}}{{{cat}}}' for cat in df[attack_c].cat.categories])


df

Unnamed: 0,Unnamed: 1,\textbf{Architecture},\rotatebox{90}{\textbf{Attack}},\makecell{\textbf{Frac.}\\\textbf{edges}\\\(\boldsymbol{\epsilon}\)},accuracy,\textbf{Loss}
8,\textbf{Citeseer},\makecell{Jaccard\\GCN},\rotatebox{90}{\textbf{FGSM}},0.01,0.716 $\pm$ 0.004,CE
9,\textbf{Citeseer},\makecell{Jaccard\\GCN},\rotatebox{90}{\textbf{FGSM}},0.01,0.711 $\pm$ 0.005,CW
10,\textbf{Citeseer},\makecell{Jaccard\\GCN},\rotatebox{90}{\textbf{FGSM}},0.01,0.712 $\pm$ 0.005,Margin
11,\textbf{Citeseer},\makecell{Jaccard\\GCN},\rotatebox{90}{\textbf{FGSM}},0.01,0.714 $\pm$ 0.005,NCE
12,\textbf{Citeseer},\makecell{Jaccard\\GCN},\rotatebox{90}{\textbf{FGSM}},0.01,0.704 $\pm$ 0.004,\makecell{\underline{0.5 tanh Margin}\\\underl...
...,...,...,...,...,...,...
1771,\textbf{Cora ML},\makecell{Vanilla\\PPRGo},\rotatebox{90}{\textbf{PGD}},0.25,0.731 $\pm$ 0.000,NCE
1772,\textbf{Cora ML},\makecell{Vanilla\\PPRGo},\rotatebox{90}{\textbf{PGD}},0.25,0.648 $\pm$ 0.002,\makecell{\underline{0.75 tanh Margin}\\\under...
1773,\textbf{Cora ML},\makecell{Vanilla\\PPRGo},\rotatebox{90}{\textbf{PGD}},0.25,0.752 $\pm$ 0.002,\underline{MCE}
1774,\textbf{Cora ML},\makecell{Vanilla\\PPRGo},\rotatebox{90}{\textbf{PGD}},0.25,0.632 $\pm$ 0.002,\underline{tanh Margin}


In [53]:
for dataset in df[dataset_c].unique():
    dataset_raw = dataset.replace(r'\textbf{', '').replace(r'}', '')
    
    df_tmp = df[df[dataset_c] == dataset]
    df_tmp[epsilons_c] = df_tmp[epsilons_c].apply(lambda epsilon: rf'\rotatebox{{90}}{{$\epsilon={epsilon}$}}')
    df_piv = pd.pivot_table(
        df_tmp, 
        index=[attack_c, epsilons_c, loss_c], 
        columns=[architecture_c],
        values='accuracy',
        aggfunc=lambda x: ' '.join(x)
    )

    #df_piv = df_piv[[c for attack in attack_order for c in df_piv.columns if c[0] == attack]]

    #for epsilon, mark in zip(epsilons, epsilon_marks):
    #    current_columns = [c for c in df_piv.columns if c[1] == epsilon]
    #    df_piv[current_columns] = df_piv[current_columns].groupby(dataset_c).apply(
    #        partial(mark_best_and_second_best, first_mark=mark, second_mark='', axis=1, is_higher_better=False)
    #    )

    #def acc_to_mean_and_error(group: pd.DataFrame, with_error=True, decimal_places: int = 3):
    #    return calc_mean_and_error(group['accuracy'], group['seed'], with_error=with_error, decimal_places=decimal_places)

    #df_piv = df_piv.join(df_accuracy)
    df_piv = df_piv.fillna('-')

    ce_acc = df_piv.xs(loss_map['CE'], level=2, drop_level=False).values
    nonce_acc = np.vstack([
        df_piv.xs(loss_map[loss], level=2, drop_level=False).values
        for loss in loss_map.keys() if loss != loss_map['CE']
    ])

    row_idx = n_losses * np.arange(df_piv.shape[0] // n_losses)
    row_idx = (row_idx[:, None] 
               #+ df_piv.groupby(level=[0,1]).aggregate(lambda col: col.astype(float).argmin()).values
               + df_piv.groupby(level=[0,1]).aggregate(lambda col: col.argsort()[0]).values
              ).flatten()

    col_idx = np.tile(np.arange(df_piv.shape[1]), df_piv.shape[0] // n_losses)

    mask = np.zeros(df_piv.shape).astype(bool)
    mask[row_idx, col_idx] = True

    df_piv = df_piv.mask(
        pd.DataFrame(mask, columns=df_piv.columns, index=df_piv.index),
        df_piv.applymap(lambda elem: rf'{epsilon_marks[-1]}{{{elem}}}')
    )
    df_piv
    
    print()
    print(df_piv\
        .sort_index(level=1, ascending=False, sort_remaining=False)\
        .sort_index(level=0, ascending=True, sort_remaining=True)\
        .to_latex(
            float_format='%.2f',
            caption=fr'{dataset_raw}: Perturbed accuracy comparing the conventional losses with our losses over the different architectures. \(\epsilon\) denotes the fraction of edges perturbed.', 
            label=f'tab:appendix_losscompare_{dataset_raw}',
            escape=False,
            multirow=True,
            multicolumn=True,
            column_format='lcl|' + len(df[df[dataset_c] == dataset][architecture_c].unique()) * 'c'
        )
    )
    print()

df_piv

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tmp[epsilons_c] = df_tmp[epsilons_c].apply(lambda epsilon: rf'\rotatebox{{90}}{{$\epsilon={epsilon}$}}')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_tmp[epsilons_c] = df_tmp[epsilons_c].apply(lambda epsilon: rf'\rotatebox{{90}}{{$\epsilon={epsilon}$}}')



\begin{table}
\centering
\caption{Citeseer: Perturbed accuracy comparing the conventional losses with our losses over the different architectures. \(\epsilon\) denotes the fraction of edges perturbed.}
\label{tab:appendix_losscompare_Citeseer}
\begin{tabular}{lcl|cccccccc}
\toprule
                             &                                 & \textbf{Architecture} & \makecell{\underline{Soft}\\\underline{Median}\\\underline{GDC}} & \makecell{\underline{Soft}\\\underline{Median}\\\underline{PPRGo}} &     \makecell{Vanilla\\GCN} &     \makecell{Vanilla\\GDC} &   \makecell{Vanilla\\PPRGo} & \makecell{Soft\\Medoid\\GDC} &     \makecell{Jaccard\\GCN} &             \makecell{RGCN} \\
\rotatebox{90}{\textbf{Attack}} & \makecell{\textbf{Frac.}\\\textbf{edges}\\\(\boldsymbol{\epsilon}\)} & \textbf{Loss} &                                                                  &                                                                    &                             &                       

Unnamed: 0_level_0,Unnamed: 1_level_0,\textbf{Architecture},\makecell{\underline{Soft}\\\underline{Median}\\\underline{GDC}},\makecell{\underline{Soft}\\\underline{Median}\\\underline{PPRGo}},\makecell{Vanilla\\GCN},\makecell{Vanilla\\GDC},\makecell{Vanilla\\PPRGo},\makecell{Soft\\Medoid\\GDC},\makecell{Jaccard\\GCN},\makecell{RGCN}
\rotatebox{90}{\textbf{Attack}},\makecell{\textbf{Frac.}\\\textbf{edges}\\\(\boldsymbol{\epsilon}\)},\textbf{Loss},Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},CE,0.813 $\pm$ 0.002,0.816 $\pm$ 0.000,0.814 $\pm$ 0.004,0.826 $\pm$ 0.002,0.818 $\pm$ 0.002,0.810 $\pm$ 0.003,0.806 $\pm$ 0.003,0.807 $\pm$ 0.002
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},Margin,0.820 $\pm$ 0.001,0.820 $\pm$ 0.001,0.813 $\pm$ 0.003,0.825 $\pm$ 0.003,0.818 $\pm$ 0.002,0.816 $\pm$ 0.002,0.804 $\pm$ 0.003,0.804 $\pm$ 0.002
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},CW,0.820 $\pm$ 0.001,0.819 $\pm$ 0.001,0.814 $\pm$ 0.003,0.826 $\pm$ 0.003,0.818 $\pm$ 0.001,0.816 $\pm$ 0.002,0.805 $\pm$ 0.003,0.804 $\pm$ 0.003
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},NCE,0.822 $\pm$ 0.001,0.820 $\pm$ 0.001,0.818 $\pm$ 0.003,0.831 $\pm$ 0.003,0.822 $\pm$ 0.002,0.818 $\pm$ 0.002,0.809 $\pm$ 0.002,0.807 $\pm$ 0.003
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},elu Margin,0.821 $\pm$ 0.001,0.819 $\pm$ 0.001,0.814 $\pm$ 0.003,0.826 $\pm$ 0.003,0.817 $\pm$ 0.002,0.817 $\pm$ 0.002,0.804 $\pm$ 0.003,0.804 $\pm$ 0.002
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},\underline{MCE},0.811 $\pm$ 0.002,0.813 $\pm$ 0.001,0.795 $\pm$ 0.004,0.811 $\pm$ 0.003,0.807 $\pm$ 0.001,0.808 $\pm$ 0.002,0.791 $\pm$ 0.003,0.794 $\pm$ 0.000
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},\underline{tanh Margin},\textbf{0.806 $\pm$ 0.001},0.811 $\pm$ 0.001,0.801 $\pm$ 0.003,0.810 $\pm$ 0.003,0.803 $\pm$ 0.002,0.807 $\pm$ 0.003,0.794 $\pm$ 0.002,0.796 $\pm$ 0.001
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.01$},\makecell{\underline{0.5 tanh Margin}\\\underline{+ 0.5 MCE}},0.806 $\pm$ 0.001,\textbf{0.811 $\pm$ 0.000},\textbf{0.792 $\pm$ 0.004},\textbf{0.806 $\pm$ 0.002},\textbf{0.799 $\pm$ 0.002},\textbf{0.806 $\pm$ 0.003},\textbf{0.788 $\pm$ 0.003},\textbf{0.790 $\pm$ 0.001}
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.05$},CE,0.779 $\pm$ 0.001,0.789 $\pm$ 0.000,0.771 $\pm$ 0.004,0.776 $\pm$ 0.001,0.781 $\pm$ 0.002,0.776 $\pm$ 0.001,0.768 $\pm$ 0.003,0.764 $\pm$ 0.001
\rotatebox{90}{\textbf{FGSM}},\rotatebox{90}{$\epsilon=0.05$},Margin,0.799 $\pm$ 0.002,0.803 $\pm$ 0.001,0.751 $\pm$ 0.004,0.763 $\pm$ 0.004,0.774 $\pm$ 0.001,0.799 $\pm$ 0.002,0.748 $\pm$ 0.003,0.745 $\pm$ 0.003


## Plot selected

In [54]:
epsilons_c = r'Frac. edges $\epsilon$'

In [55]:
loss_map = {
    'CE': 'CE',
    'Margin': 'Margin',
    'CW': 'CW',
    'SCE': 'NCE',
    'eluMargin': 'elu Margin',
    'MCE': 'MCE',
    'tanhMargin': r'tanh Margin',
    #'tanhMarginMCE-0.25': '0.25 tanh Margin + 0.75 MCE',
    #'tanhMarginMCE-0.5': '0.5 tanh Margin + 0.5 MCE',
    #'tanhMarginMCE-0.75': '0.75 tanh Margin + 0.25 MCE',
    #'tanhMarginMCE-0.85': '0.85 tanh Margin + 0.15 MCE',
    #'tanhMarginMCE-0.9': '0.9 tanh Margin + 0.1 MCE',
    #'tanhMarginMCE-0.95': '0.95 tanh Margin + 0.05 MCE',
}

In [56]:
labels_selected = [
    'Vanilla GCN',
    #'Vanilla GDC',
    #'SVD GCN',
    #'Jaccard GCN',
    #'RGCN',
    #'Soft Medoid GDC (T=0.5)',
    #'Soft Median GDC (T=0.2)'
]

datasets_selected = ['citeseer', 'cora_ml', 'pubmed', 'ogbn-arxiv']

In [57]:
df_results = [
    pd.DataFrame(r) 
    for r in df_experiments['result.results']
]
for df_result, (_, df_experiment) in zip(df_results, df_experiments.iterrows()):
    df_result['dataset'] = df_experiment['config.dataset']
    df_result['attack'] = df_experiment['config.attack']
    df_result['seed'] = df_experiment['config.seed']
    df_result['batch_id'] = df_experiment['batch_id']
    df_result['loss'] = loss_str(df_experiment['config.attack_params.loss_type'])
    df_result['lr'] = df_experiment['config.attack_params.base_lr']

df_results = pd.concat(df_results, ignore_index=True)
df_results = df_results.sort_values('batch_id')
df_results = df_results.drop_duplicates([
    c for c in df_results.columns if c != 'batch_id' and c != 'accuracy'
], keep='last')

df_results

Unnamed: 0,label,epsilon,accuracy,dataset,attack,seed,batch_id,loss,lr
52862,Vanilla GCN,0.10,0.314199,ogbn-arxiv,PRBCD,0,1,Margin,
52924,Vanilla GCN,0.00,0.689422,ogbn-arxiv,PRBCD,5,1,NCE,
52923,Vanilla GCN,0.25,0.202601,ogbn-arxiv,PRBCD,5,1,CW,
52922,Vanilla GCN,0.10,0.400613,ogbn-arxiv,PRBCD,5,1,CW,
52921,Vanilla GCN,0.05,0.439191,ogbn-arxiv,PRBCD,5,1,CW,
...,...,...,...,...,...,...,...,...,...
52414,Vanilla GAT,0.00,0.815020,cora_ml,PGD,0,36,elu Margin,0.1
52413,Vanilla GAT,1.00,0.250593,cora_ml,PGD,0,36,Margin,0.1
52412,Vanilla GAT,0.50,0.401186,cora_ml,PGD,0,36,Margin,0.1
52418,Vanilla GAT,0.25,0.573913,cora_ml,PGD,0,36,elu Margin,0.1


In [58]:
df_results.loss.unique()

array(['Margin', 'NCE', 'CW', 'CE', 'elu Margin', 'tanh Margin', 'MCE',
       None], dtype=object)

In [60]:
df_results.loss.unique()

array(['Margin', 'NCE', 'CW', 'CE', 'elu Margin', 'tanh Margin', 'MCE',
       None], dtype=object)

In [61]:
for suffix, width in zip(['', '_large'], [0.25, 0.5]):
    for (dataset, attack, label, lr), df_group in df_results[
        (df_results['attack'].isin(['FGSM', 'PGD']) 
         | ((df_results['attack'] == 'PRBCD') & df_results['dataset'].isin(['pubmed', 'ogbn-arxiv'])))
        & df_results['label'].isin(labels_selected)
        & df_results['dataset'].isin(datasets_selected)
        #& (df_results['epsilon'] <= 0.1)
    ].fillna(-1).groupby(['dataset', 'attack', 'label', 'lr']):       
        print(dataset, attack, label, lr)

        fig, ax = mpl_latex.newfig(width=width, ratio_yx=1)

        for loss in loss_map.values():

            if df_group[df_group.loss == loss].shape[0] == 0:
                continue

            df_values = df_group[df_group.loss == loss].groupby('epsilon').accuracy.agg(['mean', 'sem']).reset_index()

            plt.errorbar(
                df_values.epsilon,
                df_values['mean'],
                df_values['sem'],
                label=loss#'Composite' if '+' in loss else loss
            )

            #print(f"{loss}: {df_values['mean']} p/m {df_values['sem']}")

            #if loss == 'CE':
            #    df_stronger = df_values.copy()
            #    plt.plot(df_stronger.epsilon, -df_stronger.iloc[0]['mean'] + 2 * df_stronger['mean'], color='grey', linestyle='--')

        plt.xlabel(epsilons_c)
        #if dataset == 'cora_ml':
        #    plt.ylim(0.5, 0.85)
        #elif dataset == 'ogbn-arxiv':
        #    plt.ylim(0.0, 0.75)
        #else:
        #    plt.ylim(0.425, 0.725)


        if label != 'Vanilla GCN':
            ax.get_yaxis().set_ticklabels([])

        mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss{suffix}_no_leglab', fig, close_fig=False)
        plt.ylabel('Pert. accuracy')

        mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss{suffix}_no_legend', fig, close_fig=False)
        ax.legend()

        mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss{suffix}', fig, close_fig=False)
        mpl_latex.dedicated_legend_plot(
            f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss{suffix}_legend',
            mod_label=lambda l: l,
            ncol=len(loss_map)
        )

        print(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss{suffix}')

citeseer FGSM Vanilla GCN -1.0


  self[key] = other[key]


latex/assets/global_transfer_FGSM_citeseer_Vanilla GCN_surrloss
citeseer PGD Vanilla GCN 0.1


  self[key] = other[key]


latex/assets/global_transfer_PGD_citeseer_Vanilla GCN_surrloss
cora_ml FGSM Vanilla GCN -1.0


  self[key] = other[key]


latex/assets/global_transfer_FGSM_cora_ml_Vanilla GCN_surrloss
cora_ml PGD Vanilla GCN 0.1


  self[key] = other[key]


latex/assets/global_transfer_PGD_cora_ml_Vanilla GCN_surrloss
ogbn-arxiv PRBCD Vanilla GCN -1.0


  self[key] = other[key]


latex/assets/global_transfer_PRBCD_ogbn-arxiv_Vanilla GCN_surrloss
pubmed PRBCD Vanilla GCN -1.0


  self[key] = other[key]
  figLegend = pylab.figure()
  self[key] = other[key]
  return plt.subplots(


latex/assets/global_transfer_PRBCD_pubmed_Vanilla GCN_surrloss
citeseer FGSM Vanilla GCN -1.0
latex/assets/global_transfer_FGSM_citeseer_Vanilla GCN_surrloss_large
citeseer PGD Vanilla GCN 0.1


  self[key] = other[key]


latex/assets/global_transfer_PGD_citeseer_Vanilla GCN_surrloss_large
cora_ml FGSM Vanilla GCN -1.0


  self[key] = other[key]


latex/assets/global_transfer_FGSM_cora_ml_Vanilla GCN_surrloss_large
cora_ml PGD Vanilla GCN 0.1


  self[key] = other[key]


latex/assets/global_transfer_PGD_cora_ml_Vanilla GCN_surrloss_large
ogbn-arxiv PRBCD Vanilla GCN -1.0


  self[key] = other[key]


latex/assets/global_transfer_PRBCD_ogbn-arxiv_Vanilla GCN_surrloss_large
pubmed PRBCD Vanilla GCN -1.0


  self[key] = other[key]


latex/assets/global_transfer_PRBCD_pubmed_Vanilla GCN_surrloss_large


In [74]:
dataset = 'ogbn-arxiv'
attack = 'PRBCD'
label = 'Vanilla GCN'

loss_map = {
    'CE': 'CE',
    'Margin': 'Margin',
    'CW': 'CW',
    'SCE': 'NCE',
    'eluMargin': 'elu Margin',
    'MCE': 'MCE',
    'tanhMargin': r'tanh Margin',
    'tanhMarginMCE-0.95': '0.95 tanh Margin + 0.05 MCE'
}

print(dataset, attack, label)

def loss_str(loss: str) -> str:
    if isinstance(loss, str):
        if loss in loss_map:
            return loss_map[loss]
        else:
            'NA'
    else:
        return loss_map['CE']
    
df_results = [
    pd.DataFrame(r) 
    for r in df_experiments['result.results']
]
for df_result, (_, df_experiment) in zip(df_results, df_experiments.iterrows()):
    df_result['dataset'] = df_experiment['config.dataset']
    df_result['attack'] = df_experiment['config.attack']
    df_result['seed'] = df_experiment['config.seed']
    df_result['batch_id'] = df_experiment['batch_id']
    df_result['loss'] = loss_str(df_experiment['config.attack_params.loss_type'])
    df_result['lr'] = df_experiment['config.attack_params.base_lr']

df_results = pd.concat(df_results, ignore_index=True)
df_results = df_results.sort_values('batch_id')
df_results = df_results.drop_duplicates([
    c for c in df_results.columns if c != 'batch_id' and c != 'accuracy'
], keep='last')

df_group = df_results[
    (df_results['attack'] == attack)
    & (df_results['label'] == label)
    & (df_results['dataset'] == dataset)
]

fig, ax = mpl_latex.newfig(width=width, ratio_yx=1)


for loss in loss_map.values():

    if df_group[df_group.loss == loss].shape[0] == 0:
        continue

    df_values = df_group[df_group.loss == loss].groupby('epsilon').accuracy.agg(['mean', 'sem']).reset_index()

    plt.errorbar(
        df_values.epsilon,
        df_values['mean'],
        df_values['sem'],
        label='Composite' if '+' in loss else loss
    )

    #print(f"{loss}: {df_values['mean']} p/m {df_values['sem']}")

    #if loss == 'CE':
    #    df_stronger = df_values.copy()
    #    plt.plot(df_stronger.epsilon, -df_stronger.iloc[0]['mean'] + 2 * df_stronger['mean'], color='grey', linestyle='--')

plt.xlabel(epsilons_c)
#if dataset == 'cora_ml':
#    plt.ylim(0.5, 0.85)
#elif dataset == 'ogbn-arxiv':
#    plt.ylim(0.0, 0.75)
#else:
#    plt.ylim(0.425, 0.725)


if label != 'Vanilla GCN':
    ax.get_yaxis().set_ticklabels([])

mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss_composite_no_leglab', fig, close_fig=False)
plt.ylabel('Pert. accuracy')

mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss_composite_no_legend', fig, close_fig=False)
ax.legend()

mpl_latex.savefig(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss_composite', fig, close_fig=False)
mpl_latex.dedicated_legend_plot(
    f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss_composite_legend',
    mod_label=lambda l: l,
    ncol=8
)

print(f'latex/assets/global_transfer_{attack}_{dataset}_{label}_surrloss_composite')

ogbn-arxiv PRBCD Vanilla GCN


  self[key] = other[key]


latex/assets/global_transfer_PRBCD_ogbn-arxiv_Vanilla GCN_surrloss_composite


# Direct Attacks

In [62]:
df_experiments_direct = seml.get_results('neurips21_global_attack_direct',
                                  to_data_frame=True,
                                  fields=['batch_id', 'slurm', 'config', 'result'])
df_experiments_direct.tail()

  0%|          | 0/120 [00:00<?, ?it/s]

  0%|          | 0/120 [00:00<?, ?it/s]

  parsed = pd.io.json.json_normalize(parsed, sep='.')


Unnamed: 0,_id,batch_id,slurm.experiments_per_job,slurm.sbatch_options.time,slurm.sbatch_options.nodes,slurm.sbatch_options.cpus-per-task,slurm.sbatch_options.mem,slurm.sbatch_options.gres,config.overwrite,config.db_collection,config.dataset,config.data_dir,config.binary_attr,config.make_undirected,config.seed,config.attack,config.attack_params.loss_type,config.attack_params.search_space_size,config.epsilons,config.artifact_dir,config.model_storage_type,config.pert_adj_storage_type,config.pert_attr_storage_type,config.model_label,config.device,config.data_device,config.debug_level,result.results,config.attack_params.do_synchronize,config.attack_params.epochs,config.attack_params.fine_tune_epochs,config.attack_params.keep_heuristic,slurm.sbatch_options.job-name,slurm.sbatch_options.array,slurm.sbatch_options.output,slurm.array_id,slurm.task_id
115,164,3,1,0-08:00,1,4,4G,gpu:1,164,neurips21_global_attack_direct,citeseer,datasets/,False,True,5,PRBCD,tanhMargin,100000,"[0.5, 1.0]",cache,pretrained,evasion_global_adj,evasion_global_attr,Vanilla GCN,0,0,info,"[{'label': 'Vanilla GCN', 'epsilon': 0.5, 'acc...",True,400.0,100.0,WeightOnly,rgnn_at_scale_attack_evasion_global_direct_3,0-59,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6441992.0,55.0
116,165,3,1,0-08:00,1,4,4G,gpu:1,165,neurips21_global_attack_direct,citeseer,datasets/,False,True,5,PRBCD,tanhMargin,100000,"[0.5, 1.0]",cache,pretrained,evasion_global_adj,evasion_global_attr,Vanilla GDC,0,0,info,"[{'label': 'Vanilla GDC', 'epsilon': 0.5, 'acc...",True,400.0,100.0,WeightOnly,rgnn_at_scale_attack_evasion_global_direct_3,0-59,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6441992.0,56.0
117,166,3,1,0-08:00,1,4,4G,gpu:1,166,neurips21_global_attack_direct,citeseer,datasets/,False,True,5,PRBCD,tanhMargin,100000,"[0.5, 1.0]",cache,pretrained,evasion_global_adj,evasion_global_attr,Soft Median GDC (T=1.0),0,0,info,"[{'label': 'Soft Median GDC (T=1.0)', 'epsilon...",True,400.0,100.0,WeightOnly,rgnn_at_scale_attack_evasion_global_direct_3,0-59,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6441992.0,57.0
118,167,3,1,0-08:00,1,4,4G,gpu:1,167,neurips21_global_attack_direct,citeseer,datasets/,False,True,5,PRBCD,tanhMargin,100000,"[0.5, 1.0]",cache,pretrained,evasion_global_adj,evasion_global_attr,Soft Median GDC (T=0.5),0,0,info,"[{'label': 'Soft Median GDC (T=0.5)', 'epsilon...",True,400.0,100.0,WeightOnly,rgnn_at_scale_attack_evasion_global_direct_3,0-59,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6441992.0,58.0
119,168,3,1,0-08:00,1,4,4G,gpu:1,168,neurips21_global_attack_direct,citeseer,datasets/,False,True,5,PRBCD,tanhMargin,100000,"[0.5, 1.0]",cache,pretrained,evasion_global_adj,evasion_global_attr,Soft Median GDC (T=0.2),0,0,info,"[{'label': 'Soft Median GDC (T=0.2)', 'epsilon...",True,400.0,100.0,WeightOnly,rgnn_at_scale_attack_evasion_global_direct_3,0-59,/nfs/homedirs/geisler/code/robust_gnns_at_scal...,6441992.0,59.0


In [63]:
df_results_direct_list = [
    pd.DataFrame(r) 
    for r in df_experiments_direct['result.results']
]
for df_results_direct, (_, experiment) in zip(df_results_direct_list, df_experiments_direct.iterrows()):
    df_results_direct['dataset'] = experiment['config.dataset']
    df_results_direct['attack'] = experiment['config.attack']
    df_results_direct['seed'] = experiment['config.seed']
    df_results_direct['batch_id'] = experiment['batch_id']
    df_results_direct['novel_loss'] = (
        experiment['config.attack'] not in attack_loss_map
        or experiment['config.attack_params.loss_type'] == attack_loss_map[experiment['config.attack']]
    )
    
df_results_direct = pd.concat(df_results_direct_list, ignore_index=True)
df_results_direct = df_results_direct.sort_values('batch_id')
df_results_direct = df_results_direct.drop_duplicates([
    c for c in df_results_direct.columns if c != 'batch_id' and c != 'accuracy'
], keep='last')

df_results_direct

Unnamed: 0,label,epsilon,accuracy,dataset,attack,seed,batch_id,novel_loss
0,Vanilla GCN,0.00,0.815020,cora_ml,GreedyRBCD,0,1,True
203,Vanilla GCN,0.10,0.650593,cora_ml,PRBCD,5,1,True
202,Vanilla GCN,0.05,0.720158,cora_ml,PRBCD,5,1,True
201,Vanilla GCN,0.01,0.802767,cora_ml,PRBCD,5,1,True
200,Vanilla GCN,0.00,0.837549,cora_ml,PRBCD,5,1,True
...,...,...,...,...,...,...,...,...
332,Vanilla GDC,0.50,0.432086,citeseer,GreedyRBCD,0,3,True
331,Vanilla GCN,1.00,0.116043,citeseer,GreedyRBCD,0,3,True
330,Vanilla GCN,0.50,0.265241,citeseer,GreedyRBCD,0,3,True
358,Soft Median GDC (T=0.2),0.50,0.581283,citeseer,GreedyRBCD,5,3,True


In [64]:
labels_to_plot = [
    'Soft Median GDC (T=0.2)',
    'Vanilla GCN',
    'Vanilla GDC'
]

labels_ours = [
    'Soft Median GDC',
    'Soft Median PPRGo'
]

def transform_labels(label: str) -> str:
    label = label.replace(' (T=0.5)', '').replace(' (T=0.2)', '')
    if label in labels_ours:
        label = rf'\underline{{{label}}}'
    return label

In [65]:
df_direct = []

for (dataset, attack, label, epsilon), df_group in df_results_direct[
    df_results_direct['novel_loss']
    & df_results_direct['label'].isin(labels_to_plot)
].groupby(['dataset', 'attack', 'label', 'epsilon']):
    if len(df_group.seed.unique()) != 3:
        print(f'For {dataset}-{attack}-{epsilon}-{label} collected runs for seed {df_group.seed.tolist()}')
    if label == 'Soft Median GDC (T=0.2)' and dataset == 'ogbn-products':
        continue
    if label == 'Soft Median GDC (T=5.0)' and dataset == 'ogbn-arxiv':
        continue

    values, seeds = df_group.accuracy.values, df_group.seed.values
    seeds = seeds[~np.isnan(values)]
    values = values[~np.isnan(values)]

    idx = np.unique(seeds, return_index=True)[1]
    accuracy = values[idx].mean()
    
    df_direct.append({
        dataset_c: dataset_map[dataset],
        architecture_c: label,
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        'accuracy': accuracy,
        'accuracy_str': calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=True, decimal_places=3)
    })
    
df_direct = pd.DataFrame(df_direct)

df_direct[architecture_c] = df_direct[architecture_c].apply(transform_labels).astype("category")
df_direct[architecture_c].cat.set_categories(
    [transform_labels(l) for l in labels_to_plot],
    inplace=True
)

df_direct[dataset_c] = df_direct[dataset_c].astype("category") # .apply(lambda cat: rf'\rotatebox{{90}}{{{cat}}}')
df_direct[dataset_c].cat.set_categories(
    dataset_order, #[rf'\rotatebox{{90}}{{{cat}}}' for cat in dataset_order],
    inplace=True
)

#df = df.sort_values([dataset_c, architecture_c])

# df_direct = df_direct[df_direct[epsilons_c].isin(epsilons)].copy()


df_direct

Unnamed: 0,Unnamed: 1,\textbf{Architecture},\rotatebox{90}{\textbf{Attack}},Frac. edges $\epsilon$,accuracy,accuracy_str
0,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},0.0,0.7082,0.708 $\pm$ 0.002
1,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},0.01,0.704813,0.705 $\pm$ 0.001
2,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},0.05,0.686631,0.687 $\pm$ 0.002
3,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},0.1,0.663993,0.664 $\pm$ 0.001
4,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},0.25,0.626381,0.626 $\pm$ 0.003
5,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},0.5,0.58164,0.582 $\pm$ 0.001
6,\textbf{Citeseer},\underline{Soft Median GDC},\underline{\textbf{GR-BCD}},1.0,0.536007,0.536 $\pm$ 0.004
7,\textbf{Citeseer},Vanilla GCN,\underline{\textbf{GR-BCD}},0.0,0.715865,0.716 $\pm$ 0.003
8,\textbf{Citeseer},Vanilla GCN,\underline{\textbf{GR-BCD}},0.01,0.689127,0.689 $\pm$ 0.002
9,\textbf{Citeseer},Vanilla GCN,\underline{\textbf{GR-BCD}},0.05,0.617825,0.618 $\pm$ 0.001


In [66]:
df_piv_direct = pd.pivot_table(
    df_direct[df_direct[epsilons_c] > 0], 
    index=[dataset_c, architecture_c], 
    columns=[attack_c, epsilons_c], #['type', attack_c, ' '], 
    values='accuracy_str',
    aggfunc=lambda x: ' '.join(x)
)

df_piv_direct = df_piv_direct[[c for attack in attack_order for c in df_piv_direct.columns if c[0] == attack]]

df_accuracy_tmp = df_accuracy_str.copy()
df_accuracy_tmp = df_accuracy_tmp[[i[1] in labels_to_plot for i in df_accuracy_tmp.index]]
df_accuracy_tmp.index = pd.MultiIndex.from_tuples(
    [(i[0].replace(r'\rotatebox{90}{', '')[:-1], transform_labels(i[1])) for i in df_accuracy_tmp.index],
    names=df_accuracy.index.names
)
df_piv_direct = pd.merge(df_piv_direct, df_accuracy_tmp, on=[dataset_c, architecture_c], how='left')

#df_piv_direct = df_piv_direct.apply(lambda row: row[-1] - row[:-1], axis=1).round(3)
#df_piv_direct = df_piv_direct.round(3)

#print(df_piv_direct.to_markdown())

best_defenses = []
for i in np.array(df_piv_direct.index.get_level_values(0).unique()):
    df_slice = df_piv_direct.loc[(i, slice(None))]
    best_defenses.append([df_slice[col].argsort()[-1] for col in df_slice])

df_piv_direct = df_piv_direct.applymap(lambda val: str(val) if val == val else val)
#for epsilon in epsilons:
#    current_columns = [c for c in df_piv_direct.columns if c[1] == epsilon]
#    df_piv_direct[current_columns] = df_piv_direct[current_columns].groupby(dataset_c).apply(
#        partial(mark_best_and_second_best, first_mark=best_attack_mark, second_mark='', axis=1, is_higher_better=False)
#    )

for i, bests in zip(np.array(df_piv_direct.index.get_level_values(0).unique()), best_defenses):
    df_slice = df_piv_direct.loc[(i, slice(None))]
    for col, best in zip(df_slice, bests):
        if best == -1:
            continue
        df_slice.loc[df_slice.index[best], col] = rf'{best_defense_mark}{{{df_slice.loc[df_slice.index[best], col]}}}'
    

#df_piv = pd.merge(df_piv, df_accuracy.round(2), on=['   ', architecture_c], how='left')
df_piv_direct = df_piv_direct.fillna('-')
df_piv_direct

Unnamed: 0_level_0,\rotatebox{90}{\textbf{Attack}},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{GR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\underline{\textbf{PR-BCD}},\textbf{Acc.}
Unnamed: 0_level_1,Frac. edges $\epsilon$,0.01,0.05,0.1,0.25,0.5,1.0,0.01,0.05,0.1,0.25,0.5,1.0,Unnamed: 14_level_1
Unnamed: 0_level_2,\textbf{Architecture},Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
\textbf{Cora ML},\underline{Soft Median GDC},0.807 $\pm$ 0.002,\textbf{0.773 $\pm$ 0.002},\textbf{0.749 $\pm$ 0.001},\textbf{0.692 $\pm$ 0.004},\textbf{0.648 $\pm$ 0.002},\textbf{0.603 $\pm$ 0.001},0.796 $\pm$ 0.002,\textbf{0.735 $\pm$ 0.002},\textbf{0.690 $\pm$ 0.002},\textbf{0.615 $\pm$ 0.003},\textbf{0.564 $\pm$ 0.005},\textbf{0.523 $\pm$ 0.005},0.824 $\pm$ 0.002
\textbf{Cora ML},Vanilla GCN,0.789 $\pm$ 0.003,0.699 $\pm$ 0.003,0.619 $\pm$ 0.004,0.475 $\pm$ 0.004,0.333 $\pm$ 0.003,0.148 $\pm$ 0.005,0.792 $\pm$ 0.003,0.704 $\pm$ 0.004,0.635 $\pm$ 0.005,0.478 $\pm$ 0.003,0.309 $\pm$ 0.005,0.141 $\pm$ 0.005,0.827 $\pm$ 0.003
\textbf{Cora ML},Vanilla GDC,\textbf{0.808 $\pm$ 0.002},0.749 $\pm$ 0.003,0.703 $\pm$ 0.003,0.623 $\pm$ 0.005,0.513 $\pm$ 0.005,0.396 $\pm$ 0.007,\textbf{0.799 $\pm$ 0.002},0.711 $\pm$ 0.003,0.645 $\pm$ 0.005,0.532 $\pm$ 0.006,0.457 $\pm$ 0.005,0.400 $\pm$ 0.006,\textbf{0.842 $\pm$ 0.003}
\textbf{Citeseer},\underline{Soft Median GDC},\textbf{0.705 $\pm$ 0.001},\textbf{0.687 $\pm$ 0.002},\textbf{0.664 $\pm$ 0.001},\textbf{0.626 $\pm$ 0.003},\textbf{0.582 $\pm$ 0.001},\textbf{0.536 $\pm$ 0.004},\textbf{0.692 $\pm$ 0.002},\textbf{0.650 $\pm$ 0.002},\textbf{0.615 $\pm$ 0.003},\textbf{0.548 $\pm$ 0.005},\textbf{0.494 $\pm$ 0.006},\textbf{0.446 $\pm$ 0.008},0.708 $\pm$ 0.002
\textbf{Citeseer},Vanilla GCN,0.689 $\pm$ 0.002,0.618 $\pm$ 0.001,0.554 $\pm$ 0.001,0.410 $\pm$ 0.003,0.265 $\pm$ 0.002,0.105 $\pm$ 0.008,0.689 $\pm$ 0.002,0.621 $\pm$ 0.003,0.560 $\pm$ 0.004,0.429 $\pm$ 0.007,0.282 $\pm$ 0.012,0.127 $\pm$ 0.009,\textbf{0.716 $\pm$ 0.003}
\textbf{Citeseer},Vanilla GDC,0.679 $\pm$ 0.001,0.626 $\pm$ 0.002,0.588 $\pm$ 0.006,0.504 $\pm$ 0.003,0.421 $\pm$ 0.003,0.309 $\pm$ 0.007,0.670 $\pm$ 0.001,0.591 $\pm$ 0.004,0.515 $\pm$ 0.002,0.374 $\pm$ 0.003,0.264 $\pm$ 0.007,0.194 $\pm$ 0.006,0.707 $\pm$ 0.001


In [67]:
print(df_piv_direct.to_markdown())

|                                                        | ('\\underline{\\textbf{GR-BCD}}', 0.01)   | ('\\underline{\\textbf{GR-BCD}}', 0.05)   | ('\\underline{\\textbf{GR-BCD}}', 0.1)   | ('\\underline{\\textbf{GR-BCD}}', 0.25)   | ('\\underline{\\textbf{GR-BCD}}', 0.5)   | ('\\underline{\\textbf{GR-BCD}}', 1.0)   | ('\\underline{\\textbf{PR-BCD}}', 0.01)   | ('\\underline{\\textbf{PR-BCD}}', 0.05)   | ('\\underline{\\textbf{PR-BCD}}', 0.1)   | ('\\underline{\\textbf{PR-BCD}}', 0.25)   | ('\\underline{\\textbf{PR-BCD}}', 0.5)   | ('\\underline{\\textbf{PR-BCD}}', 1.0)   | ('\\textbf{Acc.}', ' ')    |
|:-------------------------------------------------------|:------------------------------------------|:------------------------------------------|:-----------------------------------------|:------------------------------------------|:-----------------------------------------|:-----------------------------------------|:------------------------------------------|:--------------------------

In [68]:
for attack in df_direct[attack_c].unique():
    print(df_piv_direct.iloc[:, df_piv_direct.columns.get_level_values(0)==attack]\
        .sort_index(level=1, ascending=False, sort_remaining=False)\
        .sort_index(level=0, ascending=True, sort_remaining=True)\
        .to_latex(
            float_format='%.2f',
            caption=r'\todo{TBD}', 
            label='tab:appendix_direct',
            escape=False,
            multirow=True,
            multicolumn=True,
            column_format='llccccccccccccc'
        )
    )

\begin{table}
\centering
\caption{\todo{TBD}}
\label{tab:appendix_direct}
\begin{tabular}{llccccccccccccc}
\toprule
                  & \rotatebox{90}{\textbf{Attack}} & \multicolumn{6}{l}{\underline{\textbf{GR-BCD}}} \\
                  & Frac. edges $\epsilon$ &                        0.01 &                        0.05 &                        0.10 &                        0.25 &                        0.50 &                        1.00 \\
    & \textbf{Architecture} &                             &                             &                             &                             &                             &                             \\
\midrule
\multirow{3}{*}{\textbf{Cora ML}} & \underline{Soft Median GDC} &           0.807 $\pm$ 0.002 &  \textbf{0.773 $\pm$ 0.002} &  \textbf{0.749 $\pm$ 0.001} &  \textbf{0.692 $\pm$ 0.004} &  \textbf{0.648 $\pm$ 0.002} &  \textbf{0.603 $\pm$ 0.001} \\
                  & Vanilla GCN &           0.789 $\pm$ 0.003 &           0.699 $\pm$ 

# Datasets

In [69]:
'2'/2

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [84]:
def value_to_storage(val: float, decimals: int = 2):
    if val / 1e3 < 1:
        return f'{val:.{decimals}f} B'
    if val / 1e6 < 1:
        return f'{val / 1e3:.{decimals}f} kB'
    if val / 1e9 < 1:
        return f'{val / 1e6:.{decimals}f} MB'
    if val / 1e12 < 1:
        return f'{val / 1e9:.{decimals}f} GB'
    if val / 1e15 < 1:
        return f'{val / 1e12:.{decimals}f} TB'
    if val / 1e18 < 1:
        return f'{val / 1e15:.{decimals}f} PB'
    raise ValueError(f'{val} is too big for Peta!!!')

In [85]:
license_map = {
    'cora_ml': 'N/A',
    'citeseer': 'N/A',
    'pubmed': 'N/A',
    'ogbn-arxiv': r'\href{https://opendatacommons.org/licenses/by/}{ODC-BY}',
    'ogbn-products': r'\href{https://s3.amazonaws.com/amazon-reviews-pds/license.txt}{Amazon}',
    'ogbn-papers100M': r'\href{https://opendatacommons.org/licenses/by/}{ODC-BY}'
}

In [96]:
from ogb.nodeproppred import PygNodePropPredDataset

from rgnn_at_scale.data import load_dataset

dataset_df = []
for dataset in tqdm.tqdm(['cora_ml', 'citeseer', 'pubmed', 'ogbn-arxiv', 'ogbn-products', 'ogbn-papers100M']):
    if dataset.startswith('ogbn'):
        pyg_dataset = PygNodePropPredDataset(root='/nfs/staff-ssd/geisler/dontrobme/datasets', name=dataset)
        nnodes = pyg_dataset[0].x.shape[0]
        nedges = pyg_dataset[0].edge_index.shape[1]
        nfeatures = pyg_dataset[0].x.shape[1]
    else:
        graph = load_dataset(dataset, '/nfs/staff-ssd/geisler/dontrobme/datasets').standardize(
            make_unweighted=True,
            make_undirected=True,
            no_self_loops=True,
            select_lcc=True
        )
        #graph = load_and_standardize(dataset)
        nnodes = graph.adj_matrix.shape[0]
        nedges = graph.adj_matrix.nnz
        nfeatures = graph.attr_matrix.shape[1]
    dataset_df.append({
            r'\textbf{Dataset}': dataset_map[dataset].replace(r'rotatebox{90}', r'textbf'),
            r'\textbf{License}': license_map[dataset],
            r'\textbf{\#Features $d$}': f'{nfeatures:,}',
            r'\textbf{\#Nodes $n$}': f'{nnodes:,}',
            r'\textbf{\#Edges $e$}': f'{nedges:,}',
            r'\makecell{\textbf{\#Possible}\\\textbf{edges $n^2$}}': f'{nnodes ** 2:.3E}',
            r'\makecell{\textbf{Average}\\\textbf{degree $\nicefrac{e}{n}$}}': f'{nedges / nnodes:.2f}',
            r'\textbf{Size (dense)}': value_to_storage(4 * nnodes ** 2),
            r'\textbf{Size (sparse)}': value_to_storage(2*8*nedges + 4*nedges),
        })
dataset_df = pd.DataFrame(dataset_df).set_index(r'\textbf{Dataset}')
dataset_df

100%|██████████| 6/6 [00:56<00:00,  9.44s/it]


Unnamed: 0_level_0,\textbf{License},\textbf{\#Features $d$},\textbf{\#Nodes $n$},\textbf{\#Edges $e$},\makecell{\textbf{\#Possible}\\\textbf{edges $n^2$}}},\makecell{\textbf{Average}\\\textbf{degree $\nicefrac{e}{n}$}},\textbf{Size (dense)},\textbf{Size (sparse)}
\textbf{Dataset},Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
\textbf{Cora ML},,2879,2810,15962,7896000.0,5.68,31.58 MB,319.24 kB
\textbf{Citeseer},,3703,2110,7336,4452000.0,3.48,17.81 MB,146.72 kB
\textbf{PubMed},,500,19717,88648,388800000.0,4.5,1.56 GB,1.77 MB
\textbf{arXiv},\href{https://opendatacommons.org/licenses/by/...,128,169343,1166243,28680000000.0,6.89,114.71 GB,23.32 MB
\textbf{Products},\href{https://s3.amazonaws.com/amazon-reviews-...,100,2449029,123718280,5998000000000.0,50.52,23.99 TB,2.47 GB
\textbf{Papers 100M},\href{https://opendatacommons.org/licenses/by/...,128,111059956,1615685872,1.233e+16,14.55,49.34 PB,32.31 GB


In [87]:
dataset_df.to_dict()

{'\\textbf{License}': {'\\textbf{Cora ML}': 'N/A',
  '\\textbf{Citeseer}': 'N/A',
  '\\textbf{PubMed}': 'N/A',
  '\\textbf{arXiv}': '\\href{https://opendatacommons.org/licenses/by/}{ODC-BY}',
  '\\textbf{Products}': '\\href{https://s3.amazonaws.com/amazon-reviews-pds/license.txt}{Amazon License}',
  '\\textbf{Papers 100M}': '\\href{https://opendatacommons.org/licenses/by/}{ODC-BY}'},
 '\\textbf{\\#Nodes $n$}': {'\\textbf{Cora ML}': '2,810',
  '\\textbf{Citeseer}': '2,110',
  '\\textbf{PubMed}': '19,717',
  '\\textbf{arXiv}': '169,343',
  '\\textbf{Products}': '2,449,029',
  '\\textbf{Papers 100M}': '111,059,956'},
 '\\textbf{\\#Edges $e$}': {'\\textbf{Cora ML}': '15,962',
  '\\textbf{Citeseer}': '7,336',
  '\\textbf{PubMed}': '88,648',
  '\\textbf{arXiv}': '1,166,243',
  '\\textbf{Products}': '123,718,280',
  '\\textbf{Papers 100M}': '1,615,685,872'},
 '\\textbf{\\#Features $d$}': {'\\textbf{Cora ML}': '2,879',
  '\\textbf{Citeseer}': '3,703',
  '\\textbf{PubMed}': '500',
  '\\textbf{a

In [97]:
with pd.option_context("max_colwidth", 1000):
    print(dataset_df
        #.sort_index()
        .to_latex(
            float_format='%.2f',
            caption='Statistics of the used datasets. For the dense adjacency matrix we assume that each elements is represented by 4 bytes. In the sparse case we use two 8 byte integer pointers and a 4 bytes float value.', 
            label='tab:appendix_datasets',
            escape=False,
            column_format='llrrrrrrr'
        )
    )

\begin{table}
\centering
\caption{Statistics of the used datasets. For the dense adjacency matrix we assume that each elements is represented by 4 bytes. In the sparse case we use two 8 byte integer pointers and a 4 bytes float value.}
\label{tab:appendix_datasets}
\begin{tabular}{llrrrrrrr}
\toprule
{} &                                                                \textbf{License} & \textbf{\#Features $d$} & \textbf{\#Nodes $n$} & \textbf{\#Edges $e$} & \makecell{\textbf{\#Possible}\\\textbf{edges $n^2$}}} & \makecell{\textbf{Average}\\\textbf{degree $\nicefrac{e}{n}$}} & \textbf{Size (dense)} & \textbf{Size (sparse)} \\
\textbf{Dataset}     &                                                                                 &                         &                      &                      &                                                       &                                                                &                       &                        \\
\midrule
\textbf{Cor

# PPR

In [None]:
def ppr(A):
    A_norm = A / A.sum(-1)[:, None]
    return alpha * torch.inverse(torch.eye(4) + (alpha - 1) * A_norm)

def update(A_dense, ppr_original, u, v):
    i = torch.nonzero(u.flatten()).item()
    
    row = A_dense[i] + v
    row = row / row.sum()
    A_norm = A_dense[i] / A_dense[i].sum()
    row_diff = row - A_norm
    row_diff_norm = (alpha - 1) * row_diff
    print(row_diff_norm)

    P_inv = (1 / alpha) * ppr_original
    P_uv_inv = P_inv - (P_inv @ u @ row_diff_norm @ P_inv) / (1 + row_diff_norm @ P_inv @ u)
    ppr_update = alpha * P_uv_inv
    
    return ppr_update

alpha = 0.15
A_dense_list = [
    torch.tensor([[0, 1, 0, 1],
                  [1, 0, 1, 0],
                  [0, 0, 0, 1],
                  [1, 1, 1, 0]],
                 dtype=torch.float32),    
    torch.tensor([[0, 1, 0, 1],
                  [1, 0, 1, 0],
                  [0, 1, 0, 1],
                  [1, 1, 1, 0]],
                 dtype=torch.float32),
    torch.tensor([[0, 1, 0, 1],
                  [1, 0, 1, 0],
                  [0, 0, 0, 1],
                  [1, 1, 1, 0]],
                 dtype=torch.float32),    
    torch.tensor([[0, 1, 0, 1],
                  [1, 0, 1, 0],
                  [0, 1, 0, 1],
                  [1, 1, 1, 0]],
                 dtype=torch.float32)
]


u_list = [
    torch.tensor([[0], [0], [1], [0]], dtype=torch.float32),
    torch.tensor([[0], [0], [1], [0]], dtype=torch.float32),
    torch.tensor([[1], [0], [0], [0]], dtype=torch.float32),
    torch.tensor([[0], [1], [0], [0]], dtype=torch.float32),
    torch.tensor([[0], [0], [0], [1]], dtype=torch.float32)
]
v_list = [
    torch.tensor([[0.3, 0.1, 0, 0.3]], dtype=torch.float32, requires_grad=True),
    torch.tensor([[0.3, 0.1, 0, 0.3]], dtype=torch.float32, requires_grad=True),
    torch.tensor([[0.3, 0.1, 12, 0.3]], dtype=torch.float32, requires_grad=True),
    torch.tensor([[0.3, 0.1, 0.8, 0.3]], dtype=torch.float32, requires_grad=True),
    torch.tensor([[0.3, 0.1, 0.8, 0.3]], dtype=torch.float32, requires_grad=True)
]


for A_dense, u, v in zip(A_dense_list, u_list, v_list):

    ppr_original = ppr(A_dense)
    print(ppr_original)

    A_pert = A_dense + u@v
    print(A_pert)

    ppr_target = ppr(A_dense + u@v)
    print('Soll', ppr_target)

    ppr_update = update(A_dense, ppr_original, u, v)
    print('Ist', ppr_update)

    ppr_update.sum().backward()
    print(v.grad)

    assert torch.allclose(ppr_update, ppr_target)

In [None]:
for A_dense, u, v in zip(A_dense_list, u_list, v_list):

    ppr_original = ppr(A_dense)
    print(ppr_original)

    A_pert = A_dense + u@v
    print(A_pert)

    ppr_target = ppr(A_dense + u@v)
    print('Soll', ppr_target)

    ppr_update = update(A_dense, ppr_original, u, v)
    print('Ist', ppr_update)

    ppr_update.sum().backward()
    print(v.grad)

    assert torch.allclose(ppr_update, ppr_target)