In [None]:
import os
os.chdir('../')

In [None]:
%matplotlib inline
#%matplotlib notebook

%load_ext autoreload
%autoreload 2

In [None]:
from copy import deepcopy
from typing import List, Tuple

from cycler import cycler
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import numpy as np
import pandas as pd
import scipy.sparse as sp
from sklearn.decomposition import PCA
import scipy.stats as stats
import torch
from torch import nn
import torch.nn.functional as F
import seml

import tqdm
tqdm.tqdm.pandas()
#plt.style.use('ggplot')

In [None]:
from notebooks import mpl_latex

In [None]:
#mpl_latex.enable_production_mode()

In [None]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)

In [None]:
labels_to_plot = [
    'Vanilla GCN',
    'Vanilla GDC',
    'SVD GCN',
    'Jaccard GCN',
    'RGCN',
    'Soft Medoid GDC (T=0.5)',
    #'Soft Median GDC (T=1.0)',
    #'Soft Median GDC (T=0.5)',
    'Soft Median GDC (T=0.2)'
]

In [None]:
dataset_map = {
    'cora_ml': r'\textbf{Cora ML}', # '\rotatebox{90}{Cora ML~\citep{Bojchevski2018}}',
    'citeseer': r'\textbf{Citeseer}', # ~\citep{McCallum2000}
    'pubmed': r'\textbf{PubMed}', # ~\citep{Sen2008}
    'ogbn-arxiv': r'\textbf{arXiv}', # ~\citep{Hu2020}
    'ogbn-products': r'\textbf{Products}', # ~\citep{Hu2020},
    'ogbn-papers100M': r'\textbf{Papers 100M}'
}
dataset_order = [dataset_map[k] for k in dataset_map.keys()]

In [None]:
attack_map = {
    'DICE': r'\textbf{DICE}',
    'GANG': r'\textbf{GANG (ours)}',
    'FGSM': r'\textbf{greedy FGSM}',
    'GreedyRBCD': r'\textbf{GR-BCD (ours)}',
    'PGD': r'\textbf{PGD}',
    'PRBCD': r'\textbf{PR-BCD (ours)}'
}
attack_order = [attack_map[k] for k in attack_map.keys()]

In [None]:
pm = r'\(\pm\)'
bpm = r'\(\boldsymbol{\pm}\)'

In [None]:
def make_max_bold(group):
    for col in list(group.columns):
        if not group[col].isna().all():
            idx = np.where(np.max(group[col].fillna("")) == group[col].fillna(""))[0]
            group.loc[group.index[idx], col] = rf'\textbf{{{group.loc[group.index[idx], col].iloc[0]}}}'\
                .replace(pm, bpm)
    return group

In [None]:
def make_second_best_underlined(group):
    for col in list(group.columns):
        if not group[col].isna().all() and np.unique(group[col].fillna("")).shape[0] > 1:
            second_max = np.unique(group[col].fillna("").apply(
                lambda v: v.replace(r'\textbf{', '').replace('}', '')
            ))[-2]
            idx = np.where(second_max == group[col].fillna(""))[0]
            group.loc[group.index[idx], col] = rf'\underline{{{group.loc[group.index[idx], col].iloc[0]}}}'
    return group

In [None]:
def mark_best_and_second_best(group: pd.DataFrame, 
                              first_mark: str = r'\textbf',
                              second_mark: str = r'\underline',
                              dimension: int = 0) -> pd.DataFrame:
    iterable  

In [None]:
def calc_mean_and_error(values: pd.Series, seeds: pd.Series, with_error=True, decimal_places: int = 3): 
    values, seeds = values.values, seeds.values
    seeds = seeds[~np.isnan(values)]
    values = values[~np.isnan(values)]

    idx = np.unique(seeds, return_index=True)[1]
    values = values[idx]
    
    if with_error:
        return rf'{np.mean(values):.{decimal_places}f} $\pm$ {np.std(values)/len(values):.{decimal_places}f}'
    else:
        return rf'{np.mean(values):.{decimal_places}f}'

In [None]:
from functools import partial

def _mark_best_and_second_best(vector: np.ndarray, 
                               first_mark: str = r'\textbf',
                               second_mark: str = r'\underline',
                               is_higher_better : bool = True) -> np.ndarray:
    vector = vector.astype(object)
    values = np.unique(vector[vector == vector])
    if not len(values):
        return vector
    
    if first_mark:
        if is_higher_better:
            mask = [vector == values[-1]]
        else:
            mask = [vector == values[0]]
        vector[mask] = np.char.add(np.char.add(first_mark + '{', vector[mask]), '}')
    if second_mark:
        if is_higher_better:
            mask = [vector == values[-2]]
        else:
            mask = [vector == values[1]]
        vector[mask] = np.char.add(np.char.add(second_mark + '{', vector[mask]), '}')
    return vector
    

def mark_best_and_second_best(df: pd.DataFrame, 
                              first_mark: str = r'\textbf',
                              second_mark: str = r'\underline',
                              axis: int = 0,
                              is_higher_better : bool = True) -> pd.DataFrame:
    df[:] = np.apply_along_axis(
        partial(_mark_best_and_second_best, first_mark=first_mark, 
                second_mark=second_mark, is_higher_better=is_higher_better),
        axis=axis,
        arr=df.values
    )
    return df

In [None]:
df_experiments = seml.get_results('kdd21_rgnn_at_scale_attack_evasion_transfer',
                                  to_data_frame=True,
                                  fields=['batch_id', 'slurm', 'config', 'result'])
df_experiments.head()

In [None]:
df_experiments.columns

In [None]:
df_experiments.shape

In [None]:
df_results = [
    pd.DataFrame(r) 
    for r in df_experiments['result.results']
]
for df_result, (_, df_experiment) in zip(df_results, df_experiments.iterrows()):
    df_result['dataset'] = df_experiment['config.dataset']
    df_result['attack'] = df_experiment['config.attack']
    df_result['seed'] = df_experiment['config.seed']
    df_result['batch_id'] = df_experiment['batch_id']
    df_result['novel_loss'] = (
        (df_experiment['config.attack_params.stop_optimizing_if_label_flipped'] == True)
        | (df_experiment['config.attack_params.loss_type'] == 'MCE')
        | (df_experiment['config.attack_params.loss_type'] == 'tanhCW')
    )

df_results = pd.concat(df_results, ignore_index=True)
df_results = df_results.sort_values('batch_id')
df_results = df_results.drop_duplicates([
    c for c in df_results.columns if c != 'batch_id' and c != 'accuracy'
], keep='last')

df_results

In [None]:
df_results.attack.unique()

In [None]:
df_results.label.unique()

In [None]:
architecture_c = r'\textbf{Architecture}'
dataset_c = r'   '
attack_c = r'\textbf{Attack}'
epsilons_c = r'Frac. edges \(\boldsymbol{\epsilon}\)'

epsilons = [0.01, 0.05, 0.1, 0.25]
epsilon_marks = ['', r'\textit', r'\underline', r'\textbf']

In [None]:
def transform_label(label: str):
    return label.replace('\n', ' ').replace(' (T=0.5)','').replace(' (T=0.2)','')

In [None]:
df = []

for (dataset, attack, label, epsilon), df_group in df_results[
    (df_results['novel_loss']
     | (df_results['attack'] == 'DICE')
     | (df_results['attack'] == 'GANG'))
    & df_results['label'].isin(labels_to_plot)
].groupby(['dataset', 'attack', 'label', 'epsilon']):
    if len(df_group.seed.unique()) != 3:
        print(f'For {dataset}-{attack}-{epsilon} collected runs for seed {df_group.seed.tolist()}')

    accurcy = calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=False)
    df.append({
        dataset_c: dataset_map[dataset],
        architecture_c: transform_label(label),
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        'accuracy': accurcy
    })
    
df = pd.DataFrame(df)

df = df[
    ~((df[attack_c] == attack_map['PGD']) & (df[dataset_c] == dataset_map['pubmed']))
    & ~((df[attack_c] == attack_map['FGSM']) & (df[dataset_c] == dataset_map['pubmed']))
    #& ~((df[architecture_c] == transform_label('Soft Medoid GDC (T=0.5)')) & (df[dataset_c] == dataset_map['pubmed']))
]

df = df[df[epsilons_c].isin(epsilons)].copy()

df

In [None]:
df_accuracy = df[(df[epsilons_c] == 0) & (df[attack_c] == attack_map['PRBCD'])]
df_accuracy = df_accuracy.drop(columns=[attack_c, epsilons_c])
df_accuracy = df_accuracy.set_index([dataset_c, architecture_c])
df_accuracy.columns = pd.MultiIndex.from_product([['\textbf{Accuracy}'], [' ']])
df_accuracy

In [None]:
df[architecture_c] = df[architecture_c].astype("category")
df[architecture_c].cat.set_categories(
    [transform_label(l) for l in labels_to_plot],
    inplace=True
)
df[dataset_c] = df[dataset_c].astype("category")
df[dataset_c].cat.set_categories(
    dataset_order,
    inplace=True
)

df_piv = pd.pivot_table(
    df, 
    index=[dataset_c, architecture_c], 
    columns=[attack_c, epsilons_c], #['type', attack_c, ' '], 
    values='accuracy',
    aggfunc=lambda x: ' '.join(x)
)

df_piv = df_piv[[c for attack in attack_order for c in df_piv.columns if c[0] == attack]]

for epsilon, mark in zip(epsilons, epsilon_marks):
    current_columns = [c for c in df_piv.columns if c[1] == epsilon]
    df_piv[current_columns] = df_piv[current_columns].groupby(dataset_c).apply(
        partial(mark_best_and_second_best, first_mark=mark, second_mark='', axis=1, is_higher_better=False)
    )

def acc_to_mean_and_error(group: pd.DataFrame, with_error=True, decimal_places: int = 3):
    return calc_mean_and_error(group['accuracy'], group['seed'], with_error=with_error, decimal_places=decimal_places)

df_piv = df_piv.join(df_accuracy)
df_piv = df_piv.fillna('-')
df_piv

In [32]:
print(df_piv\
    .sort_index(level=1, ascending=False, sort_remaining=False)\
    .sort_index(level=0, ascending=True, sort_remaining=True)\
    .to_latex(
        float_format='%.2f',
        caption=r'Perturbed accuracy for the proposed attacks (see Sections~\ref{sec:attackkdd}-\ref{sec:prbcd}) and baselines on all datasets (see Table~\ref{tab:datasets}). \(\epsilon\) denotes the fraction of edges perturbed (relative to the clean graph). The last column contains the clean accuracy. As this a work-in-progress report, the experiments for the defenses on the large datasets are due and on Products we did not optimize the hyperparameters for GANG. For each architecture we italicize the strongest attack where \(\epsilon=0.05\), underline where \(\epsilon=0.1\), and embolden where \(\epsilon=0.25\). From an attack perspective, a lower perturbed accuracy is better. We rerun the experiments with three different seeds. For OGB we use the provided data splits and otherwise we use random split with 20 nodes per class.', 
        label='tab:global',
        escape=False,
        multirow=True,
        multicolumn=True,
        column_format='llccccccccccccccccccccccccc'
    )
)

\begin{table}
\centering
\caption{Perturbed accuracy for the proposed attacks (see Sections~\ref{sec:attackkdd}-\ref{sec:prbcd}) and baselines on all datasets (see Table~\ref{tab:datasets}). \(\epsilon\) denotes the fraction of edges perturbed (relative to the clean graph). The last column contains the clean accuracy. As this a work-in-progress report, the experiments for the defenses on the large datasets are due and on Products we did not optimize the hyperparameters for GANG. For each architecture we italicize the strongest attack where \(\epsilon=0.05\), underline where \(\epsilon=0.1\), and embolden where \(\epsilon=0.25\). From an attack perspective, a lower perturbed accuracy is better. We rerun the experiments with three different seeds. For OGB we use the provided data splits and otherwise we use random split with 20 nodes per class.}
\label{tab:global}
\begin{tabular}{llccccccccccccccccccccccccc}
\toprule
                  & \textbf{Attack} & \multicolumn{4}{l}{\textbf{DICE}}

# Compare losses

In [37]:
df = []

for (dataset, attack, label, epsilon, novel_loss), df_group in df_results[
    df_results['attack'].isin(['FGSM', 'PGD'])
    & df_results['label'].isin(labels_to_plot)
].groupby(['dataset', 'attack', 'label', 'epsilon', 'novel_loss']):
    if len(df_group.seed.unique()) != 3:
        print(f'For {dataset}-{attack}-{epsilon} collected runs for seed {df_group.seed.tolist()}')

    accurcy = calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=False)
    df.append({
        dataset_c: dataset_map[dataset],
        architecture_c: transform_label(label),
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        'accuracy': accurcy
    })
    
df = pd.DataFrame(df)

df = df[
    ~((df[attack_c] == attack_map['PGD']) & (df[dataset_c] == dataset_map['pubmed']))
    & ~((df[attack_c] == attack_map['FGSM']) & (df[dataset_c] == dataset_map['pubmed']))
]

df = df[df[epsilons_c].isin(epsilons)].copy()

df

For pubmed-PGD-0.0 collected runs for seed [1, 0]
For pubmed-PGD-0.01 collected runs for seed [1, 0]
For pubmed-PGD-0.05 collected runs for seed [1, 0]
For pubmed-PGD-0.1 collected runs for seed [1, 0]
For pubmed-PGD-0.25 collected runs for seed [1, 0]
For pubmed-PGD-0.0 collected runs for seed [1, 0]
For pubmed-PGD-0.01 collected runs for seed [1, 0]
For pubmed-PGD-0.05 collected runs for seed [1, 0]
For pubmed-PGD-0.1 collected runs for seed [1, 0]
For pubmed-PGD-0.25 collected runs for seed [1, 0]
For pubmed-PGD-0.0 collected runs for seed [1, 0]
For pubmed-PGD-0.01 collected runs for seed [1, 0]
For pubmed-PGD-0.05 collected runs for seed [1, 0]
For pubmed-PGD-0.1 collected runs for seed [1, 0]
For pubmed-PGD-0.25 collected runs for seed [1, 0]


Unnamed: 0,Unnamed: 1,\textbf{Architecture},\textbf{Attack},Frac. edges \(\boldsymbol{\epsilon}\),accuracy
2,\textbf{Citeseer},Jaccard GCN,\textbf{greedy FGSM},0.01,0.709
3,\textbf{Citeseer},Jaccard GCN,\textbf{greedy FGSM},0.01,0.696
4,\textbf{Citeseer},Jaccard GCN,\textbf{greedy FGSM},0.05,0.680
5,\textbf{Citeseer},Jaccard GCN,\textbf{greedy FGSM},0.05,0.641
6,\textbf{Citeseer},Jaccard GCN,\textbf{greedy FGSM},0.10,0.648
...,...,...,...,...,...
275,\textbf{Cora ML},Vanilla GDC,\textbf{PGD},0.05,0.716
276,\textbf{Cora ML},Vanilla GDC,\textbf{PGD},0.10,0.704
277,\textbf{Cora ML},Vanilla GDC,\textbf{PGD},0.10,0.648
278,\textbf{Cora ML},Vanilla GDC,\textbf{PGD},0.25,0.595


In [40]:
df[architecture_c] = df[architecture_c].astype("category")
df[architecture_c].cat.set_categories(
    [transform_label(l) for l in labels_to_plot],
    inplace=True
)
df[dataset_c] = df[dataset_c].astype("category")
df[dataset_c].cat.set_categories(
    dataset_order,
    inplace=True
)

df_piv = pd.pivot_table(
    df, 
    index=[dataset_c, architecture_c], 
    columns=[attack_c, epsilons_c], #['type', attack_c, ' '], 
    values='accuracy',
    aggfunc=lambda x: ' '.join(x)
)

df_piv = df_piv[[c for attack in attack_order for c in df_piv.columns if c[0] == attack]]

for epsilon, mark in zip(epsilons, epsilon_marks):
    current_columns = [c for c in df_piv.columns if c[1] == epsilon]
    df_piv[current_columns] = df_piv[current_columns].groupby(dataset_c).apply(
        partial(mark_best_and_second_best, first_mark=mark, second_mark='', axis=1, is_higher_better=False)
    )

def acc_to_mean_and_error(group: pd.DataFrame, with_error=True, decimal_places: int = 3):
    return calc_mean_and_error(group['accuracy'], group['seed'], with_error=with_error, decimal_places=decimal_places)

df_piv = df_piv.join(df_accuracy)
df_piv = df_piv.fillna('-')
df_piv

  vector[mask] = np.char.add(np.char.add(first_mark + '{', vector[mask]), '}')


Unnamed: 0_level_0,\textbf{Attack},\textbf{greedy FGSM},\textbf{greedy FGSM},\textbf{greedy FGSM},\textbf{greedy FGSM},\textbf{PGD},\textbf{PGD},\textbf{PGD},\textbf{PGD},\textbf{Accuracy}
Unnamed: 0_level_1,Frac. edges \(\boldsymbol{\epsilon}\),0.01,0.05,0.1,0.25,0.01,0.05,0.1,0.25,Unnamed: 10_level_1
Unnamed: 0_level_2,\textbf{Architecture},Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
\textbf{Cora ML},Vanilla GCN,0.809 0.786,0.758 0.691,0.719 0.609,0.635 0.460,0.805 0.789,\textit{0.755 0.},\underline{0.705 0.},\textbf{0.590 0.},-
\textbf{Cora ML},Vanilla GDC,0.814 0.795,0.759 0.704,0.718 0.638,0.639 0.528,0.811 0.796,\textit{0.755 0.},\underline{0.704 0.},\textbf{0.595 0.},-
\textbf{Cora ML},SVD GCN,0.758 0.757,0.741 0.743,0.715 0.721,0.640 0.637,0.757 0.757,\textit{0.735 0.},\underline{0.696 0.},\textbf{0.603 0.},-
\textbf{Cora ML},Jaccard GCN,0.807 0.787,0.760 0.712,0.722 0.644,0.643 0.525,0.802 0.790,\textit{0.756 0.},\underline{0.708 0.},\textbf{0.601 0.},-
\textbf{Cora ML},RGCN,0.786 0.773,0.743 0.700,0.708 0.639,0.631 0.510,0.785 0.776,\textit{0.739 0.},\underline{0.699 0.},\textbf{0.594 0.},-
\textbf{Cora ML},Soft Medoid GDC,0.806 0.807,\textit{0.772 0.},0.743 0.777,\textbf{0.679 0.},0.805 0.805,0.774 0.782,\underline{0.743 0.},0.683 0.725,-
\textbf{Cora ML},Soft Median GDC,0.809 0.808,\textit{0.772 0.},0.741 0.769,\textbf{0.675 0.},0.807 0.805,0.774 0.776,\underline{0.740 0.},0.677 0.718,-
\textbf{Citeseer},Vanilla GCN,0.705 0.685,0.669 0.606,0.632 0.534,0.540 0.390,0.703 0.687,\textit{0.660 0.},\underline{0.618 0.},\textbf{0.526 0.},-
\textbf{Citeseer},Vanilla GDC,0.700 0.683,0.659 0.604,0.622 0.535,0.533 0.413,0.698 0.684,\textit{0.655 0.},\underline{0.610 0.},\textbf{0.517 0.},-
\textbf{Citeseer},SVD GCN,0.640 0.638,0.624 0.621,0.602 0.599,\textbf{0.363 0.},0.638 0.638,\textit{0.614 0.},\underline{0.593 0.},0.490 0.471,-


# Datasets

In [15]:
def value_to_storage(val: float, decimals: int = 2):
    if val / 1e3 < 1:
        return f'{val:.{decimals}f} B'
    if val / 1e6 < 1:
        return f'{val / 1e3:.{decimals}f} kB'
    if val / 1e9 < 1:
        return f'{val / 1e6:.{decimals}f} MB'
    if val / 1e12 < 1:
        return f'{val / 1e9:.{decimals}f} GB'
    if val / 1e15 < 1:
        return f'{val / 1e12:.{decimals}f} TB'
    if val / 1e18 < 1:
        return f'{val / 1e15:.{decimals}f} PB'
    raise ValueError(f'{val} is too big for Peta!!!')

In [17]:
from ogb.nodeproppred import PygNodePropPredDataset

from rgnn_at_scale.data import load_dataset

dataset_df = []
for dataset in tqdm.tqdm(['cora_ml', 'citeseer', 'pubmed', 'ogbn-arxiv', 'ogbn-products', 'ogbn-papers100M']):
    if dataset.startswith('ogbn'):
        pyg_dataset = PygNodePropPredDataset(root='/nfs/staff-ssd/geisler/dontrobme/datasets', name=dataset)
        nnodes = pyg_dataset[0].x.shape[0]
        nedges = pyg_dataset[0].edge_index.shape[1]
        nfeatures = pyg_dataset[0].x.shape[1]
    else:
        graph = load_dataset(dataset, '/nfs/staff-ssd/geisler/dontrobme/datasets')
        #graph = load_and_standardize(dataset)
        nnodes = graph.adj_matrix.shape[0]
        nedges = graph.adj_matrix.nnz
        nfeatures = graph.attr_matrix.shape[1]
    dataset_df.append({
            r'\textbf{Dataset}': dataset_map[dataset].replace(r'rotatebox{90}', r'textbf'),
            r'\textbf{\#Nodes $n$}': f'{nnodes:,}',
            r'\textbf{\#Edges $e$}': f'{nedges:,}',
            r'\textbf{\#Features $d$}': f'{nfeatures:,}',
            r'\textbf{\#Possible edges}': f'{nnodes ** 2:.3E}',
            r'\textbf{Size (dense)}': value_to_storage(4 * nnodes ** 2),
            r'\textbf{Size (sparse)}': value_to_storage(2*8*nedges + 4*nedges),
        })
dataset_df = pd.DataFrame(dataset_df).set_index(r'\textbf{Dataset}')
dataset_df

100%|██████████| 6/6 [11:48<00:00, 118.08s/it]


Unnamed: 0_level_0,\textbf{\#Nodes $n$},\textbf{\#Edges $e$},\textbf{\#Features $d$},\textbf{\#Possible edges},\textbf{Size (dense)},\textbf{Size (sparse)}
\textbf{Dataset},Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
\textbf{Cora ML},2995,8416,2879,8970000.0,35.88 MB,168.32 kB
\textbf{Citeseer},3312,4715,3703,10970000.0,43.88 MB,94.30 kB
\textbf{PubMed},19717,88648,500,388800000.0,1.56 GB,1.77 MB
\textbf{arXiv},169343,1166243,128,28680000000.0,114.71 GB,23.32 MB
\textbf{Products},2449029,123718280,100,5998000000000.0,23.99 TB,2.47 GB
\textbf{Papers 100M},111059956,1615685872,128,1.233e+16,49.34 PB,32.31 GB


In [18]:
print(dataset_df
    #.sort_index()
    .to_latex(
        float_format='%.2f',
        caption='Statistics of the used datasets. For the dense adjacency matrix we assume that each elements is represented by 4 bytes. In the sparse case we use two 8 byte integer pointers and a 4 bytes float value.', 
        label='tab:datasets',
        escape=False
    )
)

\begin{table}
\centering
\caption{Statistics of the used datasets. For the dense adjacency matrix we assume that each elements is represented by 4 bytes. In the sparse case we use two 8 byte integer pointers and a 4 bytes float value.}
\label{tab:datasets}
\begin{tabular}{lllllll}
\toprule
{} & \textbf{\#Nodes $n$} & \textbf{\#Edges $e$} & \textbf{\#Features $d$} & \textbf{\#Possible edges} & \textbf{Size (dense)} & \textbf{Size (sparse)} \\
\textbf{Dataset}     &                      &                      &                         &                           &                       &                        \\
\midrule
\textbf{Cora ML}     &                2,995 &                8,416 &                   2,879 &                 8.970E+06 &              35.88 MB &              168.32 kB \\
\textbf{Citeseer}    &                3,312 &                4,715 &                   3,703 &                 1.097E+07 &              43.88 MB &               94.30 kB \\
\textbf{PubMed}      &    