In [None]:
import seml
import pandas as pd
import numpy as np
from run_seml import run
from matplotlib import pyplot as plt

# Raise the pandas display limits: render up to 50 columns and 150 rows
# before a frame's output is truncated.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 150

In [None]:
# Load IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to local modules take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Fetch the experiments of the evasion-transfer attack collection as a
# DataFrame, restricted to the fields used by the cells below.
df_experiments = seml.get_results(
    'kdd21_rgnn_at_scale_attack_evasion_transfer',
    fields=['batch_id', 'slurm', 'config', 'result'],
    to_data_frame=True,
)

In [None]:
# Expand every experiment's 'result.results' entry into its own frame and tag
# each row with the configuration of the experiment that produced it.
annotated_frames = []
for raw_results, (_, experiment) in zip(df_experiments['result.results'], df_experiments.iterrows()):
    frame = pd.DataFrame(raw_results)
    frame['dataset'] = experiment['config.dataset']
    frame['attack'] = experiment['config.attack']
    frame['seed'] = experiment['config.seed']
    frame['batch_id'] = experiment['batch_id']

    # Flag runs whose attack used the 'MCE' or 'tanhCW' loss.
    loss_type = experiment['config.attack_params.loss_type']
    frame['novel_loss'] = (loss_type == 'MCE') | (loss_type == 'tanhCW')
    annotated_frames.append(frame)

# Stack all experiments and order them by batch so that, among rows that agree
# on everything except batch and accuracy, the row sorted last is retained.
df_results = pd.concat(annotated_frames, ignore_index=True).sort_values('batch_id')
dedup_columns = [c for c in df_results.columns if c not in ('batch_id', 'accuracy')]
df_results = df_results.drop_duplicates(subset=dedup_columns, keep='last')

df_results

In [None]:
# Dataset identifier -> display name used in the tables.
dataset_map = dict(
    cora_ml=r'Cora ML',
    citeseer=r'Citeseer',
)
# Display names, in the order the datasets are declared above.
dataset_order = list(dataset_map.values())

In [None]:
# Attack identifier -> display name used in the tables.
attack_map = dict(PRBCD=r'PR-BCD')
# Display names, in the order the attacks are declared above.
attack_order = list(attack_map.values())

In [None]:
def transform_label(label: str) -> str:
    """Return the display name for a result label.

    Currently the identity: the label is returned unchanged. Kept as a single
    hook so that labels can be renamed in one place.
    """
    return label

In [None]:
def calc_mean_and_error(values: pd.Series, seeds: pd.Series, with_error=True, decimal_places: int = 3):
    """Format the mean (and optionally the standard error) of per-seed values.

    NaN entries are dropped first. If a seed occurs more than once, only the
    first remaining value for that seed is used, so every seed contributes
    exactly one value.

    Parameters
    ----------
    values : pd.Series
        Numeric measurements, one per run.
    seeds : pd.Series
        Seed of each run, aligned position-wise with ``values``.
    with_error : bool
        If True, append a LaTeX plus-minus sign followed by the standard error
        of the mean.
    decimal_places : int
        Number of decimal places used for both numbers.

    Returns
    -------
    str
        The formatted mean, or mean and standard error. If no value remains
        after filtering, the numbers are rendered as ``nan``.
    """
    values, seeds = values.values, seeds.values
    is_valid = ~np.isnan(values)
    values, seeds = values[is_valid], seeds[is_valid]

    # np.unique reports the index of the first occurrence of every distinct seed.
    first_per_seed = np.unique(seeds, return_index=True)[1]
    values = values[first_per_seed]

    n_values = len(values)
    if n_values == 0:
        # Nothing to aggregate; avoid NumPy's empty-slice / division warnings.
        mean = error = float('nan')
    else:
        mean = np.mean(values)
        # Standard error of the mean: the standard deviation shrinks with
        # sqrt(n), not with n (np.std keeps its default ddof=0 here).
        error = np.std(values) / np.sqrt(n_values)

    if with_error:
        return rf'{mean:.{decimal_places}f} $\pm$ {error:.{decimal_places}f}'
    return rf'{mean:.{decimal_places}f}'

In [None]:
# Column headers of the summary tables.
architecture_c = r'Architecture'
# The dataset column header is three spaces, i.e. a visually blank title.
dataset_c = r'   '
attack_c = r'Attack'
epsilons_c = r'Frac. edges (epsilon)'

# NOTE(review): presumably the perturbation budgets to report and the LaTeX
# commands used to highlight them (one mark per epsilon); neither list is used
# in the cells visible here — confirm.
epsilons = [0.05, 0.1, 0.25]
epsilon_marks = [r'\textit', r'\underline', r'\textbf']

In [None]:

# Aggregate the per-seed results into one summary row per
# (dataset, attack, label, epsilon) combination, collected per dataset.

# Groups whose number of distinct seeds differs from this are reported below.
n_expected_seeds = 3

# One record list per dataset that gets its own table; groups belonging to any
# other dataset are not collected.
records_by_dataset = {'cora_ml': [], 'citeseer': []}

group_columns = ['dataset', 'attack', 'label', 'epsilon']
for (dataset, attack, label, epsilon), df_group in df_results.groupby(group_columns):
    if len(df_group.seed.unique()) != n_expected_seeds:
        # Report rather than fail. The label is part of the group key, so it is
        # included to make the message identify the group unambiguously.
        print(f'For {dataset}-{attack}-{label}-{epsilon} collected runs for seed {df_group.seed.tolist()}')

    records = records_by_dataset.get(dataset)
    if records is None:
        continue

    records.append({
        dataset_c: dataset_map[dataset],
        architecture_c: transform_label(label),
        attack_c: attack_map[attack],
        epsilons_c: epsilon,
        # Mean accuracy over the seeds, formatted as a string without the error term.
        'accuracy': calc_mean_and_error(df_group.accuracy, df_group.seed, with_error=False),
    })

df_cora = pd.DataFrame(records_by_dataset['cora_ml'])
df_citeseer = pd.DataFrame(records_by_dataset['citeseer'])

In [None]:

# Emit the Cora ML summary as a plain-text Markdown table.
# DataFrame.to_markdown relies on the optional `tabulate` package.
print(df_cora.to_markdown())