In [1]:
# Reload imported modules automatically so that edits to the project's source
# files take effect without restarting the kernel.
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
# Mode 2: reload all modules before executing each cell (see the link above).
%autoreload 2

In [2]:
import logging
import math
import sys

import numpy as np
import torch
from strn_and_rbstness.attacks import Attack, create_attack
from strn_and_rbstness.data import GraphDataset, split
from strn_and_rbstness.helper.utils import accuracy, count_edges
from strn_and_rbstness.models import create_model
from strn_and_rbstness.train import _train
from common import CSBM

# Root logger (no name given) at INFO level, so that INFO records emitted
# during training and device selection are displayed in the cell output.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)



## Sample CSBM

In [82]:
# Sampling configuration for the CSBM (contextual stochastic block model).
seed = 0  # shared seed: used for sampling here and for split/training below
n = 3000  # size passed to `csbm.sample` (number of sampled nodes)
avg_intra_degree = 1.5 * 2  # intra_edges_per_node * 2
avg_inter_degree = 0.5 * 2  # inter_edges_per_node * 2
# Edge parameters handed to the CSBM, derived from the target degrees.
# NOTE(review): the factor 2 presumably accounts for two equally sized
# classes (about n / 2 potential neighbours each) -- confirm against CSBM.
p = avg_intra_degree * 2 / (n - 1)
q = avg_inter_degree * 2 / (n - 1)
K = 0.5  # Defines distance between means of the Gaussians in sigma-units
sigma = 0.1
# Feature dimension grows like n / log(n)^2.
d = round(n / math.log(n)**2)
# Constant mean vector; every entry is K*sigma / (2*sqrt(d)), hence its
# Euclidean norm is K*sigma / 2.
mu = np.full(d, K * sigma / (2 * d**0.5), dtype=np.float32)
# Isotropic covariance sigma^2 * I.
cov = sigma**2 * np.identity(d, dtype=np.float32)

# X, A ~ CSBM(n, p, q, mu, cov)
csbm = CSBM(p, q, mu, cov)
# Use the configured seed instead of a second, hard-coded 0 so that changing
# `seed` above consistently changes the sampled graph as well.
X, A, y = csbm.sample(n, seed=seed)
print(f"Dim: {d}")

Dim: 47


## Configure GNN & Attack

In [83]:
# Architecture settings; extended with data-dependent sizes and passed to
# `create_model` in the training cell.
model_params = {
    "label": "GCN",
    "model": "DenseGCN",
    "n_filters": 64,
}
# Optimisation settings passed to `_train`.
train_params = {
    "loss_type": "CE",
    "lr": 1e-2,
    "weight_decay": 1e-3,
    "patience": 300,
    "max_epochs": 3000,
    "use_selftrain": False,
    "use_advtrain": False,
}
# Attack name and the keyword arguments forwarded to `create_attack`.
attack = "PGD"
attack_params = {
    "epochs": 200,
    "base_lr": 1e-2,
    "scale_lr_with_n_attacked_edges": True,
    "loss_type": "tanhMargin",  # or "CW"
}
# Attack budget as a fraction of the edge count (see the attack cell).
epsilon = 0.05

# Other
split_params = dict(
    strategy="normal",  # or "custom"
    p_trn=1,
    p_tst=0,  # "normal" uses 1 - p_trn, only for custom split strategy
    p_selftrn=0,  # Refers to unlabeled data, which is not test data,
                  # only for custom split strategy
)
# Logging interval settings passed to `_train`.
verbosity_params = {"display_steps": 100}
# Device
# Select the compute device: the CUDA GPU with the index below when CUDA is
# available, otherwise the CPU. `device` always ends up as a `torch.device`.
device = 0  # index of the CUDA device to use
if torch.cuda.is_available():
    device = torch.device(f"cuda:{device}")
    logging.info(f"Currently on gpu device {device}")
else:
    # Actually assign the fallback; a bare comparison here would leave
    # `device` as the integer GPU index and break tensor creation later on.
    device = torch.device("cpu")
    logging.warning("CUDA is not available, set device to 'cpu'")

INFO:root:Currently on gpu device cuda:0


#### Train GNN

In [84]:
# Seed the torch and NumPy global RNGs before splitting and training.
torch.manual_seed(seed)
np.random.seed(seed)
split_ids = split(y, split_params, seed)
# Copy the sampled features, adjacency and labels onto the selected device.
X_gpu = torch.tensor(X, dtype=torch.float32, device=device)
A_gpu = torch.tensor(A, dtype=torch.float32, device=device)
y_gpu = torch.tensor(y, device=device)
graph = GraphDataset((X_gpu, A_gpu, y_gpu), split_ids)
# Add the data-dependent sizes to the model configuration. Dict unpacking
# lets these keys overwrite existing ones, so re-running this cell does not
# fail with a duplicate-keyword TypeError once the keys are already present.
model_params = {
    **model_params,
    "n_features": graph.get_n_features(),
    "n_classes": graph.get_n_classes(),
}
model = create_model(model_params).to(device)
statistics = _train(model, graph, train_params, verbosity_params, None)
# NOTE(review): presumably statistics[1] is the per-epoch validation loss,
# making this the epoch with the lowest validation loss -- confirm in `_train`.
best_epoch = np.argmin(statistics[1])

INFO:root:
Epoch    0: loss_train: 0.69287, loss_val: 0.69271, acc_train: 0.52502, acc_val: 0.52099
INFO:root:
Epoch  100: loss_train: 0.52091, loss_val: 0.58610, acc_train: 0.76384, acc_val: 0.70153
INFO:root:
Epoch  200: loss_train: 0.48793, loss_val: 0.57483, acc_train: 0.78319, acc_val: 0.70286
INFO:root:
Epoch  300: loss_train: 0.48668, loss_val: 0.57214, acc_train: 0.78452, acc_val: 0.71019
INFO:root:
Epoch  400: loss_train: 0.47780, loss_val: 0.57490, acc_train: 0.79053, acc_val: 0.70020
INFO:root:
Epoch  500: loss_train: 0.47654, loss_val: 0.57453, acc_train: 0.78652, acc_val: 0.70620
INFO:root:
Epoch  600: loss_train: 0.47574, loss_val: 0.57695, acc_train: 0.78052, acc_val: 0.69887
INFO:root:
Epoch  700: loss_train: 0.48070, loss_val: 0.57645, acc_train: 0.78252, acc_val: 0.70353
INFO:root:
Epoch  800: loss_train: 0.47390, loss_val: 0.56654, acc_train: 0.79320, acc_val: 0.71019
INFO:root:
Epoch  900: loss_train: 0.47480, loss_val: 0.56654, acc_train: 0.78519, acc_val: 0.71153


In [85]:
# Indices of all nodes; reused by the attack and evaluation cells below.
idx_all = np.arange(len(y))
# Evaluate the trained model on the clean graph. The forward pass is only
# used for evaluation, so run it without recording an autograd graph.
model.eval()
with torch.no_grad():
    logits = model(X_gpu, A_gpu)
acc_trn = accuracy(logits, y_gpu)
print(acc_trn)

0.7583333253860474


#### Attack GNN

In [86]:
# Instantiate the configured attack against the trained model, targeting
# every node of the clean graph.
adversary = create_attack(
    attack,
    attr=X_gpu,
    adj=A_gpu,
    labels=y_gpu,
    model=model,
    idx_attack=idx_all,
    device=device,
    binary_attr=False,
    make_undirected=True,
    **attack_params,
)
# Budget: the fraction `epsilon` of the counted edges, rounded to an integer.
m = count_edges(A_gpu, idx_all)
n_perturbations = int(round(epsilon * m))
print(f"#Edges: {m} -> budget: {n_perturbations}")
# Run the attack, fetch the perturbed graph and evaluate the model on it.
adversary.attack(n_perturbations, _run=None)
A_pert, X_pert = adversary.get_pertubations()
logits, acc = Attack.evaluate_global(model, X_pert, A_pert, y_gpu, idx_all)
print(f"Accuracy: {acc}")

#Edges: 6206 -> budget: 310
Accuracy: 0.6576666831970215


In [87]:
# Per-node correctness masks on the clean and on the perturbed graph.
# Both forward passes are evaluation-only, so skip autograd bookkeeping.
with torch.no_grad():
    logits = model(X_gpu, A_gpu)
    logits_pert = model(X_pert, A_pert)
correct = logits.argmax(1) == y_gpu
correct_pert = logits_pert.argmax(1) == y_gpu

In [88]:
# Count with the tensor's own reduction instead of Python's builtin `sum`,
# which would iterate over the mask one element at a time.
n_correct = int(correct.sum())
n_correct_pert = int(correct_pert.sum())
# Restrict to nodes classified correctly on the clean graph: an entry is True
# when that node is still classified correctly after the perturbation.
correct_after_pert = correct_pert[correct]
n_still_correct = int(correct_after_pert.sum())
print(f"Correctly Classified: {n_correct}")
print(f"Correctly Classified Pert: {n_correct_pert}")
print(f"Still correct after perturbation: {n_still_correct}")
# Counts only correct -> wrong changes caused by the perturbation.
print(f"Flipped Predictions: {n_correct - n_still_correct}")


Correctly Classified: 2275
Correctly Classified Pert: 1973
Still correct after perturbation: 1959
Flipped Predictions: 316


#### Eval Separability

In [89]:
# Print the feature / structure / likelihood separability counts for the
# clean sampled graph (baseline for the perturbed graph).
csbm.check_separabilities(X, A, y)

Feature Separability:
n_corr: 1854
n_wrong: 1146
Structure Separability:
n_corr: 2549
n_wrong: 451
Likelihood Separability:
n_corr: 2581
n_wrong: 419


In [90]:
# Repeat the separability check with the perturbed adjacency (converted to a
# dense NumPy array on the CPU) and the clean features X.
A_pert_np = A_pert.to_dense().cpu().numpy()
csbm.check_separabilities(X, A_pert_np, y)

Feature Separability:
n_corr: 1854
n_wrong: 1146
Structure Separability:
n_corr: 2463
n_wrong: 537
Likelihood Separability:
n_corr: 2525
n_wrong: 475
