In [2]:
# Enable the autoreload extension in mode 2 so that imported modules are
# reloaded before each cell runs (edits to the project sources are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2
# Render matplotlib figures with the interactive "notebook" backend.
%matplotlib notebook

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

In [4]:
# Seed every random number generator used in this notebook so that the
# splits, the training runs and the attacks can be reproduced.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Ask cuDNN for deterministic kernels and switch off its autotuner, which
# could otherwise select different algorithms from one run to the next.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [140]:
from src.nettack.GCN import *
from src.nettack.utils import *
from src.nettack.nettack import *
from src.nettack.replication import *
from src.nettack.defense import *

# Graph randomization

# Graph optimization

## Normal attack

In [77]:
# Baseline evaluator: load Cora, create the train / validation / unlabeled
# splits (their sizes are printed), then train the surrogate model.
# The call order matters: each step works on the state left by the previous one.
Ev = Evaluater()
Ev.load_dataset('src/data/cora.npz')
Ev.create_splits()
Ev.train_model(surrogate=True)

Selecting 1 largest connected components
Number of training node : 247
Number of validation nodes : 249
Number of unlabeled (unknown) nodes : 1988


  'precision', 'predicted', average, warn_for)


converged after 14 iterations
Validation accuracy : 81.53%
Train accuracy : 100.00%
Unlabeled accuracy : 82.85%


In [78]:
# Use the first node of the unlabeled split as the target node for the
# attack / defense experiments below, and display its ID.
u = Ev.split_unlabeled[0]
u

373

In [79]:
# Attack the target node u; verbose=True prints each perturbation as it is chosen.
Ev.attack(u=u, verbose=True)

##### Starting attack #####
##### Attack node with ID 373 using structure and feature perturbations #####
##### Attacking the node directly #####
##### Performing 5 perturbations #####
##### ...1/5 perturbations ... #####
Edge perturbation: [373 787]
##### ...2/5 perturbations ... #####
Edge perturbation: [ 373 2122]
##### ...3/5 perturbations ... #####
Edge perturbation: [ 373 2121]
##### ...4/5 perturbations ... #####
Edge perturbation: [ 373 1422]
##### ...5/5 perturbations ... #####
Edge perturbation: [373 774]


In [80]:
# Retrain the non-surrogate model with with_perturb=False (presumably on the
# unperturbed graph -- confirm in Evaluater.train_model), then display the
# margin stored for the attacked node.
Ev.train_model(surrogate=False, with_perturb=False)
Ev.margins[Ev.nettack.u]

converged after 24 iterations
Validation accuracy : 85.54%
Train accuracy : 100.00%
Unlabeled accuracy : 84.05%


-0.35043725

In [81]:
# Retrain the non-surrogate model with with_perturb=True (presumably with the
# attack perturbations applied -- confirm in Evaluater.train_model), then
# display the margin stored for the attacked node.
Ev.train_model(surrogate=False, with_perturb=True)
Ev.margins[Ev.nettack.u]

converged after 28 iterations
Validation accuracy : 84.34%
Train accuracy : 100.00%
Unlabeled accuracy : 84.66%


-0.9972175

## Defending then attacking

In [82]:
# Defense evaluator on the same dataset. It reuses the splits of `Ev` so that
# both evaluators train and validate on exactly the same nodes.
EvDef = EvaluaterDef()
EvDef.load_dataset('src/data/cora.npz')
EvDef.split_train, EvDef.split_val, EvDef.split_unlabeled = (
    Ev.split_train,
    Ev.split_val,
    Ev.split_unlabeled,
)
# Train the surrogate model without printing the training summary.
EvDef.train_model(disp=False, surrogate=True)

Selecting 1 largest connected components


In [83]:
# Run the defense for the same target node u, with feature perturbations
# enabled; verbose=True prints each perturbation as it is chosen.
EvDef.defend(u=u, verbose=True, perturb_features=True)

##### Starting attack #####
##### Attack node with ID 373 using structure and feature perturbations #####
##### Attacking the node directly #####
##### Performing 5 perturbations #####
##### ...1/5 perturbations ... #####
Edge perturbation: [373 405]
##### ...2/5 perturbations ... #####
Edge perturbation: [ 373 2049]
##### ...3/5 perturbations ... #####
Edge perturbation: [ 373 1241]
##### ...4/5 perturbations ... #####
Edge perturbation: [ 373 2434]
##### ...5/5 perturbations ... #####
Edge perturbation: [ 373 2307]


In [84]:
# If no attack
# Retrain the non-surrogate model of the defense evaluator with
# with_perturb=False and display the margin of the target node.
EvDef.train_model(surrogate=False, with_perturb=False)
# Index with `u`, the node that was passed to EvDef.defend, rather than
# reaching into the other evaluator (Ev.nettack.u): the lookup then no longer
# depends on the state of `Ev`.
EvDef.margins[u]

converged after 29 iterations
Validation accuracy : 83.94%
Train accuracy : 100.00%
Unlabeled accuracy : 84.31%


-0.23924547

In [85]:
# attacking on the defended graph
# Train a fresh GCN on the adjacency matrix and features held by
# EvDef.nettack, then compute the classification margin of every node by hand.
nn = GCN([16, EvDef.K],
         sparse_numpy2sparse_torch(EvDef.nettack.adj_preprocessed),
         sparse_numpy2sparse_torch(EvDef.nettack.X_obs), with_relu=True)
model = GCN_Model(nn, lr=1e-2)
model.train(Ev.split_train, Ev.split_val, Ev.Ztorch, print_info=True, debug=False)

# Computing logits for every node
node_ids = np.arange(Ev.N)
model._compute_loss_and_backprop(node_ids, Ev.Ztorch, backward=False)
logits = model.logit_nodes.detach().cpu().numpy()

# Softmax over the classes. The row maximum is subtracted first: this leaves
# the result mathematically unchanged but prevents np.exp from overflowing to
# inf (and the division from producing nan) for large logits.
exp_logits = np.exp(logits - logits.max(axis=1, keepdims=True))
probas = exp_logits / exp_logits.sum(axis=1, keepdims=True)

# Classes of each node ordered by decreasing probability.
probas_surr_sorted = np.argsort(-probas, axis=1)
# argmin over the boolean mask gives the first rank whose class differs from
# the true label, i.e. the most probable class other than the true one.
second_l = probas_surr_sorted[node_ids, (probas_surr_sorted == Ev.Z[:, None]).argmin(axis=1)]
# Margin = P(true class) - P(best other class); negative means misclassified.
margins = probas[node_ids, Ev.Z] - probas[node_ids, second_l]
margins[Ev.nettack.u]

converged after 38 iterations


0.98006696

## Bigger evaluation

### Cora

In [88]:
# --- Cora: baseline evaluator ---------------------------------------------
# Fresh evaluator with new splits and a silently trained surrogate model.
Ev = Evaluater()
Ev.load_dataset('src/data/cora.npz')
Ev.create_splits()
Ev.train_model(disp=False, surrogate=True)

# --- Cora: defense evaluator ----------------------------------------------
# Same dataset and same splits as `Ev`, so both are evaluated on identical nodes.
EvDef = EvaluaterDef()
EvDef.load_dataset('src/data/cora.npz')
EvDef.split_train, EvDef.split_val, EvDef.split_unlabeled = (
    Ev.split_train,
    Ev.split_val,
    Ev.split_unlabeled,
)
EvDef.train_model(disp=False, surrogate=True)

Selecting 1 largest connected components
Number of training node : 247
Number of validation nodes : 249
Number of unlabeled (unknown) nodes : 1988


  'precision', 'predicted', average, warn_for)


Selecting 1 largest connected components


In [110]:
# Run the attack / defense evaluation on Cora for 10 nodes with 10 retrainings
# each. The evaluators are passed explicitly, matching the Citeseer call
# further down: the function is imported from a module and cannot see the
# notebook's `Ev` / `EvDef` globals.
margins, nodes = evaluate_graph_optim(Ev, EvDef, n_nodes=10, n_retrain=10)

HBox(children=(IntProgress(value=0), HTML(value='')))

In [111]:
# Average the margins over axis 1 (presumably the retrainings -- confirm in
# evaluate_graph_optim); the result has one row per evaluated node.
# Column meaning, as used by the box plot labels: 0 = no attack / before
# defense, 1 = attack / before defense, 2 = no attack / after defense,
# 3 = attack / after defense.
margins.mean(1)

array([[ 0.99850892, -0.99936984,  0.99756885,  0.99999861],
       [ 0.03302533, -0.91940293,  0.0925908 ,  0.99052081],
       [ 0.97546324, -0.04024588,  0.98060873,  0.99756142],
       [ 0.83337374, -0.98811147,  0.83941042,  0.9971488 ],
       [ 0.94310045, -0.8598625 ,  0.94944282,  0.99712673],
       [-0.83084099, -0.99970549, -0.85522097, -0.75348406],
       [ 0.86471704, -0.94760023,  0.91002108,  0.99914728],
       [ 0.36517015, -0.80328937,  0.3790491 ,  0.99527744],
       [ 0.99945073, -0.9719606 ,  0.99943139,  0.99999596],
       [ 0.9990738 , -0.68916427,  0.99913055,  0.99999229]])

In [136]:
# Box plot of all Cora margins (every node and retraining pooled together),
# one box per scenario.
# The columns of `margins` are reordered so that the two "no attack" boxes
# come first, followed by the two "attack" boxes.
column_order = [0, 2, 1, 3]
box_data = np.column_stack([margins[:, :, col].ravel() for col in column_order])
box_labels = [
    'No attack\nBefore defense',
    'No attack\nAfter defense',
    'Attack\nBefore defense',
    'Attack\nAfter defense',
]

fig, ax = plt.subplots()
# The gap between positions 2 and 5 visually separates the two groups.
ax.boxplot(box_data, labels=box_labels, positions=[0, 2, 5, 7])
ax.set_ylabel('margin')
fig.savefig('figures/defense_optim_cora.pdf', format='pdf')

<IPython.core.display.Javascript object>

### Citeseer

In [141]:
# --- Citeseer: baseline evaluator -----------------------------------------
# Fresh evaluator with new splits and a silently trained surrogate model.
Ev = Evaluater()
Ev.load_dataset('src/data/citeseer.npz')
Ev.create_splits()
Ev.train_model(disp=False, surrogate=True)

# --- Citeseer: defense evaluator ------------------------------------------
# Same dataset and same splits as `Ev`, so both are evaluated on identical nodes.
EvDef = EvaluaterDef()
EvDef.load_dataset('src/data/citeseer.npz')
EvDef.split_train, EvDef.split_val, EvDef.split_unlabeled = (
    Ev.split_train,
    Ev.split_val,
    Ev.split_unlabeled,
)
EvDef.train_model(disp=False, surrogate=True)

Selecting 1 largest connected components
Number of training node : 210
Number of validation nodes : 211
Number of unlabeled (unknown) nodes : 1688


  'precision', 'predicted', average, warn_for)


Selecting 1 largest connected components


In [None]:
# Run the attack / defense evaluation on Citeseer for 10 nodes with 10
# retrainings each. This rebinds `margins` and `nodes`, replacing the Cora
# results held under the same names.
margins, nodes = evaluate_graph_optim(Ev, EvDef, n_nodes=10, n_retrain=10)

HBox(children=(IntProgress(value=0), HTML(value='')))

In [111]:
# Average the Citeseer margins over axis 1 (presumably the retrainings --
# confirm in evaluate_graph_optim); the result has one row per evaluated node.
# NOTE(review): re-run this cell once the Citeseer evaluation has finished and
# verify that the saved output is not left over from the Cora run.
margins.mean(1)

array([[ 0.99850892, -0.99936984,  0.99756885,  0.99999861],
       [ 0.03302533, -0.91940293,  0.0925908 ,  0.99052081],
       [ 0.97546324, -0.04024588,  0.98060873,  0.99756142],
       [ 0.83337374, -0.98811147,  0.83941042,  0.9971488 ],
       [ 0.94310045, -0.8598625 ,  0.94944282,  0.99712673],
       [-0.83084099, -0.99970549, -0.85522097, -0.75348406],
       [ 0.86471704, -0.94760023,  0.91002108,  0.99914728],
       [ 0.36517015, -0.80328937,  0.3790491 ,  0.99527744],
       [ 0.99945073, -0.9719606 ,  0.99943139,  0.99999596],
       [ 0.9990738 , -0.68916427,  0.99913055,  0.99999229]])

In [136]:
# Box plot of all Citeseer margins (every node and retraining pooled
# together), one box per scenario. The columns of `margins` are reordered to
# [0, 2, 1, 3] so that the two "no attack" boxes come first, followed by the
# two "attack" boxes; the gap in `positions` separates the two groups.
plt.figure()
plt.boxplot(np.stack([margins[:, :, i].flatten() for i in [0, 2, 1, 3]], axis=1),
            labels=['No attack\nBefore defense', 'No attack\nAfter defense',
                    'Attack\nBefore defense' ,'Attack\nAfter defense'],
            positions=[0, 2, 5, 7])
plt.ylabel('margin')
# Save under a Citeseer-specific file name: the original path
# ('figures/defense_optim_cora.pdf') silently overwrote the Cora figure.
plt.savefig('figures/defense_optim_citeseer.pdf', format='pdf')

<IPython.core.display.Javascript object>