In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the project sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Interactive matplotlib backend for the notebook front-end.
%matplotlib notebook

In [2]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

In [3]:
# One seed shared by every random source used in this notebook.
seed = 42

# Make cuDNN deterministic: no auto-tuned kernel selection.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed torch first and numpy last; the numpy call returns None, so the cell
# produces no output.
torch.manual_seed(seed)
np.random.seed(seed)

In [4]:
from src.nettack.GCN import *
from src.nettack.utils import *
from src.nettack.nettack import *
from src.nettack.replication import *

# Classification

## Cora

In [5]:
# Build the evaluation harness for the Cora graph.
# NOTE(review): Evaluater comes from one of the star imports above
# (presumably src.nettack.replication) - confirm.
Ev = Evaluater()
Ev.load_dataset('src/data/cora.npz')
# The saved output below reports training / validation / unlabeled node counts.
Ev.create_splits()
# NOTE(review): surrogate=False presumably trains the model that is later
# attacked (as opposed to the attack's surrogate) - confirm.
Ev.train_model(surrogate=False)

Selecting 1 largest connected components
Number of training node : 247
Number of validation nodes : 249
Number of unlabeled (unknown) nodes : 1988


  'precision', 'predicted', average, warn_for)


converged after 51 iterations
Validation accuracy : 89.16%
Train accuracy : 100.00%
Unlabeled accuracy : 83.60%


In [6]:
# Keep a handle on the margins of the model trained on the unperturbed graph.
# NOTE(review): this is an alias, not a copy; it only stays "clean" if later
# training calls rebind Ev.margins instead of mutating it in place - confirm.
clean_margins = Ev.margins

In [7]:
# Margins restricted to the unlabeled split.
margin_unlabeled = Ev.margins[Ev.split_unlabeled]
# Despite its name, this holds *positions* into the unlabeled split, ordered
# from the lowest margin to the highest.
sorted_margins = margin_unlabeled.argsort()

In [8]:
# Positions inside the unlabeled split: the 10 lowest margins, the 10 highest,
# and 20 drawn without replacement from everything in between.
worst_pos = sorted_margins[:10]
best_pos = sorted_margins[-10:]
random_pos = np.random.choice(sorted_margins[10:-10], size=20, replace=False)

# Translate split positions back into node ids.
worst_ids = Ev.split_unlabeled[worst_pos].astype(np.int32)
best_ids = Ev.split_unlabeled[best_pos].astype(np.int32)
random_ids = Ev.split_unlabeled[random_pos].astype(np.int32)

In [9]:
# Display the node ids selected for each target group.
worst_ids, best_ids, random_ids

(array([1365, 1165, 2400, 2392,  206,  428,  988,   30, 1095,  595]),
 array([ 683, 1185, 1340, 1255, 1347, 1820, 1163, 1342, 1504,  929]),
 array([ 471, 2463,  938, 2177, 2124,  885,  751,  424, 1985, 1010, 1871,
        1532, 2096, 1310,   46,  265,  543, 1728, 2375,  225]))

In [10]:
# Mean margin of each target group before any attack, ordered (worst, best, random).
tuple(Ev.margins[ids].mean() for ids in (worst_ids, best_ids, random_ids))

(-0.989652, 0.9999568, 0.62518203)

In [11]:
# Train again with surrogate=True.
# NOTE(review): presumably this fits the surrogate model the attack relies on - confirm.
Ev.train_model(surrogate=True)

converged after 36 iterations
Validation accuracy : 87.95%
Train accuracy : 100.00%
Unlabeled accuracy : 81.39%


### Direct attacks

In [14]:
# Run the attack with direct=True on every target group, n_repeats=5 each.
# NOTE(review): the results are later reduced with .mean(0) and plotted with one
# point per target id, so they are presumably shaped (n_repeats, n_targets) - confirm.
# NOTE(review): nb_process=0 presumably disables multiprocessing - confirm.
worst_margins = Ev.produce_margins(worst_ids, direct=True, n_repeats=5, nb_process=0)
random_margins = Ev.produce_margins(random_ids, direct=True, n_repeats=5, nb_process=0)
best_margins = Ev.produce_margins(best_ids, direct=True, n_repeats=5, nb_process=0)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

  'precision', 'predicted', average, warn_for)





HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [39]:
# Distribution of post-attack margins per target group (direct attack, Cora).
fig, ax = plt.subplots()
ax.boxplot([worst_margins.flatten(), random_margins.flatten(), best_margins.flatten()],
           positions=[0, 1, 2])
# Name the groups through the tick API rather than boxplot's `labels` keyword,
# which newer matplotlib releases deprecate in favour of `tick_labels`.
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(['worst', 'random', 'best'])
ax.set_ylabel('Margin')
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/direct_margins_cora.pdf', format='pdf')

<IPython.core.display.Javascript object>

In [16]:
# Clean vs. attacked margin for every direct-attack target on Cora.
# The x-values are read from `clean_margins`, the snapshot stored right after the
# model was trained on the unperturbed graph. `Ev.margins` is not used here because
# it may have been replaced by the training runs performed during the attacks.
fig, ax = plt.subplots()
ax.scatter(clean_margins[worst_ids], worst_margins.mean(0), label='worst initial margins')
ax.scatter(clean_margins[random_ids], random_margins.mean(0), label='random initial margins')
ax.scatter(clean_margins[best_ids], best_margins.mean(0), label='best initial margins')
# Diagonal y = x: where a point lies when the attacked margin equals the clean one.
ax.plot([-1, 1], [-1, 1], label='random algo')
ax.set_xlabel('Clean margin')
ax.set_ylabel('Attacked margin')
# The trailing ';' keeps the Legend repr out of the cell output.
ax.legend();

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1dc8424eb38>

In [54]:
# Mean post-attack margin of each group, ordered (worst, random, best).
tuple(np.asarray(m).mean() for m in (worst_margins, random_margins, best_margins))

(-0.9999269, -0.89263266, -0.91951007)

### Indirect attacks

In [32]:
# Same runs as the direct-attack cell, but with direct=False. Results go into
# separate *_in variables so the direct-attack results remain available.
direct = False
worst_margins_in = Ev.produce_margins(worst_ids, direct=direct, n_repeats=5, nb_process=0)
random_margins_in = Ev.produce_margins(random_ids, direct=direct, n_repeats=5, nb_process=0)
best_margins_in = Ev.produce_margins(best_ids, direct=direct, n_repeats=5, nb_process=0)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

  'precision', 'predicted', average, warn_for)





HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [40]:
# Distribution of post-attack margins per target group (indirect attack, Cora).
fig, ax = plt.subplots()
ax.boxplot([worst_margins_in.flatten(), random_margins_in.flatten(), best_margins_in.flatten()],
           positions=[0, 1, 2])
# Name the groups through the tick API rather than boxplot's `labels` keyword,
# which newer matplotlib releases deprecate in favour of `tick_labels`.
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(['worst', 'random', 'best'])
ax.set_ylabel('Margin')
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/indirect_margins_cora.pdf', format='pdf')

<IPython.core.display.Javascript object>

In [41]:
# Clean vs. attacked margin for every indirect-attack target on Cora.
# The x-values are read from `clean_margins`, the snapshot stored right after the
# model was trained on the unperturbed graph. `Ev.margins` is not used here because
# it may have been replaced by the training runs performed during the attacks.
fig, ax = plt.subplots()
ax.scatter(clean_margins[worst_ids], worst_margins_in.mean(0), label='worst initial margins')
ax.scatter(clean_margins[random_ids], random_margins_in.mean(0), label='random initial margins')
ax.scatter(clean_margins[best_ids], best_margins_in.mean(0), label='best initial margins')
# Diagonal y = x: where a point lies when the attacked margin equals the clean one.
ax.plot([-1, 1], [-1, 1], label='random algo')
ax.set_xlabel('Clean margin')
ax.set_ylabel('Attacked margin')
ax.legend()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/indirect_scatter_cora.pdf', format='pdf')

<IPython.core.display.Javascript object>

## Citeseer

In [42]:
# Start over with a fresh evaluator on the Citeseer graph; this replaces the
# Cora evaluator bound to `Ev`.
Ev = Evaluater()
Ev.load_dataset('src/data/citeseer.npz')
# The saved output below reports training / validation / unlabeled node counts.
Ev.create_splits()
# NOTE(review): surrogate=False presumably trains the model that is later
# attacked (as opposed to the attack's surrogate) - confirm.
Ev.train_model(surrogate=False)

Selecting 1 largest connected components
Number of training node : 210
Number of validation nodes : 211
Number of unlabeled (unknown) nodes : 1688


  'precision', 'predicted', average, warn_for)


converged after 20 iterations
Validation accuracy : 71.09%
Train accuracy : 100.00%
Unlabeled accuracy : 71.50%


In [43]:
# Keep a handle on the Citeseer margins of the model trained on the unperturbed
# graph (overwrites the Cora snapshot of the same name).
# NOTE(review): this is an alias, not a copy; it only stays "clean" if later
# training calls rebind Ev.margins instead of mutating it in place - confirm.
clean_margins = Ev.margins

In [44]:
# Margins restricted to the unlabeled split.
margin_unlabeled = Ev.margins[Ev.split_unlabeled]
# Despite its name, this holds *positions* into the unlabeled split, ordered
# from the lowest margin to the highest.
sorted_margins = margin_unlabeled.argsort()

In [45]:
# Positions inside the unlabeled split: the 10 lowest margins, the 10 highest,
# and 20 drawn without replacement from everything in between.
worst_pos = sorted_margins[:10]
best_pos = sorted_margins[-10:]
random_pos = np.random.choice(sorted_margins[10:-10], size=20, replace=False)

# Translate split positions back into node ids.
worst_ids = Ev.split_unlabeled[worst_pos].astype(np.int32)
best_ids = Ev.split_unlabeled[best_pos].astype(np.int32)
random_ids = Ev.split_unlabeled[random_pos].astype(np.int32)

In [46]:
# Display the node ids selected for each target group.
worst_ids, best_ids, random_ids

(array([ 832,   76,  740, 1718,  759, 1982, 1494, 1531, 1544,  175]),
 array([ 938, 1454, 1687,  817, 2062, 1904, 1388, 1348, 1750,  862]),
 array([1205,  379, 1725, 1521,  425, 1316, 1059, 1609, 2059,  280,  799,
        1349,  576, 1646,  524, 1783, 1441, 1090, 1549,  912]))

In [47]:
# Mean margin of each target group before any attack, ordered (worst, best, random).
tuple(Ev.margins[ids].mean() for ids in (worst_ids, best_ids, random_ids))

(-0.993879, 0.99986875, 0.5487084)

In [48]:
# Train again with surrogate=True.
# NOTE(review): presumably this fits the surrogate model the attack relies on - confirm.
Ev.train_model(surrogate=True)

converged after 34 iterations
Validation accuracy : 69.67%
Train accuracy : 100.00%
Unlabeled accuracy : 69.43%


### Direct attacks

In [49]:
# Run the attack with direct=True on every Citeseer target group, n_repeats=5 each.
# NOTE(review): the results are later reduced with .mean(0) and plotted with one
# point per target id, so they are presumably shaped (n_repeats, n_targets) - confirm.
# NOTE(review): nb_process=0 presumably disables multiprocessing - confirm.
worst_margins = Ev.produce_margins(worst_ids, direct=True, n_repeats=5, nb_process=0)
random_margins = Ev.produce_margins(random_ids, direct=True, n_repeats=5, nb_process=0)
best_margins = Ev.produce_margins(best_ids, direct=True, n_repeats=5, nb_process=0)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




HBox(children=(IntProgress(value=0), HTML(value='')))




HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [50]:
# Distribution of post-attack margins per target group (direct attack, Citeseer).
fig, ax = plt.subplots()
ax.boxplot([worst_margins.flatten(), random_margins.flatten(), best_margins.flatten()],
           positions=[0, 1, 2])
# Name the groups through the tick API rather than boxplot's `labels` keyword,
# which newer matplotlib releases deprecate in favour of `tick_labels`.
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(['worst', 'random', 'best'])
ax.set_ylabel('Margin')
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/direct_margins_citeseer.pdf', format='pdf')

<IPython.core.display.Javascript object>

In [51]:
# Clean vs. attacked margin for every direct-attack target on Citeseer.
# The x-values are read from `clean_margins`, the snapshot stored right after the
# model was trained on the unperturbed graph. `Ev.margins` is not used here because
# it may have been replaced by the training runs performed during the attacks.
fig, ax = plt.subplots()
ax.scatter(clean_margins[worst_ids], worst_margins.mean(0), label='worst initial margins')
ax.scatter(clean_margins[random_ids], random_margins.mean(0), label='random initial margins')
ax.scatter(clean_margins[best_ids], best_margins.mean(0), label='best initial margins')
# Diagonal y = x: where a point lies when the attacked margin equals the clean one.
ax.plot([-1, 1], [-1, 1], label='random algo')
ax.set_xlabel('Clean margin')
ax.set_ylabel('Attacked margin')
# The trailing ';' keeps the Legend repr out of the cell output.
ax.legend();

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1dc88c63438>

In [52]:
# Mean post-attack margin of each group, ordered (worst, random, best).
tuple(np.asarray(m).mean() for m in (worst_margins, random_margins, best_margins))

(-0.9998784291744233, -0.9890515124797821, -0.8873809486627579)

### Indirect attacks

In [None]:
# Same runs as the direct-attack cell, but with direct=False. Results go into
# separate *_in variables so the direct-attack results remain available.
# NOTE(review): the saved cell has no execution count and shows a single progress
# bar, so it apparently never ran to completion; the Citeseer indirect plots
# below need a fresh run before they can be trusted.
direct = False
worst_margins_in = Ev.produce_margins(worst_ids, direct=direct, n_repeats=5, nb_process=0)
random_margins_in = Ev.produce_margins(random_ids, direct=direct, n_repeats=5, nb_process=0)
best_margins_in = Ev.produce_margins(best_ids, direct=direct, n_repeats=5, nb_process=0)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))

In [40]:
# Distribution of post-attack margins per target group (indirect attack, Citeseer).
fig, ax = plt.subplots()
ax.boxplot([worst_margins_in.flatten(), random_margins_in.flatten(), best_margins_in.flatten()],
           positions=[0, 1, 2])
# Name the groups through the tick API rather than boxplot's `labels` keyword,
# which newer matplotlib releases deprecate in favour of `tick_labels`.
ax.set_xticks([0, 1, 2])
ax.set_xticklabels(['worst', 'random', 'best'])
ax.set_ylabel('Margin')
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/indirect_margins_citeseer.pdf', format='pdf')

<IPython.core.display.Javascript object>

In [41]:
# Clean vs. attacked margin for every indirect-attack target on Citeseer.
# The x-values are read from `clean_margins`, the snapshot stored right after the
# model was trained on the unperturbed graph. `Ev.margins` is not used here because
# it may have been replaced by the training runs performed during the attacks.
fig, ax = plt.subplots()
ax.scatter(clean_margins[worst_ids], worst_margins_in.mean(0), label='worst initial margins')
ax.scatter(clean_margins[random_ids], random_margins_in.mean(0), label='random initial margins')
ax.scatter(clean_margins[best_ids], best_margins_in.mean(0), label='best initial margins')
# Diagonal y = x: where a point lies when the attacked margin equals the clean one.
ax.plot([-1, 1], [-1, 1], label='random algo')
ax.set_xlabel('Clean margin')
ax.set_ylabel('Attacked margin')
ax.legend()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
fig.savefig('figures/indirect_scatter_citeseer.pdf', format='pdf')

<IPython.core.display.Javascript object>

## Polblogs

# Tests

In [13]:
# Scratch check of a single attack: fresh evaluator on Cora, this time trained
# with surrogate=True straight away.
# NOTE(review): presumably the attack below needs the surrogate model - confirm.
Ev = Evaluater()
Ev.load_dataset('src/data/cora.npz')
Ev.create_splits()
Ev.train_model(surrogate=True)

Selecting 1 largest connected components
Number of training node : 247
Number of validation nodes : 249
Number of unlabeled (unknown) nodes : 1988


  'precision', 'predicted', average, warn_for)


converged after 33 iterations
Validation accuracy : 84.34%
Train accuracy : 100.00%
Unlabeled accuracy : 83.05%


In [14]:
# Attack node 0 with verbose logging; the saved log shows a direct attack made
# of 5 edge perturbations, each involving node 0.
Ev.attack(u=0, verbose=True)

##### Starting attack #####
##### Attack node with ID 0 using structure and feature perturbations #####
##### Attacking the node directly #####
##### Performing 5 perturbations #####
##### ...1/5 perturbations ... #####
Edge perturbation: [   0 2252]
##### ...2/5 perturbations ... #####
Edge perturbation: [   0 1724]
##### ...3/5 perturbations ... #####
Edge perturbation: [   0 1359]
##### ...4/5 perturbations ... #####
Edge perturbation: [   0 2218]
##### ...5/5 perturbations ... #####
Edge perturbation: [   0 2018]


In [15]:
# Train with surrogate=False and with_perturb=False to obtain the baseline margins.
# NOTE(review): with_perturb=False presumably ignores the perturbations found by
# the attack above - confirm.
Ev.train_model(surrogate=False, with_perturb=False)

  'precision', 'predicted', average, warn_for)


converged after 25 iterations
Validation accuracy : 85.14%
Train accuracy : 100.00%
Unlabeled accuracy : 83.50%


In [16]:
# Sanity check: the node id stored on the attack object (0, matching u=0 above).
Ev.nettack.u

0

In [17]:
# Margins after training with with_perturb=False (the baseline for the comparison below).
clean_margins = Ev.margins

In [18]:
# Train again, now with with_perturb=True.
# NOTE(review): presumably this trains on the graph including the perturbations
# produced by Ev.attack - confirm.
Ev.train_model(surrogate=False, with_perturb=True)

converged after 41 iterations
Validation accuracy : 84.34%
Train accuracy : 100.00%
Unlabeled accuracy : 83.40%


In [19]:
# Margins after training with with_perturb=True.
attacked_margins = Ev.margins

In [20]:
# Margin of the attacked node, ordered (clean, attacked).
tuple(m[Ev.nettack.u] for m in (clean_margins, attacked_margins))

(0.5895654, -0.99150693)

In [114]:
# Clean vs. attacked margin for every node, coloured by split, with the attacked
# node highlighted in red.
fig, ax = plt.subplots()
ax.scatter(clean_margins[Ev.split_unlabeled], attacked_margins[Ev.split_unlabeled], label='unlabeled', s=10)
ax.scatter(clean_margins[Ev.split_train], attacked_margins[Ev.split_train], label='train', s=10)
ax.scatter(clean_margins[Ev.split_val], attacked_margins[Ev.split_val], label='val', s=10)
ax.scatter([clean_margins[Ev.nettack.u]], [attacked_margins[Ev.nettack.u]], color='red', s=100, label='attacked')
ax.set_xlabel('Clean margin')
ax.set_ylabel('Attacked margin')
# The trailing ';' keeps the Legend repr out of the cell output.
ax.legend();

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x2833e2370b8>