In [1]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import torch
from deeprobust.graph.defense import GCN, GCNJaccard
import numpy as np
from torch_geometric.data import Data, DataLoader
import pandas as pd
import pickle as pkl

In [2]:
np.random.seed(123)

In [3]:
device = torch.device('cuda:0')

## Alpha Network

In [4]:
def load_npz_raw(file_name):
    """
    for already processed bitcoin alpha network. See read_node_attr.ipynb 
    """
    if not file_name.endswith('.npz'):
        file_name += '.npz'
    with np.load(file_name, allow_pickle=True) as loader:
        loader = dict(loader)
        adj_matrix = loader['_A_obs']
        attr_matrix = loader['_X_obs']
        labels = loader.get('_z_obs')
    return adj_matrix, attr_matrix, labels

In [5]:
adj, attr, labels = load_npz_raw("../GCN_ADV_Train/bitcoin_alpha_eigens.npz")

In [6]:
adj

array([[ 0. ,  0.1,  0. , ...,  0. ,  0.1,  0. ],
       [ 0.4,  0. ,  0.1, ...,  0. , -1. ,  0. ],
       [ 0. ,  0. ,  0. , ...,  0. ,  0. , -1. ],
       ...,
       [ 0. ,  0. ,  0. , ...,  0. ,  0. ,  1. ],
       [ 0.2,  0.9,  0. , ...,  0. ,  0. ,  0. ],
       [ 0. ,  0. ,  0. , ...,  1. , -1. ,  0. ]])

In [7]:
attr

array([[ 0.08186567+0.j        , -0.03081963+0.j        ,
        -0.09929379+0.j        , ..., -0.01476412+0.j        ,
        -0.01991514+0.j        ,  0.02788606-0.00939905j],
       [ 0.20409755+0.j        , -0.19004447+0.j        ,
        -0.00419816+0.j        , ...,  0.00404818+0.j        ,
         0.01845753+0.j        ,  0.03371585+0.01276114j],
       [ 0.15657238+0.j        ,  0.24720236+0.j        ,
        -0.19258863+0.j        , ...,  0.02289012+0.j        ,
         0.00938968+0.j        , -0.01470755-0.02335312j],
       ...,
       [-0.08296632+0.j        , -0.08043238+0.j        ,
        -0.22398007+0.j        , ...,  0.00594582+0.j        ,
         0.0156617 +0.j        , -0.0109656 +0.02007655j],
       [ 0.18767421+0.j        , -0.21002437+0.j        ,
        -0.0875027 +0.j        , ..., -0.00407662+0.j        ,
         0.01856113+0.j        ,  0.02889957+0.04112684j],
       [-0.07051254+0.j        , -0.0336177 +0.j        ,
        -0.18959818+0.j       

In [8]:
train_mask = ~np.isnan(labels)
train_indices = np.nonzero(train_mask)[0]
np.random.shuffle(train_indices)
val_indices = train_indices[:len(train_indices)//2]                         # random selection
train_indices = train_indices[len(train_indices)//2:]
train_mask = np.zeros_like(train_mask)
val_mask = np.zeros_like(train_mask)
train_mask[train_indices] = 1                                               # get train and val masks
val_mask[val_indices] = 1                                               

new_y_train = labels[train_mask]                                                # get train and val labels (-1 and 1)
y_train = np.zeros([train_mask.shape[0], 2])
y_train[train_mask, 0] = (new_y_train == -1).astype(int)
y_train[train_mask, 1] = (new_y_train == 1).astype(int)

new_y_val = labels[val_mask]
y_val = np.zeros([train_mask.shape[0], 2])
y_val[val_mask, 0] = (new_y_val == -1).astype(int)
y_val[val_mask, 1] = (new_y_val == 1).astype(int)

test_mask = np.isnan(labels)
y_test = np.zeros([test_mask.shape[0], 2]).astype(int)

test_mask, val_mask = val_mask, test_mask                                   ###############
y_test, y_val = y_val, y_test  

new_y_train[new_y_train==-1] = 0
new_y_val[new_y_val==-1] = 0
y_test = new_y_val
y_train = new_y_train
test_indices = val_indices


In [9]:
adj_matrix = adj.copy()
adj[adj!=0] = 1

In [10]:
# indices = np.nonzero(adj)
# edge_index = torch.tensor(indices, dtype=torch.long)
# dataset = Data(x=attr, edge_index=edge_index, edge_attr=None)
# dataset
# attr = torch.from_numpy(attr).real
# adj = torch.from_numpy(adj)
# labels = torch.from_numpy(labels)
# train_indices = torch.from_numpy(train_indices)

In [11]:
attr = attr.real

In [12]:
labels[labels==-1] = 0

In [13]:
# attr, adj, train_indices, labels = attr.to(device), adj.to(device), train_indices.to(device), labels.to(device)

In [14]:
gcn_jaccard = GCNJaccard(attr.shape[1], 32, 2, device=device)

In [15]:
gcn_jaccard = gcn_jaccard.to(device)

In [16]:
gcn_jaccard.fit(features=attr, adj=adj, labels=labels, idx_train=train_indices)

removed 0 edges in the original graph
Epoch 0, training loss: 0.6822478175163269
Epoch 10, training loss: 0.5975603461265564
Epoch 20, training loss: 0.5792514085769653
Epoch 30, training loss: 0.5576171278953552
Epoch 40, training loss: 0.5483308434486389
Epoch 50, training loss: 0.5350002646446228
Epoch 60, training loss: 0.5231663584709167
Epoch 70, training loss: 0.5127209424972534
Epoch 80, training loss: 0.4914461076259613
Epoch 90, training loss: 0.4902454614639282
Epoch 100, training loss: 0.4767161011695862
Epoch 110, training loss: 0.45659762620925903
Epoch 120, training loss: 0.463367223739624
Epoch 130, training loss: 0.4573913812637329
Epoch 140, training loss: 0.42881426215171814
Epoch 150, training loss: 0.4207516014575958
Epoch 160, training loss: 0.4072168171405792
Epoch 170, training loss: 0.40532606840133667
Epoch 180, training loss: 0.4077036380767822
Epoch 190, training loss: 0.38517752289772034


In [17]:
gcn_jaccard.eval()

GCNJaccard(
  (gc1): GraphConvolution (100 -> 32)
  (gc2): GraphConvolution (32 -> 2)
)

In [18]:
output = gcn_jaccard.test(test_indices)

Test set results: loss= 0.6833 accuracy= 0.7542


In [19]:
new_pred = gcn_jaccard.predict(attr, adj)

removed 0 edges in the original graph


In [20]:
new_pred

tensor([[-1.6836e+01,  0.0000e+00],
        [-5.3285e+00, -4.8632e-03],
        [-1.8218e+01,  0.0000e+00],
        ...,
        [-4.0054e-01, -1.1085e+00],
        [-3.1646e+00, -4.3150e-02],
        [-1.7840e+00, -1.8387e-01]], device='cuda:0',
       grad_fn=<LogSoftmaxBackward>)

In [21]:
new_pred = torch.argmax(new_pred, 1).cpu()

In [22]:
pkl.dump(new_pred.numpy(), open('pred_gcn_jaccard_alpha.pkl', 'wb'))

In [23]:
from DeepRobust.deeprobust.graph.global_attack.topology_attack import PGDAttack

In [25]:
from deeprobust.graph.utils import *
import torch.nn.functional as F

In [26]:
def test(new_adj, gcn=None):
    ''' test on GCN '''
    new_adj = new_adj.cpu().numpy()
    if gcn is None:
        # adj = normalize_adj_tensor(adj)
        gcn = GCNJaccard(attr.shape[1], 32, 2, device=device)
        gcn = gcn.to(device)
        # gcn.fit(features, new_adj, labels, idx_train) # train without model picking
        gcn.fit(attr, new_adj, labels, train_indices) # train with validation model picking
        gcn.eval()
        output = gcn.predict().cpu()
    else:
        gcn.eval()
        output = gcn.predict(attr, new_adj).cpu()

    loss_test = F.nll_loss(output[test_indices].detach(), torch.tensor(labels[test_indices], dtype=torch.long))
    acc_test = accuracy(output[test_indices].detach(), torch.tensor(labels[test_indices], dtype=torch.long))
    print("Test set results:",
        "loss= {:.4f}".format(loss_test.item()),
        "accuracy= {:.4f}".format(acc_test.item()))

In [27]:
# Setup Attack Model
print('=== setup attack model ===')
model = PGDAttack(model=gcn_jaccard, nnodes=adj.shape[0], loss_type='CE', device=device)
model = model.to(device)

epochs = 50
ptb_rate = 0.05

perturbations = int(ptb_rate * (adj.sum()//2))
features = torch.from_numpy(attr)
adj_tensor = torch.from_numpy(adj)

# model.attack(features, adj, labels, idx_train, perturbations, epochs=args.epochs)
# Here for the labels we need to replace it with predicted ones
pseudo_labels = gcn_jaccard.predict(attr, adj)
pseudo_labels = torch.argmax(pseudo_labels, 1).cpu()
# Besides, we need to add the idx into the whole process
pseudo_indices = np.concatenate([train_indices, test_indices])

idx_others = list(set(np.arange(len(labels))) - set(train_indices))
pseudo_labels = torch.cat([torch.from_numpy(labels[train_indices]), pseudo_labels[idx_others]])
model.attack(attr, adj, pseudo_labels.numpy(), pseudo_indices, perturbations, epochs=epochs)

# # if you want to save the modified adj/features, uncomment the code below
# model.save_adj(root='./', name=f'mod_adj')
# model.save_features(root='./', name='mod_features')


  0%|          | 0/50 [00:00<?, ?it/s]

=== setup attack model ===
removed 0 edges in the original graph


100%|██████████| 50/50 [00:05<00:00,  8.34it/s]


In [28]:
print('=== testing GCN on clean graph ===')
test(torch.tensor(adj), gcn_jaccard)

=== testing GCN on clean graph ===
removed 0 edges in the original graph
Test set results: loss= 0.6833 accuracy= 0.7542


In [29]:
print('=== testing GCN on Evasion attack ===')

modified_adj = model.modified_adj
test(modified_adj, gcn_jaccard)


=== testing GCN on Evasion attack ===
removed 0 edges in the original graph
Test set results: loss= 1.1614 accuracy= 0.5085


In [30]:

# modified_features = model.modified_features
print('=== testing GCN on Poisoning attack ===')
test(modified_adj)

=== testing GCN on Poisoning attack ===
removed 0 edges in the original graph
Epoch 0, training loss: 0.6629500389099121
Epoch 10, training loss: 0.6200554370880127
Epoch 20, training loss: 0.6070672869682312
Epoch 30, training loss: 0.5993247628211975
Epoch 40, training loss: 0.5910260081291199
Epoch 50, training loss: 0.5827901363372803
Epoch 60, training loss: 0.5793148875236511
Epoch 70, training loss: 0.574871838092804
Epoch 80, training loss: 0.5614697933197021
Epoch 90, training loss: 0.5580852031707764
Epoch 100, training loss: 0.5366182327270508
Epoch 110, training loss: 0.536301851272583
Epoch 120, training loss: 0.545376181602478
Epoch 130, training loss: 0.5398913025856018
Epoch 140, training loss: 0.5209358334541321
Epoch 150, training loss: 0.528473436832428
Epoch 160, training loss: 0.5196252465248108
Epoch 170, training loss: 0.5290324687957764
Epoch 180, training loss: 0.5184369087219238
Epoch 190, training loss: 0.5002232789993286
Test set results: loss= 0.6546 accura