In [None]:
!pip install deeprobust==0.2.4

!pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.10.0+cu113.html

Collecting deeprobust==0.2.4
  Downloading deeprobust-0.2.4-py3-none-any.whl (191 kB)
[?25l[K     |█▊                              | 10 kB 34.2 MB/s eta 0:00:01[K     |███▍                            | 20 kB 35.8 MB/s eta 0:00:01[K     |█████▏                          | 30 kB 40.3 MB/s eta 0:00:01[K     |██████▉                         | 40 kB 27.6 MB/s eta 0:00:01[K     |████████▌                       | 51 kB 16.3 MB/s eta 0:00:01[K     |██████████▎                     | 61 kB 14.1 MB/s eta 0:00:01[K     |████████████                    | 71 kB 13.3 MB/s eta 0:00:01[K     |█████████████▊                  | 81 kB 14.7 MB/s eta 0:00:01[K     |███████████████▍                | 92 kB 13.4 MB/s eta 0:00:01[K     |█████████████████               | 102 kB 12.5 MB/s eta 0:00:01[K     |██████████████████▉             | 112 kB 12.5 MB/s eta 0:00:01[K     |████████████████████▌           | 122 kB 12.5 MB/s eta 0:00:01[K     |██████████████████████▎         | 133 kB 12

# 第四次作业

在本次作业中，我们来实现一些简单的攻击方法。具体地，我们将对原邻接矩阵分别进行加边和减边操作。对于测试模型，我们使用`deeprobust.graph.defense.DeepWalk`。

In [None]:
from deeprobust.graph.data import Dataset
from deeprobust.graph.defense import DeepWalk

# Load the Cora citation graph using the 'prognn' split setting.
dataset_str = 'cora'
data = Dataset(root='./', name=dataset_str, setting='prognn')

# Graph structure, node attributes and node labels.
adj = data.adj
features = data.features
labels = data.labels

# Node index sets used for training, validation and testing.
idx_train = data.idx_train
idx_val = data.idx_val
idx_test = data.idx_test

Loading cora dataset...
Downloading from https://raw.githubusercontent.com/danielzuegner/gnn-meta-attack/master/data/cora.npz to ./cora.npz
Done!
Selecting 1 largest connected components
Dowloading from https://raw.githubusercontent.com/ChandlerBang/Pro-GNN/master/splits/cora_prognn_splits.json to ./cora_prognn_splits.json


In [None]:
# Baseline: train DeepWalk embeddings on the clean graph and score them on
# node classification, so the attacked graphs below have a reference point.
defender = DeepWalk()
defender.fit(adj)
baseline_scores = defender.evaluate_node_classification(labels, idx_train, idx_test)
pred, micro_f1, macro_f1 = baseline_scores

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics for help.

File "../usr/local/lib/python3.7/dist-packages/deeprobust/graph/defense/node_embedding.py", line 332:
@numba.jit(nopython=True, parallel=True)
def _random_walk(indptr, indices, walk_length, walks_per_node, seed):
^

  state.func_ir.loc))


Micro F1: 0.7892354124748491
Macro F1: 0.768282312748032


下面我们来实现一个简单的攻击方式。

In [None]:
import numpy as np
import scipy.sparse as sp

class SimpleAttack():
    """Simple structure attack that flips undirected edges of a graph.

    A pool of candidate edges is generated first; the edges that are finally
    perturbed are then picked from that pool according to ``metric``.

    Parameters
    ----------
    metric : str
        ``"random"`` picks candidates uniformly at random, ``"degree"`` picks
        the candidates whose endpoints have the largest summed degree.
    attack_type : str
        ``"add"`` inserts edges that do not exist yet, ``"remove"`` deletes
        existing edges.
    """
    def __init__(self, metric, attack_type):
        self.metric = metric
        self.attack_type = attack_type

    def attack(self, adj, n_perturbations=1000, n_candidates=10000, seed=0):
        """Perturb ``adj`` and store the result in ``self.modified_adj``.

        Parameters
        ----------
        adj : scipy sparse matrix, shape [n_nodes, n_nodes]
            Adjacency matrix; the candidate generators only look at its upper
            triangle, so it is expected to be symmetric.
        n_perturbations : int
            Number of edges to flip.
        n_candidates : int
            Size of the candidate pool when adding edges (ignored for removal).
        seed : int or None
            Seed for NumPy's global RNG; ``None`` leaves the RNG untouched.

        Raises
        ------
        NotImplementedError
            If ``attack_type`` or ``metric`` is not supported.
        AssertionError
            If fewer than ``n_perturbations`` candidates are available.
        """
        # Validate both options up front so a typo in `metric` does not cost
        # a full (and possibly slow) candidate generation first.
        if self.attack_type not in ('add', 'remove'):
            raise NotImplementedError
        if self.metric not in ("random", "degree"):
            raise NotImplementedError

        if self.attack_type == 'add':
            # The number of non-edges is huge, so the pool is capped at
            # n_candidates when adding edges.
            candidates = self.generate_candidates_addition(adj, n_candidates, seed)
        else:
            candidates = self.generate_candidates_removal(adj, seed)

        if self.metric == "random":
            top_flips = self.random_top_flips(candidates, n_perturbations, seed)
        else:
            top_flips = self.degree_top_flips(adj, candidates, n_perturbations)

        assert len(top_flips) == n_perturbations, (
            "only %d candidate edges available for %d perturbations"
            % (len(top_flips), n_perturbations))
        modified_adj = self.flip_candidates(adj, top_flips)
        self.modified_adj = modified_adj

    def generate_candidates_removal(self, adj, seed=None):
        """Generate removable edges: existing edges minus those whose removal
        could isolate a node.

        Every undirected edge is visited exactly once, in random order, and is
        accepted only while both endpoints still have more than one remaining
        edge. Removing any subset of the returned edges therefore never leaves
        a node without neighbours. Self-loops are never returned.

        Parameters
        ----------
        adj : scipy sparse matrix, shape [n_nodes, n_nodes]
        seed : int or None
            Seed for NumPy's global RNG; ``None`` leaves the RNG untouched.

        Returns
        -------
        np.ndarray, shape [n_candidates, 2]
            Candidate edges as ``(i, j)`` pairs with ``i < j``.
        """
        if seed is not None:
            np.random.seed(seed)

        degree = np.asarray(adj.sum(axis=1)).ravel()

        # Take the strict upper triangle so each undirected edge shows up once;
        # iterating over both (i, j) and (j, i) would emit duplicate candidates
        # and decrement the degrees twice for the same edge.
        rows, cols = sp.triu(adj, k=1).nonzero()
        edges = np.column_stack((rows, cols))

        candidates = []
        for idx in np.random.permutation(len(edges)):
            i, j = edges[idx]
            if degree[i] > 1 and degree[j] > 1:
                candidates.append((int(i), int(j)))
                # Account for this edge as if it were already removed.
                degree[i] -= 1
                degree[j] -= 1

        # reshape keeps the (n, 2) layout even when no edge qualifies.
        return np.array(candidates, dtype=np.int64).reshape(-1, 2)

    def generate_candidates_addition(self, adj, n_candidates, seed=None):
        """Generate candidate edges that can be added (pairs that are not
        currently connected).

        Parameters
        ----------
        adj : scipy sparse matrix, shape [n_nodes, n_nodes]
        n_candidates : int
            Number of distinct non-edges to sample.
        seed : int or None
            Seed for NumPy's global RNG; ``None`` leaves the RNG untouched.

        Returns
        -------
        np.ndarray, shape [n_candidates, 2]
            Distinct ``(i, j)`` pairs with ``i < j`` and ``adj[i, j] == 0``.

        Raises
        ------
        ValueError
            If ``n_candidates`` exceeds the number of unconnected node pairs
            (rejection sampling could never terminate in that case).
        """
        if seed is not None:
            np.random.seed(seed)

        N = adj.shape[0]

        n_existing = len(sp.triu(adj, k=1).nonzero()[0])
        n_available = N * (N - 1) // 2 - n_existing
        if n_candidates > n_available:
            raise ValueError(
                "requested %d candidate edges but only %d node pairs are unconnected"
                % (n_candidates, n_available))

        # `np.int` was removed from NumPy; the builtin `int` is the same dtype.
        candidates = np.zeros((n_candidates, 2), dtype=int)
        # Set of already accepted pairs: O(1) duplicate check instead of
        # rescanning the whole candidate array for every draw.
        seen = set()

        n = 0
        while n < n_candidates:
            i, j = np.random.randint(0, N, size=2)

            if i == j:
                continue
            if j < i:
                i, j = j, i

            # Already an edge: nothing to add.
            if adj[i, j]:
                continue

            pair = (int(i), int(j))
            if pair in seen:
                continue
            seen.add(pair)

            candidates[n, 0] = i
            candidates[n, 1] = j
            n += 1

        return candidates

    def random_top_flips(self, candidates, n_perturbations, seed=None):
        """Pick ``n_perturbations`` edges uniformly at random from ``candidates``.

        Returns fewer rows when ``candidates`` holds fewer than
        ``n_perturbations`` edges.
        """
        if seed is not None:
            np.random.seed(seed)
        return candidates[np.random.permutation(len(candidates))[:n_perturbations]]

    def degree_top_flips(self, adj, candidates, n_perturbations):
        """Pick the ``n_perturbations`` candidates with the largest edge degree.

        The degree of an edge is the sum of the degrees of its two endpoints.
        Returns fewer rows when ``candidates`` holds fewer than
        ``n_perturbations`` edges.
        """
        degree = np.asarray(adj.sum(axis=1)).ravel()

        # Edge degree = degree[i] + degree[j]; argsort is ascending, so the
        # largest edge degrees sit at the end.
        order = np.argsort(degree[candidates[:, 0]] + degree[candidates[:, 1]])

        # Explicit start index: `order[-0:]` would return *all* candidates
        # when n_perturbations is 0.
        start = max(len(order) - n_perturbations, 0)
        return candidates[order[start:]]

    def flip_candidates(self, adj, candidates):
        """Flip the candidate edges: 0 becomes 1 and 1 becomes 0.

        Both ``(i, j)`` and ``(j, i)`` are flipped. ``adj`` itself is not
        modified.

        Returns
        -------
        sp.csr_matrix, shape [n_nodes, n_nodes]
            The adjacency matrix after flipping.
        """
        candidates = np.asarray(candidates)
        adj_flipped = adj.copy().tolil()

        # Nothing to flip (also covers 1-D empty arrays that cannot be
        # column-indexed).
        if candidates.size > 0:
            rows, cols = candidates[:, 0], candidates[:, 1]
            # New values are computed from the original matrix, so a repeated
            # candidate is flipped only once.
            adj_flipped[rows, cols] = 1 - np.asarray(adj[rows, cols]).ravel()
            adj_flipped[cols, rows] = 1 - np.asarray(adj[cols, rows]).ravel()

        adj_flipped = adj_flipped.tocsr()
        adj_flipped.eliminate_zeros()

        return adj_flipped


In [None]:
import itertools
# Every (selection metric, attack type) pair, in the same order that
# itertools.product would yield them.
comb = [
    (metric, attack_type)
    for metric in ["random", "degree"]
    for attack_type in ["add", "remove"]
]

for metric, attack_type in comb:
    print(metric, attack_type)

    # Perturb 1000 edges of the clean graph with the hand-written attack.
    attacker = SimpleAttack(metric=metric, attack_type=attack_type)
    attacker.attack(adj, n_perturbations=1000)
    modified_adj = attacker.modified_adj

    # Retrain DeepWalk on the perturbed graph and evaluate node classification.
    defender = DeepWalk()
    defender.fit(modified_adj)
    _ = defender.evaluate_node_classification(labels, idx_train, idx_test)

random add
Micro F1: 0.7168008048289739
Macro F1: 0.6867181658118211
random remove
Micro F1: 0.7525150905432596
Macro F1: 0.7176036780984821
degree add
Micro F1: 0.7052313883299799
Macro F1: 0.6748379563942785
degree remove
Micro F1: 0.7711267605633803
Macro F1: 0.7444437921447655


比较之后我们会发现，在相同的扰动数量（1000条边）下，加边比减边更具有破坏力：在上面这次运行中，未受攻击时的Micro F1约为0.79，加边后下降到约0.71–0.72，而减边后仍有约0.75–0.77。

**我们也可以直接调用DeepRobust来完成这个任务**：

In [None]:
from deeprobust.graph.global_attack import OtherNodeEmbeddingAttack
import itertools
# Every (selection type, attack type) pair, in the same order that
# itertools.product would yield them.
comb = [
    (type_, attack_type)
    for type_ in ["random", "degree"]
    for attack_type in ["add", "remove"]
]

for type_, attack_type in comb:
    # Same experiment as above, using DeepRobust's built-in attack.
    model = OtherNodeEmbeddingAttack(type=type_)
    print(type_, attack_type)
    model.attack(adj, attack_type=attack_type, n_candidates=10000)
    modified_adj = model.modified_adj

    # Retrain DeepWalk on the perturbed graph and evaluate node classification.
    defender = DeepWalk()
    defender.fit(modified_adj)
    defender.evaluate_node_classification(labels, idx_train, idx_test)

random add
Micro F1: 0.6921529175050302
Macro F1: 0.6519502403604314
random remove
Micro F1: 0.7681086519114688
Macro F1: 0.7439817375277473
degree add
Micro F1: 0.7233400402414487
Macro F1: 0.6944224040940351
degree remove
Micro F1: 0.7701207243460764
Macro F1: 0.7426401315021
