In [1]:
import os.path as osp
import argparse
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import SAGEConv, GCNConv  # noqa

from models import GAT, GCN, GraphSage, get_opts
from GnnAttack import GnnAttack
from utils import process_mask

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset_names = ['Cora', 'CiteSeer', 'PubMed']
model_names = ['GCN', 'GraphSage', 'GAT']
path='./data'
log_dir='gammia_mm_diff_target_log.txt'

In [2]:
def train(model, optimizer, data):
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model(data.x, data.edge_index, data.edge_attr)[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()
    
    return model

@torch.no_grad()
def test(model, data):
    model.eval()
    logits, accs = model(data.x, data.edge_index, data.edge_attr), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        accs.append(acc)
    return accs

def train_target(model, optimizer, data):
    best_model = None
    best_val_acc = test_acc = 0
    for epoch in range(1, 201):
        model = train(model, optimizer, data)
        train_acc, val_acc, tmp_test_acc = test(model, data)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
            best_model = model
        log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))
    return best_model

def train_shadow(model_shadow, optimizer, data):
    best_model_shadow = None
    best_val_acc = test_acc = 0
    for epoch in range(1, 201):
        model_shadow = train(model_shadow, optimizer, data)
        train_acc, val_acc, tmp_test_acc = test(model_shadow, data)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            test_acc = tmp_test_acc
            best_model_shadow = model_shadow
        log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))
    return best_model_shadow

def attack_nodes_list(models, data):
    attack_in_list = []
    attack_out_list = []
    for i in range(dataset.num_classes):
        atk_in = []
        atk_out = []
        for model in models:
            model.eval()
            logits = model(data.x, data.edge_index, data.edge_attr).detach().cpu().numpy()
            for l in range(len(logits)):
                if data.y[l] == i and data.train_mask[l]:
                    atk_in.append(logits[l])
                if data.y[l] == i and data.test_mask[l]:
                    atk_out.append(logits[l])
        attack_in_list.append(atk_in)
        attack_out_list.append(atk_out)
    
    return attack_in_list, attack_out_list

def attack(attack_in_train_list, attack_out_train_list, attack_in_list, attack_out_list, dataset, verbose=2):
    num_features = len(attack_in_train_list[0][0])
    num_classes = 2
    best_test_acc_list = []
    data = dataset[0].to(device)
    for i in range(dataset.num_classes):
        model = GraphSage(num_features, num_classes).to(device)
        optimizer = get_opts(model, 'GraphSage')
        gam = GnnAttack(data, attack_in_train_list[i], attack_out_train_list[i], attack_in_list[i], attack_out_list[i])
        best_test_acc_list.append(gam.attack(model, optimizer, device=device, epoches=1000, verbose=verbose))
    for i in range(len(best_test_acc_list)):
        print("class {}: {}".format(i, best_test_acc_list[i]))
        
    return best_test_acc_list

def write_log(best_test_acc_list, dataset_name, model_name, model_name_shadow):
    with open(log_dir, 'a') as f:
        f.write('{:.2f}/{:.2f}    '.format(np.mean(best_test_acc_list) * 100, np.max(best_test_acc_list) * 100))

def format_model_name(model_name):
    if model_name == 'GCN':
        return 'GCN          '
    elif model_name == 'GraphSage':
        return 'GraphSage    '
    elif model_name == 'GAT':
        return 'GAT          '

In [3]:
log_title = '\n\n-------------------------------Cora-------------------  ----------------------CiteSeer--------------  --------------------PubMed-----------------\ntar.\\shad.      GCN         GraphSage         GAT            GCN         GraphSage        GAT            GCN          GraphSage        GAT  \n------------------------------------------------------  --------------------------------------------  -------------------------------------------'
with open(log_dir, 'a') as f:
    f.write(log_title)

In [4]:
for model_name in model_names:
    with open(log_dir, 'a') as f:
        f.write('\n{}'.format(format_model_name(model_name)))
    for dataset_name in dataset_names:
        dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())
        data = dataset[0]
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    
        exec("model = {}(num_features, num_classes).to(device)".format(model_name))
        data = data.to(device)
        optimizer = get_opts(model, model_name)
        best_model = train_target(model, optimizer, data)
        attack_in_list, attack_out_list = attack_nodes_list([best_model], data)
        for model_name_shadow in model_names:
            dataset = Planetoid(path, dataset_name, transform=T.NormalizeFeatures())
            data = dataset[0]
            data = process_mask(data).to(device)
            print("dataset: {}, target_model: {}, shadow_model: {}".format(dataset_name, model_name, model_name_shadow))
            shadow_models = []
            for i in range(dataset.num_classes):
#             for i in range(50):
                exec("model_shadow = {}(num_features, num_classes).to(device)".format(model_name_shadow))
                optimizer = get_opts(model_shadow, model_name_shadow)
                shadow_models.append(train_shadow(model_shadow, optimizer, data))
            attack_in_train_list, attack_out_train_list = attack_nodes_list(shadow_models, data)
#             attack_in_list, attack_out_list = attack_nodes_list([best_model], data)
            best_test_acc_list = attack(attack_in_train_list, attack_out_train_list, attack_in_list, attack_out_list, dataset, verbose=2)
            write_log(best_test_acc_list, dataset_name, model_name, model_name_shadow)

Epoch: 200, Train: 1.0000, Val: 0.7860, Test: 0.8000
dataset: Cora, target_model: GCN, shadow_model: GCN
Epoch: 200, Train: 0.9929, Val: 0.8540, Test: 0.7850
Epoch: 200, Train: 1.0000, Val: 0.8360, Test: 0.7910
Epoch: 200, Train: 0.9929, Val: 0.8640, Test: 0.7880
Epoch: 200, Train: 1.0000, Val: 0.8440, Test: 0.7850
Epoch: 200, Train: 1.0000, Val: 0.8560, Test: 0.7950
Epoch: 200, Train: 1.0000, Val: 0.8440, Test: 0.7670
Epoch: 200, Train: 1.0000, Val: 0.8580, Test: 0.7660
Constructing graph dataset...
preparing nodes...
member: 1354, all: 2708
member train: 70, train: 140
Nodes: torch.Size([2708, 7])
preparing edges...
number of edges: 5446
graph dataset constructed!
********************************************************************************
test_acc: 0.723
********************************************************************************
Constructing graph dataset...
preparing nodes...
member: 1354, all: 2708
member train: 70, train: 140
Nodes: torch.Size([2708, 7])
preparing edges.