In [1]:
import os
import numpy as np
import pandas as pd
import random
import json
from subprocess import Popen, PIPE, STDOUT
import torch
import torch.nn as nn
from torch.autograd import Variable, grad
from bleu import compute_bleu
from models import load_models, generate
from utils import batchify, to_gpu
from utils import Corpus, filter_flip_polarity
# One shared seed for every RNG used in this notebook (Python, NumPy, PyTorch)
# so that repeated runs start from the same random state.
SEED = 1111
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7fb9a0093070>

In [2]:
# Dataset location and evaluation batch size.
DATA_DIR = './data/hsieh_bpe'
BATCH_SIZE = 35

# Checkpoint directory of the model under evaluation.
# Alternative checkpoint kept for reference: './output/hsieh_bpe_20_epochs'
MODEL_DIR = './output/hsieh_bpe_20_epochs_pgd_0.05_0.001_40'

In [3]:
# Load every component stored under MODEL_DIR (checkpoint suffix "_10") onto the GPU.
model_args, idx2word, autoencoder, gan_gen, gan_disc, enc_classifier = load_models(
    MODEL_DIR, suffix="_10", on_gpu=True, arch_cl="100")

# The GAN generator and discriminator are not needed here: drop the references
# and hand the cached GPU memory back to the allocator.
del gan_gen, gan_disc
torch.cuda.empty_cache()

# Read the vocabulary with a context manager so the file handle is closed
# deterministically; the path is built the same way as for the Corpus loader.
with open(os.path.join(MODEL_DIR, "vocab.json"), "r") as vocab_file:
    word2idx = json.load(vocab_file)

Loading models from./output/hsieh_bpe_20_epochs_pgd_0.05_0.001_40


In [4]:
# Build the dataset from DATA_DIR, reusing the vocabulary file stored next to
# the model checkpoint and taking the test split from 'test.txt'.
corpus = Corpus(
    DATA_DIR,
    load_vocab_file=os.path.join(MODEL_DIR, 'vocab.json'),
    test_path='test.txt',
    test_size=-1,
    vocab_size=12000,
    maxlen=30,
    max_lines=100000,
    lowercase=False,
)

Loaded vocab file ./output/hsieh_bpe_20_epochs_pgd_0.05_0.001_40/vocab.json with 5971 words
Number of sentences cropped from ./data/hsieh_bpe/train.txt: 0 out of 100000 total, dropped 1517. OOV rate 0.000
Using test.txt as test set
Number of sentences cropped from ./data/hsieh_bpe/test.txt: 0 out of 100000 total, dropped 1538. OOV rate 0.000


In [5]:
# Group the test split into unshuffled batches of BATCH_SIZE, padding with id 0.
test_data = batchify(
    corpus.test,
    bsz=BATCH_SIZE,
    shuffle=False,
    pad_id=0,
)

In [6]:
# Cross-entropy loss on the classifier scores; evaluate_model differentiates it
# to craft the FGSM / PGD perturbations.
criterion_ce = nn.CrossEntropyLoss()
criterion_ce = criterion_ce.cuda()

In [7]:
def fgsm_attack(sentence_embedding, epsilon, data_grad, clip_min=-0.34, clip_max=0.32):
    """Apply a single FGSM step to a sentence embedding.

    Every element of ``sentence_embedding`` is moved by ``epsilon`` in the
    direction of the sign of ``data_grad``, and the result is clipped to
    ``[clip_min, clip_max]``.

    Args:
        sentence_embedding: tensor holding the embedding (code) to perturb.
        epsilon: step size applied to every element.
        data_grad: gradient of the loss with respect to the embedding; only
            its element-wise sign is used.
        clip_min: lower bound of the clipping range. The default is the value
            that was previously hard-coded.
        clip_max: upper bound of the clipping range. The default is the value
            that was previously hard-coded.

    Returns:
        The perturbed, clipped embedding as a new tensor.
    """
    # Only the direction of the gradient matters for FGSM, not its magnitude.
    step = epsilon * data_grad.sign()
    perturbed_embedding = sentence_embedding + step
    # Keep the perturbed embedding inside the allowed value range.
    return torch.clamp(perturbed_embedding, clip_min, clip_max)

In [8]:
def evaluate_model(data, perturb=None, epsilon=.015, alpha=.015, pgd_iters=49):
    """Return the accuracy of ``enc_classifier`` on ``data``, optionally under attack.

    Each batch is encoded with ``autoencoder`` (``noise=False``,
    ``encode_only=True``). The resulting code is optionally perturbed
    adversarially and then classified; predictions are the argmax over the
    last dimension of the classifier scores.

    Gradients are taken with respect to the code only, via
    ``torch.autograd.grad``, so the ``.grad`` fields of the model parameters
    are neither read nor modified.

    Args:
        data: iterable of batches, each unpacked as
            ``(source, target, lengths, tags)``; ``target`` is not used.
        perturb: ``'fgsm'`` for a single signed-gradient step (see
            ``fgsm_attack``), ``'pgd'`` for iterated signed-gradient steps
            projected onto the epsilon-ball around the clean code. Any other
            value (e.g. ``None``) evaluates the unperturbed code.
        epsilon: FGSM step size, or the PGD maximum per-element perturbation.
        alpha: PGD step size (ignored unless ``perturb == 'pgd'``).
        pgd_iters: number of PGD steps (ignored unless ``perturb == 'pgd'``).

    Returns:
        Fraction of examples whose predicted class equals its tag.
    """
    # Clipping range for PGD; same bounds as the default clamp in fgsm_attack.
    code_min, code_max = -0.34, 0.32

    all_pred = []
    all_tags = []

    for source, _target, lengths, tags in data:
        source = to_gpu(True, Variable(source))
        tags = to_gpu(True, Variable(tags))

        # The attacks only need d(loss)/d(code), so cut the graph at the code
        # instead of backpropagating through the whole autoencoder.
        clean_code = autoencoder(source, lengths, noise=False, encode_only=True).detach()

        if perturb == 'fgsm':
            attack_input = clean_code.detach().requires_grad_(True)
            loss = criterion_ce(enc_classifier(attack_input), tags)
            code_grad, = torch.autograd.grad(loss, attack_input)
            perturbed_code = fgsm_attack(clean_code, epsilon, code_grad)

        elif perturb == 'pgd':
            # alpha: step size; epsilon: radius of the allowed perturbation ball
            lower = clean_code - epsilon
            upper = clean_code + epsilon
            perturbed_code = clean_code
            for _ in range(pgd_iters):
                # Fresh leaf each step: every iteration builds (and frees) its
                # own small graph, so no retain_graph is required.
                attack_input = perturbed_code.detach().requires_grad_(True)
                loss = criterion_ce(enc_classifier(attack_input), tags)
                code_grad, = torch.autograd.grad(loss, attack_input)

                # Step in the direction of the gradient sign.
                stepped = attack_input.detach() + alpha * code_grad.sign()

                # Element-wise projection onto [clean - epsilon, clean + epsilon]
                # (torch.clamp with tensor bounds is avoided for compatibility),
                # followed by the global value-range clip.
                stepped = torch.max(torch.min(stepped, upper), lower)
                perturbed_code = torch.clamp(stepped, code_min, code_max)

        else:
            perturbed_code = clean_code

        # Final predictions need no gradient information.
        with torch.no_grad():
            scores = enc_classifier(perturbed_code)
        _, predicted = torch.max(scores, -1)

        all_pred.extend(predicted.cpu().numpy())
        all_tags.extend(tags.cpu().numpy())

    accuracy = (np.array(all_pred) == np.array(all_tags)).mean()
    return accuracy

In [41]:
# Reference point: test accuracy with no perturbation applied to the codes.
acc = evaluate_model(data=test_data, perturb=None)
print(acc)

0.9791986186582703


In [21]:
# Sweep the FGSM step size from 0.001 in increments of 0.005 (all values < 0.1).
eps_range = list(np.round(np.arange(1e-3, 1e-1, 5e-3), 3))
for epsilon in eps_range:
    acc = evaluate_model(data=test_data, perturb='fgsm', epsilon=epsilon)
    print('FGSM: Epsilon {}, Acc: {}'.format(epsilon, acc))

FGSM: Epsilon 0.001, Acc: 0.9743943933776852
FGSM: Epsilon 0.006, Acc: 0.9312680920217358
FGSM: Epsilon 0.011, Acc: 0.8109999492153775
FGSM: Epsilon 0.016, Acc: 0.43640241734802704
FGSM: Epsilon 0.021, Acc: 0.0631862272104007
FGSM: Epsilon 0.026, Acc: 0.0024275049515006856
FGSM: Epsilon 0.031, Acc: 8.125539586613174e-05
FGSM: Epsilon 0.036, Acc: 1.0156924483266467e-05
FGSM: Epsilon 0.041, Acc: 2.0313848966532935e-05
FGSM: Epsilon 0.046, Acc: 2.0313848966532935e-05
FGSM: Epsilon 0.051, Acc: 2.0313848966532935e-05
FGSM: Epsilon 0.056, Acc: 2.0313848966532935e-05
FGSM: Epsilon 0.061, Acc: 2.0313848966532935e-05
FGSM: Epsilon 0.066, Acc: 1.0156924483266467e-05
FGSM: Epsilon 0.071, Acc: 0.0
FGSM: Epsilon 0.076, Acc: 0.0
FGSM: Epsilon 0.081, Acc: 1.0156924483266467e-05
FGSM: Epsilon 0.086, Acc: 0.0
FGSM: Epsilon 0.091, Acc: 1.0156924483266467e-05
FGSM: Epsilon 0.096, Acc: 1.0156924483266467e-05


In [28]:
# (epsilon, alpha, pgd_iters): fixed ball radius and step size, growing step count.
configs = [(0.05, 0.001, steps) for steps in (5, 10, 20, 40)]
for epsilon, alpha, pgd_iters in configs:
    acc = evaluate_model(
        test_data,
        perturb='pgd',
        epsilon=epsilon,
        alpha=alpha,
        pgd_iters=pgd_iters,
    )
    print('PGD: Epsilon {} Alpha {} Steps {}, Acc: {}'.format(epsilon, alpha, pgd_iters, acc))

PGD: Epsilon 0.05 Alpha 0.001 Steps 5, Acc: 0.9428063582347265
PGD: Epsilon 0.05 Alpha 0.001 Steps 10, Acc: 0.8353257833528007
PGD: Epsilon 0.05 Alpha 0.001 Steps 20, Acc: 0.084505611700777
PGD: Epsilon 0.05 Alpha 0.001 Steps 40, Acc: 0.0


In [67]:
# NOTE(review): leftover debugging cell. The only assignments to `scores` visible
# in this notebook are local to evaluate_model, so this presumably raises a
# NameError on a fresh kernel — confirm and remove.
torch.max(scores, -1)

torch.return_types.max(
values=tensor([ 4.6077, 11.8412, 13.5215,  1.4843,  1.1130,  3.2549,  6.2035,  7.7105,
        11.6899,  0.6113, 14.4979,  4.6565,  2.5570,  0.9846,  8.5023, 10.7292,
         1.6135,  3.4619,  4.2871,  2.6490,  1.6265,  3.2528,  3.6785,  2.4603,
         3.2518,  1.9567,  3.4394,  2.5880,  1.9504,  1.0755,  2.2231,  4.9804,
         0.8627,  1.8109,  3.7487], device='cuda:0', grad_fn=<MaxBackward0>),
indices=tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0'))

In [66]:
# NOTE(review): leftover debugging cell. `scores` has no visible top-level
# assignment in this notebook (it is a local of evaluate_model) — confirm and remove.
scores.size()

torch.Size([35, 3])

In [59]:
# Sanity check: with epsilon=0 the FGSM step adds nothing, so only the value
# clamp inside fgsm_attack can change the codes.
print(
    evaluate_model(
        test_data,
        perturb='fgsm',
        epsilon=0,
    )
)

0.9770555075923011


In [60]:
# Sanity check: epsilon=0 shrinks the PGD ball to the clean code itself, so each
# step is projected straight back (the value-range clamp still applies).
# NOTE(review): the recorded output below is a NameError for `F`, a name that
# does not occur in the evaluate_model shown in this notebook — the output
# looks stale; re-run to refresh it.
print(
    evaluate_model(
        test_data,
        perturb='pgd',
        epsilon=0,
        alpha=.01,
        pgd_iters=40,
    )
)

NameError: name 'F' is not defined

In [58]:
# NOTE(review): leftover debugging cell. `accuracy` is only assigned inside
# evaluate_model in the visible notebook, so this presumably fails on a fresh
# kernel — confirm and remove.
print(accuracy)

0.9772383322329998


<b>Testing FGSM-trained model, epsilon=0.016</b>

In [9]:
# Reference point: test accuracy with no perturbation applied to the codes.
acc = evaluate_model(data=test_data, perturb=None)
print(acc)

0.9779391600223453


In [10]:
# Sweep the FGSM step size from 0.001 in increments of 0.005 (all values < 0.1).
eps_range = list(np.round(np.arange(1e-3, 1e-1, 5e-3), 3))
for epsilon in eps_range:
    acc = evaluate_model(data=test_data, perturb='fgsm', epsilon=epsilon)
    print('FGSM: Epsilon {}, Acc: {}'.format(epsilon, acc))

FGSM: Epsilon 0.001, Acc: 0.9776953938347468
FGSM: Epsilon 0.006, Acc: 0.9754608704484282
FGSM: Epsilon 0.011, Acc: 0.9738459194555888
FGSM: Epsilon 0.016, Acc: 0.9718551622568686
FGSM: Epsilon 0.021, Acc: 0.966339952262455
FGSM: Epsilon 0.026, Acc: 0.9280483469605404
FGSM: Epsilon 0.031, Acc: 0.7972779442384846
FGSM: Epsilon 0.036, Acc: 0.6343202478289574
FGSM: Epsilon 0.041, Acc: 0.5017114417754304
FGSM: Epsilon 0.046, Acc: 0.4060027423696105
FGSM: Epsilon 0.051, Acc: 0.33819511451932355
FGSM: Epsilon 0.056, Acc: 0.28439388553146105
FGSM: Epsilon 0.061, Acc: 0.24567568940124931
FGSM: Epsilon 0.066, Acc: 0.21379310344827587
FGSM: Epsilon 0.071, Acc: 0.19328627291656086
FGSM: Epsilon 0.076, Acc: 0.177319587628866
FGSM: Epsilon 0.081, Acc: 0.17083946980854198
FGSM: Epsilon 0.086, Acc: 0.16199278858361688
FGSM: Epsilon 0.091, Acc: 0.16236859478949775
FGSM: Epsilon 0.096, Acc: 0.1595652836321162


In [9]:
# (epsilon, alpha, pgd_iters): fixed ball radius and step size, growing step count.
configs = [(0.05, 0.001, steps) for steps in (5, 10, 20, 40)]
for epsilon, alpha, pgd_iters in configs:
    acc = evaluate_model(
        test_data,
        perturb='pgd',
        epsilon=epsilon,
        alpha=alpha,
        pgd_iters=pgd_iters,
    )
    print('PGD: Epsilon {} Alpha {} Steps {}, Acc: {}'.format(epsilon, alpha, pgd_iters, acc))

PGD: Epsilon 0.05 Alpha 0.001 Steps 5, Acc: 0.9757249504849932
PGD: Epsilon 0.05 Alpha 0.001 Steps 10, Acc: 0.9737138794373064
PGD: Epsilon 0.05 Alpha 0.001 Steps 20, Acc: 0.9661571276217561
PGD: Epsilon 0.05 Alpha 0.001 Steps 40, Acc: 0.09222487430805952


<b>Testing PGD-trained model, hsieh_bpe_20_epochs_pgd_0.05_0.001_40</b>

In [9]:
# Reference point: test accuracy with no perturbation applied to the codes.
acc = evaluate_model(data=test_data, perturb=None)
print(acc)

0.9774008430247321


In [10]:
# Sweep the FGSM step size from 0.001 in increments of 0.005 (all values < 0.1).
eps_range = list(np.round(np.arange(1e-3, 1e-1, 5e-3), 3))
for epsilon in eps_range:
    acc = evaluate_model(data=test_data, perturb='fgsm', epsilon=epsilon)
    print('FGSM: Epsilon {}, Acc: {}'.format(epsilon, acc))

FGSM: Epsilon 0.001, Acc: 0.9766086029150374
FGSM: Epsilon 0.006, Acc: 0.9729317962520949
FGSM: Epsilon 0.011, Acc: 0.9689502818546544
FGSM: Epsilon 0.016, Acc: 0.9638921334619877
FGSM: Epsilon 0.021, Acc: 0.958183941902392
FGSM: Epsilon 0.026, Acc: 0.9525874765121122
FGSM: Epsilon 0.031, Acc: 0.9451729216393276
FGSM: Epsilon 0.036, Acc: 0.9361637296226702
FGSM: Epsilon 0.041, Acc: 0.9260982174597532
FGSM: Epsilon 0.046, Acc: 0.9148646589812605
FGSM: Epsilon 0.051, Acc: 0.8990604844852979
FGSM: Epsilon 0.056, Acc: 0.8816210451475294
FGSM: Epsilon 0.061, Acc: 0.8597227159616069
FGSM: Epsilon 0.066, Acc: 0.8305723426946321
FGSM: Epsilon 0.071, Acc: 0.7955309532273628
FGSM: Epsilon 0.076, Acc: 0.7532781473769743
FGSM: Epsilon 0.081, Acc: 0.7050632268549083
FGSM: Epsilon 0.086, Acc: 0.6491595144990097
FGSM: Epsilon 0.091, Acc: 0.5908282971916103
FGSM: Epsilon 0.096, Acc: 0.5292671778985323


In [11]:
# (epsilon, alpha, pgd_iters): fixed ball radius and step size, growing step count.
configs = [(0.05, 0.001, steps) for steps in (5, 10, 20, 40)]
for epsilon, alpha, pgd_iters in configs:
    acc = evaluate_model(
        test_data,
        perturb='pgd',
        epsilon=epsilon,
        alpha=alpha,
        pgd_iters=pgd_iters,
    )
    print('PGD: Epsilon {} Alpha {} Steps {}, Acc: {}'.format(epsilon, alpha, pgd_iters, acc))

PGD: Epsilon 0.05 Alpha 0.001 Steps 5, Acc: 0.9735412117210909
PGD: Epsilon 0.05 Alpha 0.001 Steps 10, Acc: 0.9691127926463867
PGD: Epsilon 0.05 Alpha 0.001 Steps 20, Acc: 0.9596973236503986
PGD: Epsilon 0.05 Alpha 0.001 Steps 40, Acc: 0.9277741099994922
