In [None]:
!pip install textattack[tensorflow]
!pip install torchfile

In [None]:
import textattack
import torchtext
import torch
import torch
from torchtext.datasets import IMDB, AG_NEWS, YahooAnswers
from torchtext.vocab import GloVe
from torchtext.data import to_map_style_dataset
from torchtext.data.utils import get_tokenizer
from torch.nn.utils.rnn import pad_sequence
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader, random_split, Dataset
from torch.optim import Adam
import torch.nn.functional as F
from torch.nn import LSTM, GRU, Linear, Softmax, Conv2d, Dropout
from tqdm import tqdm
import nltk
from nltk.corpus import wordnet as wn
import numpy as np

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Experiment configuration.
# NOTE(review): the loaders on the last line are hard-wired to IMDB, so changing
# DATASET alone does not switch the data that is loaded in this cell.
DATASET = 'IMDB'  # choose from IMDB, AG_NEWS, YahooAnswers
MODEL = 'LSTM'  # choose from: GRU, LSTM, CNN, BERT, CNN2
tokenizer = get_tokenizer('basic_english')
num_classes = 2

# Raw train / test splits, loaded in that order.
train_set, test_set = (IMDB(split=split_name) for split_name in ('train', 'test'))

In [None]:
class ClassificationDataset(Dataset):
    """Map-style dataset that yields ``(label, tokenized_text)`` pairs.

    Wraps an indexable collection of ``(label, text)`` records. Labels are
    converted to integers and the text is passed through ``tokenizer``.

    Args:
        dataset: indexable collection of ``(label, text)`` records.
        num_classes: number of target classes (stored, not used here).
        tokenizer: callable applied to each text.
        model: model name; ``'BERT'`` selects the padded/truncated
            tokenizer call, any other value the plain one.
    """

    def __init__(self, dataset, num_classes, tokenizer, model):
        self.dataset = dataset
        self.num_classes = num_classes
        self.model = model
        self.tokenizer = tokenizer

    def __len__(self):
        """Number of records in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(int_label, tokenizer_output)`` for record ``idx``."""
        raw_label, text = self.dataset[idx]

        # String labels: 'neg' maps to 0 and every other string to 1.
        # Any other label type is converted to int and shifted down by one.
        if type(raw_label) == str:
            target = 0 if raw_label == 'neg' else 1
        else:
            target = int(raw_label) - 1

        if self.model != 'BERT':
            return target, self.tokenizer(text)

        # BERT path: fixed-length (512) padded and truncated PyTorch tensors.
        encoded = self.tokenizer(
            text,
            padding="max_length",
            return_tensors='pt',
            max_length=512,
            truncation=True,
        )
        return target, encoded

In [None]:
class AugmentDataset(Dataset):
    """Map-style dataset that yields ``(label, raw_text)`` pairs.

    Same label conversion as a classification dataset, but the text is
    returned untouched (no tokenization).

    Args:
        dataset: indexable collection of ``(label, text)`` records.
        num_classes: number of target classes (stored, not used here).
    """

    def __init__(self, dataset, num_classes):
        self.dataset = dataset
        self.num_classes = num_classes

    def __len__(self):
        """Number of records in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(int_label, text)`` for record ``idx``."""
        raw_label, text = self.dataset[idx]

        # Non-string labels are converted to int and shifted down by one.
        if type(raw_label) != str:
            return int(raw_label) - 1, text

        # String labels: 'neg' maps to 0 and every other string to 1.
        return (0 if raw_label == 'neg' else 1), text

In [None]:
def collate_batch(batch):
    """Turn a list of ``(label, tokens)`` samples into padded batch tensors.

    Each token list is converted to vectors with the module-level
    ``embedding`` object and the sequences are padded to a common length.
    NOTE(review): ``embedding`` is not defined in the cells visible here; it
    must exist in the kernel before this function is called.

    Args:
        batch: iterable of ``(label, tokens)`` pairs.

    Returns:
        ``(labels, padded_vectors)``, both moved to ``device``. ``labels`` is
        an int64 tensor; ``padded_vectors`` is batch-first.
    """
    # Embed every sample once, keeping label and vectors paired.
    embedded = [(label, embedding.get_vecs_by_tokens(tokens)) for label, tokens in batch]

    labels = torch.tensor([label for label, _ in embedded], dtype=torch.int64)
    padded = pad_sequence([vectors for _, vectors in embedded], batch_first=True)
    return labels.to(device), padded.to(device)

In [None]:
# Convert each split to a map-style (indexable) dataset, then wrap it so that
# indexing yields (integer label, tokenized text).
# NOTE: this cell rebinds train_set / test_set, so re-running it without
# re-running the loading cell wraps the already-wrapped datasets again.
train_set = ClassificationDataset(to_map_style_dataset(train_set), num_classes, tokenizer, MODEL)
test_set = ClassificationDataset(to_map_style_dataset(test_set), num_classes, tokenizer, MODEL)

In [None]:
class BidirectionalLSTMClassifier(torch.nn.Module):
    """Bidirectional LSTM classifier over sequences of token vectors.

    The final hidden states of the last layer (forward and backward
    direction) are concatenated, projected to ``num_classes`` scores and
    normalised with a softmax.

    Args:
        num_classes: number of output classes.
        hidden_size: LSTM hidden size per direction.
        num_layers: number of stacked LSTM layers.
        input_size: size of each input token vector. Defaults to 50, the
            value that used to be hard-coded, so existing callers and saved
            checkpoints are unaffected.
    """

    def __init__(self, num_classes, hidden_size, num_layers, input_size=50):
        super().__init__()
        self.num_layers = num_layers
        # Attribute names (LSTM / linear / softmax) determine the state_dict
        # keys, so they are kept stable for checkpoint compatibility.
        self.LSTM = LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=True)
        self.linear = Linear(2 * hidden_size, num_classes)
        self.softmax = Softmax(dim=1)

    def forward(self, x):
        """Classify a batch of embedded sequences.

        Args:
            x: tensor of shape ``(batch, seq_len, input_size)`` (the LSTM is
                built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` whose rows sum to 1.
            NOTE(review): these are probabilities, not logits; confirm any
            loss applied to this output expects that.
        """
        _, (h_n, _) = self.LSTM(x)
        # h_n stacks one final hidden state per (layer, direction); the last
        # two entries belong to the top layer: forward, then backward.
        h_forward = h_n[2 * self.num_layers - 2]
        h_backward = h_n[2 * self.num_layers - 1]
        y = self.linear(torch.cat((h_forward, h_backward), 1))
        return self.softmax(y)

In [None]:
class CustomPyTorchModelWrapper(textattack.models.wrappers.model_wrapper.ModelWrapper):
    """TextAttack wrapper that feeds raw strings to a model expecting token vectors.

    Each input text is tokenized, converted to vectors with ``vocab`` and run
    through ``model`` one sample at a time.

    Args:
        model: callable model taking a ``(1, seq_len, dim)`` tensor.
        outdim: number of scores the model returns per sample.
        vocab: object providing ``get_vecs_by_tokens``. When omitted, 50-d
            GloVe 6B vectors are used.
        tokenizer: callable mapping a string to a list of tokens. When
            omitted, torchtext's ``basic_english`` tokenizer is used.
    """

    # Fallback objects shared by all wrappers built without explicit ones.
    # They are created on first use rather than in the signature, because
    # default argument values are evaluated once, when the class body runs,
    # which would load GloVe as a side effect of merely defining the class.
    _default_vocab = None
    _default_tokenizer = None

    def __init__(self, model, outdim, vocab=None, tokenizer=None):
        if vocab is None:
            if CustomPyTorchModelWrapper._default_vocab is None:
                CustomPyTorchModelWrapper._default_vocab = torchtext.vocab.GloVe("6B", dim=50)
            vocab = CustomPyTorchModelWrapper._default_vocab
        if tokenizer is None:
            if CustomPyTorchModelWrapper._default_tokenizer is None:
                CustomPyTorchModelWrapper._default_tokenizer = torchtext.data.utils.get_tokenizer("basic_english")
            tokenizer = CustomPyTorchModelWrapper._default_tokenizer

        self.model = model
        self.tokenizer = tokenizer
        self.outdim = outdim
        self.vocab = vocab

    def __call__(self, text_input_list):
        """Return a ``(len(text_input_list), outdim)`` tensor of model outputs.

        Samples are processed individually (batch size 1), so no padding is
        needed. Gradients are disabled for the forward passes.
        """
        preds = torch.zeros(size=(len(text_input_list), self.outdim))
        for i, review in enumerate(text_input_list):
            tokens = self.tokenizer(review)
            token_vectors = self.vocab.get_vecs_by_tokens(tokens)
            with torch.no_grad():
                # Add the batch dimension and move to the module-level `device`.
                prediction = self.model(torch.unsqueeze(token_vectors, dim=0).to(device))
                preds[i] = prediction

        return preds

In [None]:
from pathlib import Path
from IPython import get_ipython

# Detect Google Colab from the string form of the active IPython shell.
on_colab = 'google.colab' in str(get_ipython())

# On Colab, mount Google Drive and read/write models there; otherwise use the
# current working directory.
if on_colab:
    from google.colab import drive
    drive.mount("/content/gdrive")
    PATH = "/content/gdrive/My Drive/DeepLearning/MODELS/"
else:
    PATH = "./"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Load Model to be used to generate the adversarial examples for training:
adv_model = BidirectionalLSTMClassifier(num_classes, 64, 1).to(device)
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU can still be opened on a CPU-only runtime.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
checkpoint = torch.load(PATH + "LSTM_IMDB_CLEAN.pt", map_location=device)
adv_model.load_state_dict(checkpoint['model_state_dict'])
# Switch to evaluation mode (the returned module is displayed as cell output).
adv_model.eval()

BidirectionalLSTMClassifier(
  (LSTM): LSTM(50, 64, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=128, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
)

In [None]:
# Fresh (untrained) classifier that the adversarial training will update;
# same architecture as the attack's victim model: 64 hidden units, 1 layer.
model = BidirectionalLSTMClassifier(num_classes, hidden_size=64, num_layers=1).to(device)

In [None]:
# Wrap both models so TextAttack can call them on raw strings. The PWWS attack
# is built against the pre-trained model (adv_model), not the one being trained.
adv_model_wrapper = CustomPyTorchModelWrapper(adv_model, outdim=num_classes)
model_wrapper = CustomPyTorchModelWrapper(model, outdim=num_classes)
attack = textattack.attack_recipes.pwws_ren_2019.PWWSRen2019.build(adv_model_wrapper)

# IMDB train / test splits as TextAttack datasets (loaded in that order).
train_dataset, test_dataset = (
    textattack.datasets.HuggingFaceDataset("imdb", split=split_name)
    for split_name in ("train", "test")
)

# 5 epochs, the first of which is clean; 1000 adversarial examples per epoch;
# learning rate 5e-5; effective batch size 64 (16 per device * 4 accumulation steps).
training_args = textattack.TrainingArgs(
    num_epochs=5,
    num_clean_epochs=1,
    num_train_adv_examples=1000,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    log_to_tb=True,
)

trainer = textattack.Trainer(
    model_wrapper,
    "classification",
    attack=attack,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    training_args=training_args,
)

textattack: Unknown if model of class <class '__main__.BidirectionalLSTMClassifier'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.


Downloading and preparing dataset imdb/plain_text (download: 80.23 MiB, generated: 127.06 MiB, post-processed: Unknown size, total: 207.28 MiB) to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=84125825.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…

textattack: Loading [94mdatasets[0m dataset [94mimdb[0m, split [94mtrain[0m.


Dataset imdb downloaded and prepared to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3. Subsequent calls will reuse this data.


Reusing dataset imdb (/root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3)
textattack: Loading [94mdatasets[0m dataset [94mimdb[0m, split [94mtest[0m.
textattack: `model_wrapper` and the victim model of `attack` are not the same model.


In [None]:
# Run the training configured on `trainer`.
trainer.train()

textattack: Writing logs to ./outputs/2021-12-14-15-10-51-520794/train_log.txt.
textattack: Wrote original training args to ./outputs/2021-12-14-15-10-51-520794/training_args.json.
textattack: ***** Running training *****
textattack:   Num examples = 25000
textattack:   Num epochs = 5
textattack:   Num clean epochs = 1
textattack:   Instantaneous batch size per device = 16
textattack:   Total train batch size (w. parallel, distributed & accumulation) = 64
textattack:   Gradient accumulation steps = 4
textattack:   Total optimization steps = 2019
textattack: Epoch 1
textattack: Running clean epoch 1/1
Iteration:   0%|          | 0/1563 [00:00<?, ?it/s]


AttributeError: ignored

In [None]:
# Un-tokenized IMDB training split: indexable, yielding (integer label, raw text).
train_set_orig = AugmentDataset(to_map_style_dataset(IMDB(split='train')), num_classes)

In [None]:
# Define the sample index here so this cell also works on a fresh kernel
# (it was previously only assigned in a later cell).
idx = 1
# Show the (label, text) pair stored at that index.
train_set_orig[idx]

(0,
 '"I Am Curious: Yellow" is a risible and pretentious steaming pile. It doesn\'t matter what one\'s political views are because this film can hardly be taken seriously on any level. As for the claim that frontal male nudity is an automatic NC-17, that isn\'t true. I\'ve seen R-rated films with male nudity. Granted, they only offer some fleeting views, but where are the R-rated films with gaping vulvas and flapping labia? Nowhere, because they don\'t exist. The same goes for those crappy cable shows: schlongs swinging in the breeze but not a clitoris in sight. And those pretentious indie movies like The Brown Bunny, in which we\'re treated to the site of Vincent Gallo\'s throbbing johnson, but not a trace of pink visible on Chloe Sevigny. Before crying (or implying) "double-standard" in matters of nudity, the mentally obtuse should take into account one unavoidably obvious anatomical difference between men and women: there are no genitals on display when actresses appears nude, and 

In [None]:
# Print the label and the text of one sample, each on its own line.
idx = 1
print(*train_set_orig.__getitem__(idx), sep='\n')

0
"I Am Curious: Yellow" is a risible and pretentious steaming pile. It doesn't matter what one's political views are because this film can hardly be taken seriously on any level. As for the claim that frontal male nudity is an automatic NC-17, that isn't true. I've seen R-rated films with male nudity. Granted, they only offer some fleeting views, but where are the R-rated films with gaping vulvas and flapping labia? Nowhere, because they don't exist. The same goes for those crappy cable shows: schlongs swinging in the breeze but not a clitoris in sight. And those pretentious indie movies like The Brown Bunny, in which we're treated to the site of Vincent Gallo's throbbing johnson, but not a trace of pink visible on Chloe Sevigny. Before crying (or implying) "double-standard" in matters of nudity, the mentally obtuse should take into account one unavoidably obvious anatomical difference between men and women: there are no genitals on display when actresses appears nude, and the same ca

In [None]:
# Keep the chosen sample's label and text for the single-sample attack below.
lbl, txt = train_set_orig[idx]

In [None]:
# Display the original (unperturbed) text of the selected sample.
txt

'"I Am Curious: Yellow" is a risible and pretentious steaming pile. It doesn\'t matter what one\'s political views are because this film can hardly be taken seriously on any level. As for the claim that frontal male nudity is an automatic NC-17, that isn\'t true. I\'ve seen R-rated films with male nudity. Granted, they only offer some fleeting views, but where are the R-rated films with gaping vulvas and flapping labia? Nowhere, because they don\'t exist. The same goes for those crappy cable shows: schlongs swinging in the breeze but not a clitoris in sight. And those pretentious indie movies like The Brown Bunny, in which we\'re treated to the site of Vincent Gallo\'s throbbing johnson, but not a trace of pink visible on Chloe Sevigny. Before crying (or implying) "double-standard" in matters of nudity, the mentally obtuse should take into account one unavoidably obvious anatomical difference between men and women: there are no genitals on display when actresses appears nude, and the s

In [None]:
# Attack the selected sample: `txt` is the input text, `lbl` its dataset label.
res = attack.attack(txt, lbl)

In [None]:
# Text of the sample after the attack's perturbation, displayed for comparison
# with the original.
new_txt = res.perturbed_text()
new_txt

'"ace embody singular: sensationalistic" is a comic and ostentatious steamy throng. It doesn\'t topic what one\'s political views are because this film can hardly be taken seriously on any level. As for the claim that frontal male nudity is an automatic NC-17, that isn\'t true. I\'ve experience R-rated films with male nudity. Granted, they only offer some fleeting views, but where are the R-rated films with gaping vulvas and flapping labia? Nowhere, because they don\'t exist. The same goes for those crappy cable shows: schlongs swinging in the breeze but not a clitoris in sight. And those ostentatious indie movies like The Brown Bunny, in which we\'re treated to the site of Vincent Gallo\'s throbbing johnson, but not a trace of pink visible on Chloe Sevigny. Before crying (or implying) "double-standard" in matters of nudity, the mentally obtuse should take into account one unavoidably obvious anatomical difference between men and women: there are no genitals on display when actresses a

In [None]:
# Summary string of the attack outcome (the recorded output has the form
# "<label> (<confidence>) --> <label> (<confidence>)").
res.goal_function_result_str()

'0 (99%) --> 1 (83%)'

In [None]:
# True only when the result's exact type is FailedAttackResult.
type(res) == textattack.attack_results.failed_attack_result.FailedAttackResult

False

In [None]:
# Inspect the concrete result class returned by the attack.
type(res)

textattack.attack_results.successful_attack_result.SuccessfulAttackResult

In [None]:
# Number of samples in the un-tokenized training set.
len(train_set_orig)

25000

In [None]:
# Build the augmented training set as a list of (label, text) tuples:
#   * every original sample is kept;
#   * when PWWS succeeds on a sample, its adversarial text is added as well,
#     paired with the original label.
# NOTE: this attacks every training sample and the recorded run shows roughly
# 20 s per sample, so expect a very long runtime on the full set.
to_return = []

# The context manager closes the progress bar even if an attack raises.
with tqdm(total=len(train_set_orig)) as pbar:
    for i in range(len(train_set_orig)):

        # Get the sample from the original training set (single lookup)
        lbl, txt = train_set_orig[i]

        # Attack that sample using PWWS
        res = attack.attack(txt, lbl)

        # Append the original sample to_return set
        to_return.append((lbl, txt))

        # Append the adversarial sample if the attack is successful with the original label
        if isinstance(res, textattack.attack_results.successful_attack_result.SuccessfulAttackResult):
            to_return.append((lbl, res.perturbed_text()))

        pbar.update()

# to_return now holds the original text alone when the attack did not succeed,
# or both the original and the adversarial example when it did.
print(len(to_return))

Sample [8/25000]:   0%|          | 0/25000 [00:00<?, ?it/s]

SAMPLE NUMBER = 0


Sample [8/25000]:   0%|          | 42/25000 [11:29<137:00:31, 19.76s/it]