<a href="https://colab.research.google.com/github/riccardocappi/Text-Adversarial-Attack/blob/adversarial-training/TextAttack_and_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing dependencies

In [None]:
!pip install textattack

Collecting textattack
  Downloading textattack-0.3.10-py3-none-any.whl (445 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m445.7/445.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bert-score>=0.3.5 (from textattack)
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting flair (from textattack)
  Downloading flair-0.13.1-py3-none-any.whl (388 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.3/388.3 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
Collecting language-tool-python (from textattack)
  Downloading language_tool_python-2.8-py3-none-any.whl (35 kB)
Collecting lemminflect (from textattack)
  Downloading lemminflect-0.2.3-py3-none-any.whl (769 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m769.7/769.7 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lru-

## Imports

In [None]:
import textattack
import os
import transformers
from textattack.datasets import HuggingFaceDataset
from textattack import Attacker
from textattack.attack_recipes import BAEGarg2019
from textattack.metrics.attack_metrics import (
    AttackQueries,
    AttackSuccessRate,
    WordsPerturbed,
)
import json
import os

2024-05-08 12:56:48.092989: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Helper Functions

In [None]:
def load_model_and_tokenizer(hugging_face_path):
    """Load a sequence-classification checkpoint and wrap it for TextAttack.

    Args:
        hugging_face_path: Identifier or path passed unchanged to the
            ``from_pretrained`` loaders of both the model and the tokenizer.

    Returns:
        A ``HuggingFaceModelWrapper`` built from the loaded model and tokenizer.
    """
    classifier = transformers.AutoModelForSequenceClassification.from_pretrained(
        hugging_face_path
    )
    tok = transformers.AutoTokenizer.from_pretrained(hugging_face_path)
    return textattack.models.wrappers.HuggingFaceModelWrapper(classifier, tok)


def save_to_file(data, filename):
    """Serialise ``data`` as JSON into ``filename``.

    The file is opened in text write mode, so existing content is replaced.

    Args:
        data: Object handed to ``json.dump``.
        filename: Path of the file to write.
    """
    with open(filename, mode='w') as out_stream:
        json.dump(data, fp=out_stream)


def save_perturbed_metrics(run_results, drive_path=None, index=None):
    """Summarise one batch of attack results and save it as JSON.

    The summary contains the attack success, words-perturbed and query
    statistics computed by TextAttack's metric classes, plus the original and
    perturbed text of every successful attack. It is written to
    ``perturbed_text/perturbed_<index>.json`` and, when ``drive_path`` is
    given, to ``<drive_path>/perturbed_<index>.json`` as well.

    Args:
        run_results: Iterable of attack results, as returned by
            ``Attacker.attack_dataset()``.
        drive_path: Optional second directory to copy the JSON file into.
        index: Number used in the output file name. When omitted, the
            notebook-level loop counter ``i`` is used, which keeps existing
            callers (that do not pass ``index``) working.

    Raises:
        ValueError: If ``index`` is not given and no global ``i`` exists.
    """
    if index is None:
        # Backward compatibility: earlier versions read the attack loop's
        # global counter ``i`` directly to name the output file.
        index = globals().get('i')
        if index is None:
            raise ValueError(
                "save_perturbed_metrics needs an `index` (or a global loop counter `i`)"
            )

    os.makedirs('./perturbed_text', exist_ok=True)

    attack_success_stats = AttackSuccessRate().calculate(run_results)
    words_perturbed_stats = WordsPerturbed().calculate(run_results)
    # This entry is removed before serialising
    # (presumably it is not JSON-friendly -- TODO confirm).
    words_perturbed_stats.pop('num_words_changed_until_success', None)
    attack_query_stats = AttackQueries().calculate(run_results)

    # Keep only the attacks that actually succeeded.
    perturbed_examples = [
        {
            "original_text": result.original_text(),
            "perturbed_text": result.perturbed_text(),
            "gfr": result.goal_function_result_str(),
        }
        for result in run_results
        if isinstance(result, textattack.attack_results.SuccessfulAttackResult)
    ]
    perturbed_dict = {
        "attack_success_stats": attack_success_stats,
        "words_perturbed_stats": words_perturbed_stats,
        "attack_query_stats": attack_query_stats,
        "perturbed_examples": perturbed_examples,
    }

    file_name = f'perturbed_{index}.json'
    try:
        save_to_file(perturbed_dict, f'perturbed_text/{file_name}')
        if drive_path is not None:
            save_to_file(perturbed_dict, os.path.join(drive_path, file_name))
    except Exception as err:
        # Saving stays best-effort so a long attack run is not aborted, but the
        # cause is reported and KeyboardInterrupt/SystemExit are not swallowed.
        print(f"Error while saving perturbed examples on drive at iteration {index}: {err!r}")

## Loading dataset and model
Load the IMDB training split so that the adversarial examples found by the attack can later be added to the training set.

In [None]:
# Model under attack, wrapped for TextAttack (the checkpoint name suggests BERT
# base uncased fine-tuned on IMDB).
bert_imdb = load_model_and_tokenizer("textattack/bert-base-uncased-imdb")
# Training split of the "imdb" dataset; the attack cell draws its examples from it.
imdb_dataset_train = HuggingFaceDataset("imdb", split="train", shuffle=True)

textattack: Loading [94mdatasets[0m dataset [94mimdb[0m, split [94mtrain[0m.


# Attack

In [None]:
batch_size = 10  # examples attacked per iteration
n_iter = 300     # exclusive upper bound of the iteration index
first_iter = 15  # iteration index the loop starts from
# Second output directory handed to save_perturbed_metrics (machine-specific mount).
drive_dir = '/run/user/1000/gvfs/google-drive:host=studenti.unipd.it,user=damiano.bertoldo/GVfsSharedWithMe/1OKUVPYQzqhGpqt_wXW2NRFoYukW7q2V7/18czngAsdABbSvUzTbIo8SIRi_G-f8Z7G/'

# NOTE: the loop variable must stay named `i`; save_perturbed_metrics reads it
# as a global to number its output file.
for i in range(first_iter, n_iter):
    attack = BAEGarg2019.build(bert_imdb)
    # Each iteration attacks the next `batch_size` examples of the dataset.
    attack_args = textattack.AttackArgs(
        num_examples=batch_size,
        num_examples_offset=i * batch_size,
        parallel=True,
    )
    attacker = Attacker(attack, imdb_dataset_train, attack_args)
    adv_exp_bae_bert_imdb = attacker.attack_dataset()
    save_perturbed_metrics(adv_exp_bae_bert_imdb, drive_dir)

# AUGMENTATION

In [None]:
from google.colab import drive
import json
# Mount Google Drive at /content/drive so that files stored there can be read.
drive.mount('/content/drive')

# Load perturbed_0.json from Drive; the file name follows the
# perturbed_<iteration>.json pattern written by save_perturbed_metrics.
with open("/content/drive/MyDrive/perturbed_text/perturbed_0.json", "r") as json_file:
    data = json.load(json_file)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Import transformations, constraints, and the Augmenter
import textattack

from textattack.transformations import CompositeTransformation
from textattack.transformations import WordSwapMaskedLM

from textattack.constraints.pre_transformation import RepeatModification
from textattack.constraints.pre_transformation import StopwordModification

from textattack.augmentation import Augmenter

In [None]:
# Set up the transformation: BAE-style masked-LM word swaps with bert-base-uncased.
transformation = CompositeTransformation(
    [
        WordSwapMaskedLM(
            method='bae',
            masked_language_model='bert-base-uncased',
            tokenizer=None,
            max_length=512,
            max_candidates=3,
            min_confidence=0.0005,
            batch_size=16,
        )
    ]
)
# Set up constraints
constraints = [RepeatModification(), StopwordModification()]
# Create the augmenter. enable_advanced_metrics=True is required here: with it,
# augment() returns (augmented_texts, perplexity_stats, use_stats); without it,
# augment() returns only the list of augmented strings and the metric lookups
# in the loop below fail with a TypeError.
augmenter = Augmenter(
    transformation=transformation,
    constraints=constraints,
    pct_words_to_swap=0.2,
    transformations_per_example=2,
    enable_advanced_metrics=True,
)


perturbed_examples = data["perturbed_examples"]

for example in perturbed_examples:
    print(example["original_text"])
    # The augmentations are generated from the adversarial (perturbed) text.
    results = augmenter.augment(example["perturbed_text"])
    augmented_texts, perplexity_stats, use_stats = results
    print(f"Average Original Perplexity Score: {perplexity_stats['avg_original_perplexity']}\n")
    print(f"Average Augment Perplexity Score: {perplexity_stats['avg_attack_perplexity']}\n")
    print(f"Average Augment USE Score: {use_stats['avg_attack_use_score']}\n")
    print("Augmentations:")
    # A bare expression inside a loop is not displayed by the notebook, so the
    # augmented texts are printed explicitly.
    for augmented_text in augmented_texts:
        print(augmented_text)

# Fine-tuning function

In [None]:
def fineTuneModel(model, train_dataset, val_dataset, epochs=20, batch_s=16):
    """Fine-tune ``model`` on ``train_dataset`` with AdamW and cross-entropy loss.

    When the model exposes a distinct ``base_model`` sub-module (as Hugging Face
    models with a task head do), its parameters are frozen and only the
    remaining parameters are optimised. Otherwise every parameter is trained.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` whose
            output has a ``logits`` attribute. It is moved in place to the GPU
            when one is available, otherwise it stays on the CPU.
        train_dataset: Dataset whose batches are dicts with the keys
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        val_dataset: Dataset wrapped in a validation ``DataLoader``; it is not
            consumed by the training loop shown here.
        epochs: Number of passes over ``train_dataset``.
        batch_s: Batch size used for both data loaders.

    Raises:
        ValueError: If the model has no trainable parameters left to optimise.
    """
    # Local import: torch is not imported by the notebook's visible import cell.
    import torch

    # Freeze the pretrained backbone only. The previous code assigned the
    # misspelled attribute `require_grad`, which silently froze nothing, and
    # freezing *every* parameter would leave the loss without a gradient.
    backbone = getattr(model, 'base_model', None)
    if backbone is not None and backbone is not model:
        for param in backbone.parameters():
            param.requires_grad = False

    trainable_params = [param for param in model.parameters() if param.requires_grad]
    if not trainable_params:
        raise ValueError("fineTuneModel: the model has no trainable parameters")

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_s, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_s)

    # Only the parameters that still require gradients are handed to the optimizer.
    optimizer = torch.optim.AdamW(trainable_params, lr=0.001)
    loss_fn = torch.nn.CrossEntropyLoss()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The batches are moved to `device` below, so the model must live there too.
    model.to(device)

    #Training
    for epoch in range(epochs):
        model.train()

        for batch in train_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Loss calculation and backpropagation
            loss = loss_fn(logits, labels)
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()
