<a href="https://colab.research.google.com/github/riccardocappi/Text-Adversarial-Attack/blob/adversarial-training/adversarial_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installing dependencies

In [None]:
!pip install textattack

## Imports

In [4]:
import json
import os

import textattack
import torch
import transformers
from textattack import Attacker
from textattack.attack_recipes import BAEGarg2019
from textattack.datasets import HuggingFaceDataset
from textattack.metrics.attack_metrics import (
    AttackQueries,
    AttackSuccessRate,
    WordsPerturbed,
)

## Helper Functions

In [5]:
def load_model_and_tokenizer(hugging_face_path):
    """Load a sequence-classification checkpoint and wrap it for TextAttack.

    Args:
        hugging_face_path: Identifier handed to both ``from_pretrained`` calls
            (the model and its tokenizer are loaded from the same location).

    Returns:
        A ``textattack.models.wrappers.HuggingFaceModelWrapper`` built from the
        loaded model and tokenizer.
    """
    wrappers = textattack.models.wrappers
    classifier = transformers.AutoModelForSequenceClassification.from_pretrained(hugging_face_path)
    checkpoint_tokenizer = transformers.AutoTokenizer.from_pretrained(hugging_face_path)
    return wrappers.HuggingFaceModelWrapper(classifier, checkpoint_tokenizer)


def save_to_file(data, filename):
    """Serialise ``data`` as JSON into ``filename``.

    Missing parent directories are created first, so writing to a path such as
    ``perturbed_text/perturbed_0.json`` works on a fresh runtime where the
    ``perturbed_text`` folder does not exist yet.

    Args:
        data: Any object ``json.dump`` can serialise.
        filename: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the directory or file cannot be created or written.
        TypeError: If ``data`` is not JSON serialisable.
    """
    parent_dir = os.path.dirname(filename)
    # A bare file name has no directory component; nothing to create then.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, 'w') as f:
        json.dump(data, f)


def save_perturbed_metrics(run_results, drive_path=None, iteration=None):
    """Summarise one attack run and persist its successful adversarial examples.

    Args:
        run_results: Attack results of one run. Only entries that are
            ``SuccessfulAttackResult`` instances are exported as examples.
        drive_path: Optional prefix for a second copy of the JSON file. It is
            concatenated directly with the file name, so it should end with a
            path separator.
        iteration: Index used in the output file name
            ``perturbed_<iteration>.json``. When omitted, the notebook-level
            loop counter ``i`` is used, which is what this function always
            relied on before the parameter existed.

    Returns:
        dict with the keys ``attack_success_stats``, ``words_perturbed_stats``,
        ``attack_query_stats`` and ``perturbed_examples``.

    Raises:
        NameError: If ``iteration`` is omitted and no global ``i`` exists.
    """
    if iteration is None:
        # Backward compatibility: existing callers rely on the global loop counter.
        if "i" not in globals():
            raise NameError(
                "save_perturbed_metrics needs an `iteration` argument "
                "(no global loop counter `i` is defined)"
            )
        iteration = globals()["i"]

    attack_success_stats = AttackSuccessRate().calculate(run_results)
    words_perturbed_stats = WordsPerturbed().calculate(run_results)
    # This entry is dropped from the summary before it is stored in the dict.
    words_perturbed_stats.pop('num_words_changed_until_success', None)
    attack_query_stats = AttackQueries().calculate(run_results)

    perturbed_examples = [
        {"original_text": result.original_text(), "perturbed_text": result.perturbed_text(),
         "gfr": result.goal_function_result_str()}
        for result in run_results
        if isinstance(result, textattack.attack_results.SuccessfulAttackResult)
    ]
    perturbed_dict = {"attack_success_stats": attack_success_stats, "words_perturbed_stats": words_perturbed_stats,
                      "attack_query_stats": attack_query_stats, "perturbed_examples": perturbed_examples}

    # NOTE(review): only the examples list is written to disk (unchanged file
    # format); the metrics are handed back through the return value instead of
    # being discarded. Confirm whether the metrics should be persisted as well.
    file_name = f'perturbed_{iteration}.json'
    try:
        save_to_file(perturbed_examples, 'perturbed_text/' + file_name)
        if drive_path is not None:
            save_to_file(perturbed_examples, drive_path + file_name)
    except Exception as exc:
        # Saving stays best effort so a long attack loop keeps running, but the
        # real cause is reported instead of being hidden.
        print(f"Error while saving perturbed examples at iteration {iteration}: {exc!r}")

    return perturbed_dict

## Loading dataset and model
Load the IMDB training split so that the adversarial examples found by the attack can later be added to the training set.

In [6]:
# Victim classifier: the IMDB BERT checkpoint, wrapped for TextAttack.
bert_imdb = load_model_and_tokenizer(hugging_face_path="textattack/bert-base-uncased-imdb")

# Shuffled IMDB training split that the attack loop draws its examples from.
imdb_dataset_train = HuggingFaceDataset(
    "imdb",
    split="train",
    shuffle=True,
)

textattack: Loading [94mdatasets[0m dataset [94mimdb[0m, split [94mtrain[0m.


# Attack

In [None]:
batch_size = 10
n_iter = 200

# Build the attack once: the recipe depends only on the victim model, so
# rebuilding it on every iteration just repeats the same setup work.
attack = BAEGarg2019.build(bert_imdb)

# NOTE: the loop variable must stay named `i` -- save_perturbed_metrics is
# called without an explicit index and picks up the global `i` to name its file.
for i in range(n_iter):
    # Each iteration attacks the next `batch_size` examples of the training split.
    attack_args = textattack.AttackArgs(num_examples=batch_size, num_examples_offset=i * batch_size, parallel=True)
    attacker = Attacker(attack, imdb_dataset_train, attack_args)
    adv_exp_bae_bert_imdb = attacker.attack_dataset()
    save_perturbed_metrics(adv_exp_bae_bert_imdb)

### Try with the Yelp dataset

In [None]:
# Reuse the shared loader instead of repeating the model/tokenizer/wrapper steps.
model_wrapper = load_model_and_tokenizer("textattack/bert-base-uncased-yelp-polarity")

# Keep the bare model and tokenizer available under their original names.
model = model_wrapper.model
tokenizer = model_wrapper.tokenizer

In [None]:
# Shuffled test split of the Yelp-polarity dataset (no subset selected).
dataset = HuggingFaceDataset(
    "yelp_polarity",
    split="test",
    shuffle=True,
)

In [None]:
# The recipe is referenced through the already-imported `textattack` package:
# the bare name `GeneticAlgorithmAlzantot2018` is never imported in this notebook.
attack = textattack.attack_recipes.GeneticAlgorithmAlzantot2018.build(model_wrapper)

# Attack 10 examples of the Yelp dataset loaded above.
attack_args = textattack.AttackArgs(num_examples=10, parallel=True)
attacker = Attacker(attack, dataset, attack_args)
# Keep the results in a variable so they can be inspected or saved afterwards.
adv_exp_ga_bert_yelp = attacker.attack_dataset()

# ADVERSARIAL TRAINING & DATA AUGMENTATION #


In [None]:
# Separate, unwrapped copy of the Yelp-polarity classifier for the fine-tuning section.
yelp_checkpoint = "textattack/bert-base-uncased-yelp-polarity"
BERTModel = transformers.AutoModelForSequenceClassification.from_pretrained(yelp_checkpoint)

In [None]:
# Show the parameter count reported by the loaded model.
BERTModel.num_parameters()

In [None]:
# Display the loaded model as the cell output.
BERTModel

# FineTune function #

In [None]:
def fineTuneModel(model, train_dataset, val_dataset, epochs=20, batch_s=16):
    for param in model.parameters():
        param.require_grad = False

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_s, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_s)

    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    loss_fn = torch.nn.CrossEntropyLoss()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    #Training
    for epoch in range(epochs):
        model.train()

        for batch in train_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits

            # Loss calculation and backpropagation
            loss = loss_fn(logits, labels)
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()
