### Importing

In [None]:
!pip install textattack

Collecting textattack
  Downloading textattack-0.3.10-py3-none-any.whl (445 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m445.7/445.7 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bert-score>=0.3.5 (from textattack)
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting flair (from textattack)
  Downloading flair-0.13.1-py3-none-any.whl (388 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m388.3/388.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Collecting language-tool-python (from textattack)
  Downloading language_tool_python-2.8-py3-none-any.whl (35 kB)
Collecting lemminflect (from textattack)
  Downloading lemminflect-0.2.3-py3-none-any.whl (769 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m769.7/769.7 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lru-di

In [None]:
import textattack
import transformers
from textattack.constraints.pre_transformation import RepeatModification, StopwordModification
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.transformations import WordSwapEmbedding
from textattack.search_methods import GreedyWordSwapWIR

textattack: Updating TextAttack package dependencies.
textattack: Downloading NLTK required packages.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package omw to /root/nltk_data...
[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:

# Victim model: one Hugging Face checkpoint supplies both the classifier and its
# tokenizer; the wrapper adapts the pair to TextAttack's model interface.
checkpoint = "textattack/bert-base-uncased-imdb"
model = transformers.AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint)
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

# An `Attack` is assembled from four parts:
#   goal function  - untargeted: any change of the predicted label counts
#   constraints    - restrictions on which words may change and how far they may move
#   transformation - word swaps drawn from embedding neighbours (up to 50 candidates)
#   search method  - greedy word swap with "delete"-based word importance ranking
goal_function = textattack.goal_functions.UntargetedClassification(model_wrapper)
constraints = [
    RepeatModification(),
    StopwordModification(),
    WordEmbeddingDistance(min_cos_sim=0.9),
]
transformation = WordSwapEmbedding(max_candidates=50)
search_method = GreedyWordSwapWIR(wir_method="delete")

attack = textattack.Attack(
    goal_function,
    constraints,
    transformation,
    search_method,
)

# Attack a single sentence whose ground-truth label is 1 (positive).
input_text = "I really enjoyed the new movie that came out last month."
label = 1
attack_result = attack.attack(input_text, label)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/511 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Downloading https://textattack.s3.amazonaws.com/word_embeddings/paragramcf.
100%|██████████| 481M/481M [00:10<00:00, 45.1MB/s]
textattack: Unzipping file /root/.cache/textattack/tmp5nz89w8t.zip to /root/.cache/textattack/word_embeddings/paragramcf.
textattack: Successfully saved word_embeddings/paragramcf to cache.


In [None]:
# Display the text the attack started from.
attack_result.original_text()

'I really enjoyed the new movie that came out last month.'

In [None]:
# Display the text after the attack's word substitutions.
attack_result.perturbed_text()

'I really enjoyed the novel movies that came out last month.'

In [None]:
# Display the goal-function outcome string: original label (confidence) --> result.
attack_result.goal_function_result_str()

'1 (99%) --> [FAILED]'

### Try BAE

In [None]:
from textattack.attack_recipes import BAEGarg2019

# Same sentence and ground-truth label as in the previous attack.
input_text = "I really enjoyed the new movie that came out last month."
label = 1  # Positive

# Build the ready-made BAE recipe around the already-loaded model wrapper and run it.
attack = BAEGarg2019.build(model_wrapper)
attack_result = attack.attack(input_text, label)

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.


In [None]:
# Display the text the BAE attack started from.
attack_result.original_text()

'I really enjoyed the new movie that came out last month.'

In [None]:
# Display the text produced by the BAE attack.
attack_result.perturbed_text()

'I really hated the new movie that came out last month.'

In [None]:
# Display the goal-function outcome string: original label (confidence) --> result.
attack_result.goal_function_result_str()

'1 (99%) --> 0 (100%)'

### Simulated Annealing

In [None]:
from textattack.search_methods import SearchMethod
from textattack.goal_function_results import GoalFunctionResultStatus
import random
import numpy as np
import textattack
import transformers
from textattack.constraints.pre_transformation import StopwordModification
from textattack.goal_functions import UntargetedClassification
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.transformations import WordSwapEmbedding

In [None]:
class SimulatedAnnealing(SearchMethod):
    """Simulated-annealing search over single-step text transformations.

    Each step picks one random transformation of the current text, scores it
    with the goal function, and moves to it if the score improved or, with
    probability ``exp(delta / temp)``, if it did not. The search stops on
    success, when the temperature reaches zero, after ``max_iter`` steps, when
    no transformations are available, or when the goal function reports that
    the search is over.

    Args:
        temperature: Starting temperature. Values <= 0 end the search
            immediately.
        max_iter: Maximum number of candidates to evaluate.
        seed: Optional seed for a private random generator. When ``None``
            (default) the global ``random`` module is used, so an external
            ``random.seed(...)`` call still controls the search.
    """

    def __init__(self, temperature=1.0, max_iter=100, seed=None):
        self.temperature = temperature
        self.max_iter = max_iter
        # Both the `random` module and `random.Random` expose choice()/random().
        self._rng = random if seed is None else random.Random(seed)

    def perform_search(self, initial_result):
        """Run the annealing loop starting from ``initial_result``.

        Returns:
            The first result whose goal status is SUCCEEDED, if one is found;
            otherwise the highest-scoring result evaluated during the search
            (``initial_result`` if nothing scored higher).
        """
        # `current_*` is the state the walk is at (it may get worse after an
        # accepted downhill move); `best_result` is the best state ever seen.
        current_text = initial_result.attacked_text
        current_score = initial_result.score
        best_result = initial_result
        temp = self.temperature
        episode = 0

        while best_result.goal_status != GoalFunctionResultStatus.SUCCEEDED:
            temp = self._schedule(temp, episode)
            # `<=` / `>=` so that negative settings stop the search instead of
            # looping with a meaningless acceptance probability.
            if temp <= 0.0 or episode >= self.max_iter:
                break

            candidates = self.get_transformations(
                current_text, original_text=initial_result.attacked_text)
            if len(candidates) == 0:
                break

            next_text = self._rng.choice(candidates)
            results, search_over = self.get_goal_results([next_text])
            if len(results) == 0:
                # Nothing was evaluated, so there is nothing to index into.
                break
            result = results[0]

            # A candidate that reaches the goal is always kept; it must not be
            # lost to the probabilistic acceptance test below.
            if result.goal_status == GoalFunctionResultStatus.SUCCEEDED:
                return result

            if result.score > best_result.score:
                best_result = result

            delta = result.score - current_score
            # Uphill moves are always taken (and short-circuit the exp, which
            # therefore only ever sees delta <= 0); downhill moves are taken
            # with probability exp(delta / temp).
            if (delta > 0) or (self._rng.random() < np.exp(delta / temp)):
                current_text = next_text
                current_score = result.score

            episode += 1
            if search_over:
                break

        return best_result

    def _schedule(self, temp, i):
        """Return the next temperature given the current one and the step index.

        ``perform_search`` feeds the previous temperature back in, so this
        division compounds: after step ``k`` the temperature is
        ``self.temperature / (k + 1)!``. A geometric alternative would be
        ``temp * 0.9``.
        """
        return temp / float(i + 1)

    @property
    def is_black_box(self):
        """Always True: the search only reads goal-function scores and statuses."""
        return True

In [None]:
# Load model, tokenizer, and model_wrapper
model = transformers.AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")
tokenizer = transformers.AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

# Construct our four components for `Attack`
goal_function = UntargetedClassification(model_wrapper)
constraints = [
    StopwordModification(),
    WordEmbeddingDistance(min_cos_sim=0.7)
]
transformation = WordSwapEmbedding(max_candidates=50)
# Hyperparameters passed by keyword so their meaning is visible at the call site.
search_method = SimulatedAnnealing(temperature=10.0, max_iter=100)

# Construct the actual attack
attack = textattack.Attack(goal_function, constraints, transformation, search_method)

input_text = "I really enjoyed the new movie that came out last month."
label = 1 #Positive

# SimulatedAnnealing draws its candidates and acceptance tests from the global
# `random` module; seed it so re-running this cell makes the same random draws.
random.seed(42)
attack_result = attack.attack(input_text, label)

textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.


In [None]:
# Perturbed sentence on the first line, goal-function outcome on the second.
print(
    attack_result.perturbed_text(),
    attack_result.goal_function_result_str(),
    sep="\n",
)

I really rained the novel cinematographic that became out last mes.
1 (99%) --> 0 (100%)
