In [1]:
import textattack

In [51]:
import numpy as np
import torch

from textattack.goal_function_results import ClassificationGoalFunctionResult
from textattack.goal_functions import GoalFunction

class ReduceSimilarity(GoalFunction):
    """Goal function that rewards lowering the similarity between a reference text and perturbed text.

    The wrapped model is expected to return one embedding vector per input text.
    For each attacked example the embedding of ``ground_truth_output`` is stored
    as the reference; every candidate is then scored as
    ``1 - cosine_similarity(reference, candidate_embedding)``, so a higher score
    means the candidate has moved further from the reference.
    """

    # Embedding of the reference text for the example currently under attack.
    # Populated by `init_attack_example`; None until then.
    ground_truth_representation = None

    def init_attack_example(self, attacked_text, ground_truth_output):
        """Embed ``ground_truth_output`` as the reference vector, then defer to the base class.

        Args:
            attacked_text: the example about to be attacked (forwarded unchanged).
            ground_truth_output: text whose embedding becomes the reference.

        Returns:
            Whatever the parent ``init_attack_example`` returns.
        """
        # The model wrapper takes a list of texts, so send a batch of one and
        # normalize the result exactly like any other model output (tensor
        # conversion, shape validation, move to CPU).
        reference_batch = [ground_truth_output]
        with torch.no_grad():
            raw_embedding = self.model(reference_batch)
        embedding = self._process_model_outputs(reference_batch, raw_embedding)
        self.ground_truth_representation = embedding.squeeze(0)  # [1, emb_dim] -> [emb_dim]
        return super().init_attack_example(attacked_text, ground_truth_output)

    def _process_model_outputs(self, inputs, scores):
        """Validate raw model outputs and return them as a 2-D CPU tensor.

        Args:
            inputs: the batch that was sent to the model; only its length is used.
            scores: model output as a list, ``np.ndarray`` or ``torch.Tensor``.

        Returns:
            A CPU tensor with one row per input.

        Raises:
            TypeError: if ``scores`` is not a list, ndarray or tensor.
            ValueError: if the output shape does not match the number of inputs.
        """
        # Automatically cast a list or ndarray of predictions to a tensor.
        if isinstance(scores, (list, np.ndarray)):
            scores = torch.tensor(scores)

        # Ensure the returned value is now a tensor.
        if not isinstance(scores, torch.Tensor):
            raise TypeError(
                "Must have list, np.ndarray, or torch.Tensor of "
                f"scores. Got type {type(scores)}"
            )

        if scores.ndim == 1 and len(inputs) == 1:
            # Restore the batch dimension if the model squeezed it away.
            scores = scores.unsqueeze(dim=0)
        elif scores.ndim != 2 or scores.shape[0] != len(inputs):
            # Wrong rank (including a 1-D output for several inputs) or a row
            # count that does not match the number of inputs.
            raise ValueError(
                f"Model return score of shape {scores.shape} for {len(inputs)} inputs."
            )
        return scores.cpu()

    def _is_goal_complete(self, model_output, _):
        """Always False: no completion criterion is defined for this goal."""
        return False # TODO(jxm): implement?

    def _similarity_to_reference(self, embedding):
        """Return the cosine similarity (a Python float) between the stored reference and ``embedding``.

        Raises:
            RuntimeError: if no reference has been stored yet.
        """
        if self.ground_truth_representation is None:
            raise RuntimeError(
                "No reference embedding stored; call init_attack_example() before scoring."
            )
        similarity = torch.nn.CosineSimilarity(dim=0)(
            self.ground_truth_representation, embedding
        )
        return similarity.item()

    def _get_score(self, model_output, _):
        """Score a candidate as ``1 - cosine similarity`` to the reference (higher = less similar)."""
        return 1 - self._similarity_to_reference(model_output)

    def _goal_function_result_type(self):
        """Returns the class of this goal function's results."""
        # NOTE(review): the run output prints summaries such as
        # "205 (327%) --> 205 (222%)", presumably because this result type
        # treats the embedding as class scores. A result type suited to
        # similarity outputs may be preferable -- confirm before changing.
        return ClassificationGoalFunctionResult

    def extra_repr_keys(self):
        """This goal function adds no extra keys to its repr."""
        return []

    def _get_displayed_output(self, raw_output):
        """Format the cosine similarity to the reference with four decimal places."""
        sim = self._similarity_to_reference(raw_output)
        return f'{sim:.4f}'

In [55]:
from textattack import Attack
from textattack.constraints.pre_transformation import (
    RepeatModification,
    StopwordModification,
)

from textattack.constraints.grammaticality import PartOfSpeech
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder

from textattack.goal_functions import InputReduction
from textattack.search_methods import GreedyWordSwapWIR
from textattack.transformations import WordSwapEmbedding

from textattack.attack_recipes import AttackRecipe


class SimilarityReduction(AttackRecipe):
    """Attack recipe that perturbs text via word swaps to maximize the `ReduceSimilarity` score."""

    @staticmethod
    def build(model_wrapper):
        """Assemble the attack from a transformation, constraints, goal function and search method.

        Args:
            model_wrapper: model wrapper handed to the `ReduceSimilarity` goal function.

        Returns:
            The configured `Attack`.
        """
        # Candidate perturbations come from embedding-based word swaps.
        transformation = WordSwapEmbedding(max_candidates=50)

        # Constraints every candidate must satisfy, in the order they are registered.
        constraints = [
            RepeatModification(),
            StopwordModification(),
            WordEmbeddingDistance(min_cos_sim=0.5),
            PartOfSpeech(allow_verb_noun_swap=True),
            UniversalSentenceEncoder(
                threshold=0.840845057,
                metric="angular",
                compare_against_original=False,
                window_size=15,
                skip_text_shorter_than_window=True,
            ),
        ]

        # maximizable=True: the search keeps increasing the score instead of
        # stopping at a completion criterion.
        goal_function = ReduceSimilarity(model_wrapper, maximizable=True)
        search_method = GreedyWordSwapWIR()

        return Attack(goal_function, constraints, transformation, search_method)

In [56]:
import transformers
from textattack.models.wrappers import PyTorchModelWrapper

# Single source of truth for the checkpoint, so the encoder and its tokenizer
# are always loaded from the same name.
MODEL_NAME = "bert-base-uncased"

model = transformers.AutoModel.from_pretrained(MODEL_NAME)
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)

class HuggingFaceModelWrapper(PyTorchModelWrapper):
    """Wraps a HuggingFace ``transformers`` model and tokenizer as a text-to-embedding callable.

    Calling the wrapper on a list of strings returns one vector per string: the
    model's last hidden state at token position 0.
    """
    model: transformers.PreTrainedModel
    tokenizer: transformers.PreTrainedTokenizer
    max_length: int

    def __init__(self, model, tokenizer, max_length=128):
        """Store the model, tokenizer and the fixed sequence length used for every input.

        Args:
            model: a ``transformers.PreTrainedModel``.
            tokenizer: a ``transformers.PreTrainedTokenizer`` or ``PreTrainedTokenizerFast``.
            max_length: number of tokens every input is padded/truncated to.

        Raises:
            AssertionError: if ``model`` or ``tokenizer`` has the wrong type.
        """
        assert isinstance(
            model, transformers.PreTrainedModel
        ), f"`model` must be of type `transformers.PreTrainedModel`, but got type {type(model)}."
        assert isinstance(
            tokenizer,
            (transformers.PreTrainedTokenizer, transformers.PreTrainedTokenizerFast),
        ), f"`tokenizer` must of type `transformers.PreTrainedTokenizer` or `transformers.PreTrainedTokenizerFast`, but got type {type(tokenizer)}."

        self.model = model
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, text_input_list):
        """Embed a batch of texts.

        Inputs are passed to the HuggingFace model as keyword arguments.
        (Regular PyTorch ``nn.Module`` models typically take inputs as
        positional arguments.)

        Args:
            text_input_list: the texts to embed.

        Returns:
            Tensor of shape ``[batch_size, hidden_size]`` on the model's device.
        """
        # Pad/truncate every input to exactly `self.max_length` tokens so the
        # whole batch stacks into a single tensor.
        inputs_dict = self.tokenizer(
            text_input_list,
            add_special_tokens=True,
            padding="max_length",
            max_length=self.max_length,
            truncation=True,
            return_tensors="pt",
        )
        model_device = next(self.model.parameters()).device
        # Keep the returned object rather than relying on in-place mutation.
        inputs_dict = inputs_dict.to(model_device)

        with torch.no_grad():
            outputs = self.model(**inputs_dict)
        # last_hidden_state is [batch_size, self.max_length, hidden_size];
        # keep position 0 (the CLS representation).
        return outputs.last_hidden_state[:, 0, :]

    def _tokenize(self, inputs):
        """Helper method for `tokenize`.

        Truncates at ``self.max_length`` so the returned tokens correspond to
        what ``__call__`` actually feeds the model.

        Args:
            inputs (list[str]): list of input strings
        Returns:
            tokens (list[list[str]]): List of list of tokens as strings
        """
        return [
            self.tokenizer.convert_ids_to_tokens(
                self.tokenizer([x], truncation=True, max_length=self.max_length)[
                    "input_ids"
                ][0]
            )
            for x in inputs
        ]

# Wrap the encoder/tokenizer pair, then assemble the attack around that wrapper.
model_wrapper = HuggingFaceModelWrapper(model=model, tokenizer=tokenizer)
attack = SimilarityReduction.build(model_wrapper=model_wrapper)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
textattack: No entry found for goal function <class '__main__.ReduceSimilarity'>.
textattack: Unknown if model of class <class 'transformers.models.bert.mo

In [57]:
from tqdm import tqdm # tqdm provides us a nice progress bar.
from textattack.loggers import CSVLogger # tracks a dataframe for us.
from textattack.attack_results import SuccessfulAttackResult
from textattack import Attacker
from textattack import AttackArgs
from textattack.datasets import Dataset

headlines = [
    'Malaria deaths in Africa fall by 5% from last year',
    'Washington Nationals defeat the Houston Astros to win the World Series',
    'Exxon Mobil hires a new CEO',
    'Microsoft invests $1 billion in OpenAI',
]

# Each headline is paired with itself: the second element is handed to the goal
# function as `ground_truth_output`, which it embeds as the reference text.
custom_dataset = [(text, text) for text in headlines]

dataset = Dataset(custom_dataset)

# Attack every example instead of hard-coding the count.
attack_args = AttackArgs(num_examples=len(custom_dataset))

attacker = Attacker(attack, dataset, attack_args)

results_iterable = attacker.attack_dataset()

logger = CSVLogger(color_method='html')

for result in results_iterable:
    print('result:', result)
    logger.log_attack_result(result)

# Write the logged rows out; without this the logger warns on exit that
# flush() was never called.
logger.flush()

# `IPython.core.display` is deprecated for these names; import from `IPython.display`.
from IPython.display import HTML, display

display(HTML(logger.df.to_html(escape=False)))

Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  unk
  )
  (goal_function):  ReduceSimilarity
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
        (threshold):  0.840845057
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  False
      )
    (3): RepeatModification
    (4): StopwordModification
  (is_black_box):  True
) 




  0%|          | 0/4 [00:00<?, ?it/s][A
 25%|██▌       | 1/4 [00:05<00:15,  5.23s/it][A
[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:  25%|██▌       | 1/4 [00:05<00:15,  5.23s/it][A

--------------------------------------------- Result 1 ---------------------------------------------

[[Malaria]] [[deaths]] in [[Africa]] [[fall]] by 5% from [[last]] [[year]]

[[Mosquitos]] [[assassinate]] in [[Chau]] [[downturn]] by 5% from [[latter]] [[yr]]





[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:  50%|█████     | 2/4 [00:06<00:06,  3.01s/it][A
[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2:  50%|█████     | 2/4 [00:06<00:06,  3.01s/it][A

--------------------------------------------- Result 2 ---------------------------------------------

[[Washington]] [[Nationals]] [[defeat]] the [[Houston]] [[Astros]] to [[win]] the [[World]] [[Series]]

[[Tacoma]] [[Citizenship]] [[bt]] the [[Tulsa]] [[Celtics]] to [[finalist]] the [[Universally]] [[Instalments]]





[Succeeded / Failed / Skipped / Total] 2 / 0 / 0 / 2:  75%|███████▌  | 3/4 [00:06<00:02,  2.11s/it][A
[Succeeded / Failed / Skipped / Total] 3 / 0 / 0 / 3:  75%|███████▌  | 3/4 [00:06<00:02,  2.11s/it][A
[Succeeded / Failed / Skipped / Total] 3 / 0 / 0 / 3: 100%|██████████| 4/4 [00:06<00:00,  1.63s/it][A
[Succeeded / Failed / Skipped / Total] 4 / 0 / 0 / 4: 100%|██████████| 4/4 [00:06<00:00,  1.63s/it][A

--------------------------------------------- Result 3 ---------------------------------------------

Exxon Mobil [[hires]] a [[new]] [[CEO]]

Exxon Mobil [[renting]] a [[recent]] [[IB]]


--------------------------------------------- Result 4 ---------------------------------------------

Microsoft [[invests]] $1 [[billion]] in OpenAI

Microsoft [[returning]] $1 [[million]] in OpenAI



+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 4      |
| Number of failed attacks:     | 0      |
| Number of skipped attacks:    | 0      |
| Original accuracy:            | 100.0% |
| Accuracy under attack:        | 0.0%   |
| Attack success rate:          | 100.0% |
| Average perturbed word %:     | 54.02% |
| Average num. words per input: | 8.25   |
| Avg num queries:              | 128.0  |
+-------------------------------+--------+


textattack: Logging to CSV at path results.csv
textattack: CSVLogger exiting without calling flush().



result: 205 (327%) --> 205 (222%)

Malaria deaths in Africa fall by 5% from last year

Mosquitos assassinate in Chau downturn by 5% from latter yr
result: 205 (222%) --> 205 (359%)

Washington Nationals defeat the Houston Astros to win the World Series

Tacoma Citizenship bt the Tulsa Celtics to finalist the Universally Instalments
result: 205 (366%) --> 205 (369%)

Exxon Mobil hires a new CEO

Exxon Mobil renting a recent IB
result: 205 (378%) --> 205 (313%)

Microsoft invests $1 billion in OpenAI

Microsoft returning $1 million in OpenAI


  self.df = self.df.append(row, ignore_index=True)
  self.df = self.df.append(row, ignore_index=True)
  self.df = self.df.append(row, ignore_index=True)
  self.df = self.df.append(row, ignore_index=True)
  from IPython.core.display import display, HTML


Unnamed: 0,original_text,perturbed_text,original_score,perturbed_score,original_output,perturbed_output,ground_truth_output,num_queries,result_type
0,Malaria deaths in Africa fall by 5% from last year,Mosquitos assassinate in Chau downturn by 5% from latter yr,0.0,0.483783,1.0,0.5162,Malaria deaths in Africa fall by 5% from last year,150,Maximized
1,Washington Nationals defeat the Houston Astros to win the World Series,Tacoma Citizenship bt the Tulsa Celtics to finalist the Universally Instalments,0.0,0.430628,1.0,0.5694,Washington Nationals defeat the Houston Astros to win the World Series,236,Maximized
2,Exxon Mobil hires a new CEO,Exxon Mobil renting a recent IB,0.0,0.197282,1.0,0.8027,Exxon Mobil hires a new CEO,88,Maximized
3,Microsoft invests $1 billion in OpenAI,Microsoft returning $1 million in OpenAI,0.0,0.132748,1.0,0.8673,Microsoft invests $1 billion in OpenAI,38,Maximized
