In [None]:
!pip install textattack pyarrow==1.0 sentence-transformers > /dev/null

[31mERROR: datascience 0.10.6 has requirement folium==0.2.1, but you'll have folium 0.8.3 which is incompatible.[0m
[31mERROR: textattack 0.2.4 has requirement pyarrow<1.0, but you'll have pyarrow 1.0.0 which is incompatible.[0m


In [None]:
from nltk.corpus import wordnet

import textattack
from textattack.transformations.word_swap import WordSwap


class WordSwapWordNetAntonym(WordSwap):
    """Transforms an input by replacing its words with antonyms provided by
    WordNet."""

    def _get_replacement_words(self, word, random=False):
        """Returns the single-word WordNet antonyms of ``word``.

        Every lemma of every synset of ``word`` is inspected and the names of
        its antonym lemmas are collected.

        Args:
            word: The word to look up in WordNet.
            random: Not read by this implementation; kept so the signature
                stays unchanged for callers.

        Returns:
            A sorted list of distinct antonyms. Candidates equal to ``word``,
            multi-word phrases (joined by ``_`` in WordNet) and anything
            rejected by ``textattack.shared.utils.is_one_word`` are dropped.
        """
        antonyms = set()
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                # `lemma.antonyms()` is simply empty when there are none, so
                # no separate truthiness check is needed before iterating.
                for ant_lemma in lemma.antonyms():
                    ant_word = ant_lemma.name()
                    # WordNet can suggest phrases that are joined by '_' but we ignore phrases.
                    if (
                        (ant_word != word)
                        and ("_" not in ant_word)
                        and (textattack.shared.utils.is_one_word(ant_word))
                    ):
                        antonyms.add(ant_word)
        # Set iteration order for strings can differ between interpreter runs;
        # sort so the candidate order (and thus the search) is reproducible.
        return sorted(antonyms)

[34;1mtextattack[0m: First time running textattack: downloading remaining required packages.
[34;1mtextattack[0m: Downloading NLTK required packages.


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.




In [None]:
def set_seed(random_seed):
  """Seed every random number generator this notebook relies on.

  Seeds Python's ``random`` module, NumPy's global generator and PyTorch
  (through both ``torch.manual_seed`` and ``torch.cuda.manual_seed``) with
  the same value.

  Args:
    random_seed: Integer seed handed unchanged to each library.
  """
  import random as py_random
  import numpy
  import torch as th

  py_random.seed(random_seed)
  numpy.random.seed(random_seed)
  for seed_fn in (th.manual_seed, th.cuda.manual_seed):
    seed_fn(random_seed)


# Fix the seed once, up front, for the whole notebook.
set_seed(42)

In [None]:
import torch
import textattack

class FoolConstraintGoalFunction(textattack.goal_functions.classification.UntargetedClassification):
  """Goal function for a "second-order" attack that targets a constraint.

  No classifier is queried. The "model output" for a candidate text is the
  similarity score that ``constraint`` assigns between the candidate and the
  original text. The goal is reached once at least ``num_words_to_swap`` words
  have been modified while that score is still at or above
  ``min_acceptable_score``.
  """

  # Position of each score type in the (precision, recall, F1) tuple returned
  # by the bert_score scorer.
  _SCORE_TYPE_TO_INDEX = {"precision": 0, "recall": 1, "f1": 2}

  def __init__(self, constraint, min_acceptable_score=0.8,
                num_words_to_swap=2):
    """Stores the constraint to fool and the success criteria.

    The parent initializer is not invoked; the attributes below are set by
    hand (presumably because there is no model to wrap -- confirm against the
    TextAttack version in use).

    Args:
      constraint: Constraint whose score is treated as the model output.
      min_acceptable_score: Minimum score a candidate must keep.
      num_words_to_swap: Minimum number of modified words for success.
    """
    self.constraint = constraint
    self.query_budget = float("inf")
    self.min_acceptable_score = min_acceptable_score
    self.use_cache = False
    self.num_words_to_swap = num_words_to_swap
    self.maximizable = False

  def _should_skip(self, *_):
    """Never skips an example, whatever arguments are passed."""
    return False

  def _is_goal_complete(self, model_output, attacked_text):
    """Returns True when enough words were swapped and the score held up.

    Args:
      model_output: Single-element tensor holding the constraint score.
      attacked_text: Candidate text; its ``modified_indices`` attack
        attribute (empty if absent) gives the number of swapped words.
    """
    num_words_swapped = len(attacked_text.attack_attrs.get('modified_indices', []))
    model_score = model_output.item()

    return (num_words_swapped >= self.num_words_to_swap) and (model_score >= self.min_acceptable_score)

  def _call_model(self, attacked_text_list):
    """Scores a list of `AttackedText` objects with the wrapped constraint.

    No model is queried and nothing is cached. The original text is recovered
    by following ``previous_attacked_text`` links from the first element, and
    every candidate is scored against it. Texts without
    ``newly_modified_indices`` are treated as the original and scored 1.0.

    Args:
      attacked_text_list: Candidate texts, all derived from the same original.

    Returns:
      A ``torch.Tensor`` with one single-element row per input text, or an
      empty tensor when the list is empty.
    """
    if not attacked_text_list:
      # Nothing to score; avoids indexing into an empty list below.
      return torch.tensor([])

    original_text = attacked_text_list[0]
    while "previous_attacked_text" in original_text.attack_attrs:
      original_text = original_text.attack_attrs["previous_attacked_text"]

    scores = []
    for at in attacked_text_list:
      if "newly_modified_indices" not in at.attack_attrs:
        # Original text
        scores.append([1.0])
        continue

      # NOTE(review): `transformation` (and `BERTScore` below) are not
      # parameters or attributes; they resolve to notebook-level globals, so
      # this method only works after the cell defining them has run.
      at.attack_attrs["last_transformation"] = transformation
      if isinstance(self.constraint, BERTScore):
        # The bert_score scorer takes (candidates, references) and returns a
        # (precision, recall, F1) tuple. Pick the entry the constraint is
        # configured for instead of always taking index 0 (precision).
        score_type = getattr(self.constraint, "score_type", "f1")
        prf = self.constraint._bert_scorer.score([at.text], [original_text.text])
        score = prf[self._SCORE_TYPE_TO_INDEX[score_type]][0]
      else:
        # otherwise, it's a sentence encoder
        score = self.constraint._score_list(original_text, [at])[0]
      # Plain floats keep every row a one-element list regardless of the
      # tensor shape the scorer returned.
      scores.append([float(score)])
    return torch.tensor(scores)

  def _get_score(self, model_output, attacked_text):
    """Returns the search score for a candidate.

    Candidates whose constraint score dropped below
    ``min_acceptable_score`` get 0.0; all others get the number of swapped
    words plus the constraint score, so more swaps always rank higher.

    Args:
      model_output: Single-element tensor holding the constraint score.
      attacked_text: Candidate text being scored.
    """
    model_score = model_output.item()
    if model_score < self.min_acceptable_score:
      return 0.0

    # The former `num_words_swapped / num_words` ratio was never used and
    # could divide by zero on a text without words, so it is gone.
    num_words_swapped = len(attacked_text.attack_attrs.get('modified_indices', []))
    return num_words_swapped + model_score

In [None]:
from textattack.constraints.grammaticality.language_models import GPT2
from textattack.constraints.semantics import BERTScore
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.constraints.pre_transformation import InputColumnModification, RepeatModification, StopwordModification
from textattack.datasets import HuggingFaceNlpDataset
from textattack.search_methods import BeamSearch
from textattack.shared import Attack
from textattack.transformations import WordSwapEmbedding

import numpy as np
import tqdm

# Second-order attack experiment: for each constraint (BERTScore, USE) and each
# threshold, try to swap `num_words_to_swap` words for WordNet antonyms while
# the constraint's own score stays at or above the threshold. One summary dict
# per (constraint, threshold) pair is appended to `data`.
#
# NOTE: `transformation` is also read as a global inside
# FoolConstraintGoalFunction._call_model, so it must be defined before any
# attack in this cell runs.
transformation = WordSwapWordNetAntonym()

# We'll constrain modification of already modified indices and stopwords
constraints = [RepeatModification(), StopwordModification()]
# don't attack premise in entailment
constraints.append(InputColumnModification(["premise", "hypothesis"], {"premise"}))
# use GPT2 to try and make sentences somewhat plausible
constraints.append(GPT2(max_log_prob_diff=2.0))

# Use SNLI dataset
# NOTE(review): [1, 2, 0] looks like a label remapping (see the comment in the
# first-order cell) -- confirm it matters here, since the goal function in this
# cell never reads the label.
dataset = HuggingFaceNlpDataset("snli", None, "test", [1, 2, 0], shuffle=True)

# Collected results; read again by the plotting cells further down.
data = []

# `num_samples` is reused by the first-order cell and by the plotting cells.
num_samples = 100
num_words_to_swap = 2

all_constraints = ('bertscore', 'use')
# Thresholds from 0.75 upwards in steps of 0.02, stopping before 1.0.
threshold_vals = np.arange(.75, 1.0, .02)
for constraint_idx, constraint_name in enumerate(all_constraints):
  tqdm.tqdm.write(f'----> constraint {constraint_name}')

  # We know this second-order attack fails most of the time, and fails more as the
  # threshold increases. Any example that fails will continue to fail. We take
  # advantage of this fact through caching.
  known_failure_idxs = set()

  # The constraint object is built once per constraint type and shared across
  # all thresholds; the threshold itself is enforced by the goal function.
  if constraint_name == 'bertscore':
    constraint = BERTScore(
      min_bert_score=0.0, # don't need this
      model="bert-base-uncased",
      score_type="f1",
      compare_against_original=True,
    )
  else:
    constraint = UniversalSentenceEncoder(
      compare_against_original=True,
      skip_text_shorter_than_window=False,
    )
  for threshold_idx, threshold in enumerate(threshold_vals):
    # goal function is to fool a single constraint
    tqdm.tqdm.write(f'--> Threshold {threshold} / Num words to swap {num_words_to_swap}')
    goal_function = FoolConstraintGoalFunction(
      constraint,
      min_acceptable_score=threshold,
      num_words_to_swap=num_words_to_swap,
    )

    # search method
    search_method = BeamSearch(beam_width=2)
    # Now, let's make the attack from the 4 components:
    attack = Attack(goal_function, constraints, transformation, search_method)
    # Only attack examples that have not already failed at a lower threshold.
    idxs_to_attack = set(range(num_samples)) - known_failure_idxs
    idxs_to_attack = list(sorted(idxs_to_attack))

    if len(idxs_to_attack):
      # 1-based position of this (constraint, threshold) run; used only for
      # the progress-bar label.
      this_sample_idx = (constraint_idx * len(threshold_vals)) + threshold_idx + 1
      total_num_samples = len(all_constraints) * len(threshold_vals)
      results_iterable = list(tqdm.tqdm(attack.attack_dataset(dataset, 
                                      indices=idxs_to_attack), 
                                      total=len(idxs_to_attack), 
                                      position=0, 
                                      leave=True, desc=f'Sample {this_sample_idx}/{total_num_samples}'))
    else:
      results_iterable = []


    num_successes = 0
    # NOTE(review): pairing by zip assumes attack_dataset yields results in the
    # same order as `idxs_to_attack` -- confirm.
    for result_idx, result in zip(idxs_to_attack, results_iterable):
      if isinstance(result, textattack.attack_results.FailedAttackResult):
        known_failure_idxs.add(result_idx)
      elif isinstance(result, textattack.attack_results.SuccessfulAttackResult):
        num_successes += 1
      
    data.append({ 
        'constraint': type(constraint).__name__, 
        'threshold': threshold, 
        'num_successes': num_successes, 
        'num_words_to_swap': num_words_to_swap 
      })

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=548118077.0, style=ProgressStyle(descri…




Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.0.attn.masked_bias', 'h.1.attn.masked_bias', 'h.2.attn.masked_bias', 'h.3.attn.masked_bias', 'h.4.attn.masked_bias', 'h.5.attn.masked_bias', 'h.6.attn.masked_bias', 'h.7.attn.masked_bias', 'h.8.attn.masked_bias', 'h.9.attn.masked_bias', 'h.10.attn.masked_bias', 'h.11.attn.masked_bias', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3827.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1945.0, style=ProgressStyle(description…


Downloading and preparing dataset snli/plain_text (download: 90.17 MiB, generated: 65.51 MiB, post-processed: Unknown sizetotal: 155.68 MiB) to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/e417f6f2e16254938d977a17ed32f3998f5b23e4fcab0f6eb1d28784f23ea60d...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1929.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1259440.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=65886400.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1263568.0, style=ProgressStyle(descript…

[34;1mtextattack[0m: Loading [94mnlp[0m dataset [94msnli[0m, split [94mtest[0m.



Dataset snli downloaded and prepared to /root/.cache/huggingface/datasets/snli/plain_text/1.0.0/e417f6f2e16254938d977a17ed32f3998f5b23e4fcab0f6eb1d28784f23ea60d. Subsequent calls will reuse this data.
----> constraint bertscore


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Sample 1/26:   0%|          | 0/100 [00:00<?, ?it/s]

--> Threshold 0.75 / Num words to swap 2


Sample 1/26: 100%|██████████| 100/100 [00:12<00:00,  7.77it/s]
Sample 2/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.77 / Num words to swap 2


Sample 2/26: 100%|██████████| 34/34 [00:07<00:00,  4.48it/s]
Sample 3/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.79 / Num words to swap 2


Sample 3/26: 100%|██████████| 34/34 [00:07<00:00,  4.43it/s]
Sample 4/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.81 / Num words to swap 2


Sample 4/26: 100%|██████████| 34/34 [00:07<00:00,  4.46it/s]
Sample 5/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.8300000000000001 / Num words to swap 2


Sample 5/26: 100%|██████████| 34/34 [00:07<00:00,  4.36it/s]
Sample 6/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.8500000000000001 / Num words to swap 2


Sample 6/26: 100%|██████████| 34/34 [00:07<00:00,  4.47it/s]
Sample 7/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.8700000000000001 / Num words to swap 2


Sample 7/26: 100%|██████████| 34/34 [00:07<00:00,  4.43it/s]
Sample 8/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.8900000000000001 / Num words to swap 2


Sample 8/26: 100%|██████████| 34/34 [00:07<00:00,  4.43it/s]
Sample 9/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.9100000000000001 / Num words to swap 2


Sample 9/26: 100%|██████████| 34/34 [00:07<00:00,  4.36it/s]
Sample 10/26:   0%|          | 0/33 [00:00<?, ?it/s]

--> Threshold 0.9300000000000002 / Num words to swap 2


Sample 10/26: 100%|██████████| 33/33 [00:07<00:00,  4.43it/s]
Sample 11/26:   0%|          | 0/29 [00:00<?, ?it/s]

--> Threshold 0.9500000000000002 / Num words to swap 2


Sample 11/26: 100%|██████████| 29/29 [00:06<00:00,  4.27it/s]
Sample 12/26:   0%|          | 0/19 [00:00<?, ?it/s]

--> Threshold 0.9700000000000002 / Num words to swap 2


Sample 12/26: 100%|██████████| 19/19 [00:05<00:00,  3.40it/s]
Sample 13/26:   0%|          | 0/5 [00:00<?, ?it/s]

--> Threshold 0.9900000000000002 / Num words to swap 2


Sample 13/26: 100%|██████████| 5/5 [00:02<00:00,  2.23it/s]
Using /tmp/tfhub_modules to cache modules.
Downloading TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.


----> constraint use


Downloaded https://tfhub.dev/google/universal-sentence-encoder/4, Total size: 987.47MB
Downloaded TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.
Sample 14/26:   0%|          | 0/100 [00:00<?, ?it/s]

--> Threshold 0.75 / Num words to swap 2


Sample 14/26: 100%|██████████| 100/100 [00:11<00:00,  8.64it/s]
Sample 15/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.77 / Num words to swap 2


Sample 15/26: 100%|██████████| 34/34 [00:06<00:00,  5.27it/s]
Sample 16/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.79 / Num words to swap 2


Sample 16/26: 100%|██████████| 34/34 [00:06<00:00,  5.26it/s]
Sample 17/26:   0%|          | 0/34 [00:00<?, ?it/s]

--> Threshold 0.81 / Num words to swap 2


Sample 17/26: 100%|██████████| 34/34 [00:06<00:00,  5.18it/s]
Sample 18/26:   0%|          | 0/32 [00:00<?, ?it/s]

--> Threshold 0.8300000000000001 / Num words to swap 2


Sample 18/26: 100%|██████████| 32/32 [00:06<00:00,  5.28it/s]
Sample 19/26:   0%|          | 0/30 [00:00<?, ?it/s]

--> Threshold 0.8500000000000001 / Num words to swap 2


Sample 19/26: 100%|██████████| 30/30 [00:05<00:00,  5.12it/s]
Sample 20/26:   0%|          | 0/28 [00:00<?, ?it/s]

--> Threshold 0.8700000000000001 / Num words to swap 2


Sample 20/26: 100%|██████████| 28/28 [00:05<00:00,  4.96it/s]
Sample 21/26:   0%|          | 0/21 [00:00<?, ?it/s]

--> Threshold 0.8900000000000001 / Num words to swap 2


Sample 21/26: 100%|██████████| 21/21 [00:04<00:00,  4.63it/s]
Sample 22/26:   0%|          | 0/13 [00:00<?, ?it/s]

--> Threshold 0.9100000000000001 / Num words to swap 2


Sample 22/26: 100%|██████████| 13/13 [00:03<00:00,  3.76it/s]
Sample 23/26:  17%|█▋        | 1/6 [00:00<00:00,  5.25it/s]

--> Threshold 0.9300000000000002 / Num words to swap 2


Sample 23/26: 100%|██████████| 6/6 [00:02<00:00,  2.93it/s]
Sample 24/26:  33%|███▎      | 1/3 [00:00<00:00,  5.54it/s]

--> Threshold 0.9500000000000002 / Num words to swap 2


Sample 24/26: 100%|██████████| 3/3 [00:01<00:00,  2.14it/s]
Sample 25/26:   0%|          | 0/1 [00:00<?, ?it/s]

--> Threshold 0.9700000000000002 / Num words to swap 2


Sample 25/26: 100%|██████████| 1/1 [00:01<00:00,  1.47s/it]

--> Threshold 0.9900000000000002 / Num words to swap 2





In [None]:
from textattack.constraints.grammaticality.language_models import GPT2
from textattack.constraints.semantics import BERTScore
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.constraints.pre_transformation import InputColumnModification, RepeatModification, StopwordModification
from textattack.datasets import HuggingFaceNlpDataset
from textattack.goal_functions import UntargetedClassification
from textattack.search_methods import GreedyWordSwapWIR
from textattack.shared import Attack
from textattack.transformations import WordSwapEmbedding

import numpy as np
import torch
import tqdm

# First-order attack experiment: attack each SNLI classifier with embedding
# word swaps, adding one semantic constraint (BERTScore or USE) at a given
# threshold. One summary dict per (model, constraint, threshold) is appended
# to `data2`.
#
# NOTE: this rebinds the global `transformation` that
# FoolConstraintGoalFunction._call_model reads.
transformation = WordSwapEmbedding()

# model to attack
import textattack
import transformers

# We'll constrain modification of already modified indices and stopwords
constraints = [RepeatModification(), StopwordModification()]
# don't attack premise in entailment
constraints.append(InputColumnModification(["premise", "hypothesis"], {"premise"}))
# use GPT2 to try and make sentences somewhat plausible
constraints.append(GPT2(max_log_prob_diff=2.0))


all_models = (
    "textattack/bert-base-uncased-snli", 
    "textattack/albert-base-v2-snli", 
    "textattack/distilbert-base-cased-snli"
  )

# Use SNLI dataset
# NOTE: this instance is replaced inside the model loop below before it is
# ever passed to an attack.
dataset = HuggingFaceNlpDataset("snli", None, "test", [1, 2, 0], shuffle=True)

# Collected results; read again by the plotting cells further down.
data2 = []

all_constraints = ('bertscore', 'use')
threshold_vals = np.arange(.75, 1.0, .02)[::-1] # start with highest constraint level!
for model_idx, model_path in enumerate(all_models):
  print('Model -->', model_path)
  tokenizer = textattack.models.tokenizers.AutoTokenizer(model_path)
  model = transformers.AutoModelForSequenceClassification.from_pretrained(model_path)
  model = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer, batch_size=16)
  goal_function = UntargetedClassification(model)

  # Unfortunately, TextAttack's bert SNLI needs its labels remapped
  # see https://github.com/QData/TextAttack/blob/master/textattack/commands/attack/attack_args.py
  # NOTE(review): every load passes shuffle=True. If the shuffle draws from
  # the global RNG, indices 0..num_samples-1 may refer to different examples
  # for each model and for the second-order cell -- confirm.
  if '/bert-base' in model_path:
    dataset = HuggingFaceNlpDataset("snli", None, "test", [1, 2, 0], shuffle=True)
  else:
    dataset = HuggingFaceNlpDataset("snli", None, "test", shuffle=True)

  for constraint_idx, constraint_name in enumerate(all_constraints):
    print(f'----> constraint {constraint_name}')
    # We know this first-order attack succeeds more often as the threshold
    # decreases. Any example that succeeds will continue to succeed. We take
    # advantage of this fact through caching.
    known_success_idxs = set()
    # Built once per constraint type; its threshold attribute is updated in
    # place for every threshold value below.
    if constraint_name == 'use':
        constraint = UniversalSentenceEncoder(
        threshold=0.0,
        compare_against_original=True,
        skip_text_shorter_than_window=False,
      )
    else:
      constraint = BERTScore(
        min_bert_score=0.0, # don't need this
        model="bert-base-uncased",
        score_type="f1",
        compare_against_original=True,
      )
    for threshold_idx, threshold in enumerate(threshold_vals):
      if constraint_name == 'use':
        constraint.threshold = threshold
      else:
        constraint.min_bert_score = threshold
      print(f'--> Threshold {threshold}')
      these_constraints = constraints + [constraint]
      # search method
      search_method = GreedyWordSwapWIR()
      # Now, let's make the attack from the 4 components:
      attack = Attack(goal_function, these_constraints, transformation, search_method)

      # calculate all the idxs we don't know succeed already and use this to
      # avoid recomputing unnecessary attacks
      # NOTE: `num_samples` is not defined in this cell; it comes from the
      # second-order experiment cell, which therefore has to run first.
      idxs_to_attack = set(range(num_samples)) - known_success_idxs
      idxs_to_attack = sorted(list(idxs_to_attack))

      if len(idxs_to_attack):
        # 1-based position of this (model, constraint, threshold) run; used
        # only for the progress-bar label.
        this_sample_idx = (model_idx * len(all_constraints) * len(threshold_vals)) \
                    + (constraint_idx * len(threshold_vals)) \
                    + threshold_idx + 1
        total_num_samples = len(all_constraints) * len(threshold_vals) * len(all_models)
        results_iterable = list(
            tqdm.tqdm(attack.attack_dataset(dataset, indices=idxs_to_attack),
                total=len(idxs_to_attack), 
                position=0, leave=True, 
                desc=f'Sample {this_sample_idx}/{total_num_samples}')
            )
      else:
        results_iterable = []
      
      # num_successes = len([r for r in results_iterable if isinstance(r, textattack.attack_results.SuccessfulAttackResult)])
      # NOTE(review): pairing by zip assumes attack_dataset yields results in
      # the same order as `idxs_to_attack` -- confirm.
      for result_idx, result in zip(idxs_to_attack, results_iterable):
        if isinstance(result, textattack.attack_results.SuccessfulAttackResult):
          known_success_idxs.add(result_idx)
      # Cumulative: includes successes carried over from stricter thresholds.
      num_successes = len(known_success_idxs)

      data2.append({ 
          'constraint': type(constraint).__name__, 
          'threshold': threshold, 
          'model': model_path,
          'num_successes': num_successes
        })

[34;1mtextattack[0m: Downloading https://textattack.s3.amazonaws.com/word_embeddings/paragramcf.
100%|██████████| 481M/481M [00:13<00:00, 36.7MB/s]
[34;1mtextattack[0m: Unzipping file /root/.cache/textattack/tmpcmvrammf.zip to /root/.cache/textattack/word_embeddings/paragramcf.
[34;1mtextattack[0m: Successfully saved word_embeddings/paragramcf to cache.
Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.0.attn.masked_bias', 'h.1.attn.masked_bias', 'h.2.attn.masked_bias', 'h.3.attn.masked_bias', 'h.4.attn.masked_bias', 'h.5.attn.masked_bias', 'h.6.attn.masked_bias', 'h.7.attn.masked_bias', 'h.8.attn.masked_bias', 'h.9.attn.masked_bias', 'h.10.attn.masked_bias', 'h.11.attn.masked_bias', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34;1mtextattack[0m: Loading [94mnlp[0m dataset [94msnli[0m, split [94mtest[0m.


Model --> textattack/bert-base-uncased-snli


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=630.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=437986855.0, style=ProgressStyle(descri…




[34;1mtextattack[0m: Unknown if model of class <class 'textattack.models.wrappers.huggingface_model_wrapper.HuggingFaceModelWrapper'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
[34;1mtextattack[0m: Loading [94mnlp[0m dataset [94msnli[0m, split [94mtest[0m.


----> constraint bertscore


Sample 1/78:   0%|          | 0/100 [00:00<?, ?it/s]

--> Threshold 0.9900000000000002


Sample 1/78: 100%|██████████| 100/100 [01:57<00:00,  1.18s/it]
Sample 2/78:   0%|          | 0/92 [00:00<?, ?it/s]

--> Threshold 0.9700000000000002


Sample 2/78: 100%|██████████| 92/92 [01:33<00:00,  1.01s/it]
Sample 3/78:   0%|          | 0/63 [00:00<?, ?it/s]

--> Threshold 0.9500000000000002


Sample 3/78: 100%|██████████| 63/63 [01:01<00:00,  1.03it/s]
Sample 4/78:   0%|          | 0/38 [00:00<?, ?it/s]

--> Threshold 0.9300000000000002


Sample 4/78: 100%|██████████| 38/38 [00:42<00:00,  1.11s/it]
Sample 5/78:   0%|          | 0/29 [00:00<?, ?it/s]

--> Threshold 0.9100000000000001


Sample 5/78: 100%|██████████| 29/29 [00:33<00:00,  1.16s/it]
Sample 6/78:   0%|          | 0/24 [00:00<?, ?it/s]

--> Threshold 0.8900000000000001


Sample 6/78: 100%|██████████| 24/24 [00:25<00:00,  1.07s/it]
Sample 7/78:   0%|          | 0/18 [00:00<?, ?it/s]

--> Threshold 0.8700000000000001


Sample 7/78: 100%|██████████| 18/18 [00:19<00:00,  1.07s/it]
Sample 8/78:   0%|          | 0/15 [00:00<?, ?it/s]

--> Threshold 0.8500000000000001


Sample 8/78: 100%|██████████| 15/15 [00:15<00:00,  1.04s/it]
Sample 9/78:   0%|          | 0/14 [00:00<?, ?it/s]

--> Threshold 0.8300000000000001


Sample 9/78: 100%|██████████| 14/14 [00:13<00:00,  1.02it/s]
Sample 10/78:   0%|          | 0/13 [00:00<?, ?it/s]

--> Threshold 0.81


Sample 10/78: 100%|██████████| 13/13 [00:11<00:00,  1.12it/s]
Sample 11/78:   0%|          | 0/11 [00:00<?, ?it/s]

--> Threshold 0.79


Sample 11/78: 100%|██████████| 11/11 [00:08<00:00,  1.27it/s]
Sample 12/78:   0%|          | 0/10 [00:00<?, ?it/s]

--> Threshold 0.77


Sample 12/78: 100%|██████████| 10/10 [00:07<00:00,  1.38it/s]
Sample 13/78:   0%|          | 0/9 [00:00<?, ?it/s]

--> Threshold 0.75


Sample 13/78: 100%|██████████| 9/9 [00:05<00:00,  1.78it/s]


----> constraint use


Sample 14/78:   0%|          | 0/100 [00:00<?, ?it/s]

--> Threshold 0.9900000000000002


Sample 14/78: 100%|██████████| 100/100 [01:31<00:00,  1.10it/s]
Sample 15/78:   0%|          | 0/100 [00:00<?, ?it/s]

--> Threshold 0.9700000000000002


Sample 15/78: 100%|██████████| 100/100 [01:32<00:00,  1.09it/s]
Sample 16/78:   0%|          | 0/100 [00:00<?, ?it/s]

--> Threshold 0.9500000000000002


Sample 16/78: 100%|██████████| 100/100 [01:27<00:00,  1.14it/s]
Sample 17/78:   0%|          | 0/90 [00:00<?, ?it/s]

--> Threshold 0.9300000000000002


Sample 17/78: 100%|██████████| 90/90 [01:15<00:00,  1.19it/s]
Sample 18/78:   0%|          | 0/74 [00:00<?, ?it/s]

--> Threshold 0.9100000000000001


Sample 18/78: 100%|██████████| 74/74 [00:57<00:00,  1.29it/s]
Sample 19/78:   0%|          | 0/47 [00:00<?, ?it/s]

--> Threshold 0.8900000000000001


Sample 19/78: 100%|██████████| 47/47 [00:39<00:00,  1.20it/s]
Sample 20/78:   0%|          | 0/33 [00:00<?, ?it/s]

--> Threshold 0.8700000000000001


Sample 20/78: 100%|██████████| 33/33 [00:26<00:00,  1.23it/s]
Sample 21/78:   0%|          | 0/24 [00:00<?, ?it/s]

--> Threshold 0.8500000000000001


Sample 21/78:  58%|█████▊    | 14/24 [00:11<00:09,  1.04it/s]

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn

# A bare `import sklearn` does not guarantee that the `sklearn.metrics`
# submodule is loaded, so import it explicitly before `auc` is used below.
import sklearn.metrics

sns.set()

# One row per (constraint, threshold) for the second-order attack, and one row
# per (model, constraint, threshold) for the first-order attack.
second_order_df = pd.DataFrame(data) \
                    .rename(columns={'num_successes': 'num_second_order_successes'}) \
                    .drop('num_words_to_swap', axis=1)
first_order_df = pd.DataFrame(data2) \
                    .rename(columns={'num_successes': 'num_first_order_successes'})

# The two frames share exactly these two columns; naming them makes the join
# keys explicit.
full_df = pd.merge(first_order_df, second_order_df, on=['constraint', 'threshold'])

df_models = full_df['model'].unique()
# squeeze=False keeps the axes indexable even when only one model has results.
fig, ax = plt.subplots(1, len(df_models), figsize=(24, 6), squeeze=False)
ax = ax[0]

for model_idx, model in enumerate(df_models):
  df = full_df[full_df['model'] == model].reset_index()
  # Add datapoints where eps=1.0 and success rate is zero, instead of actually running
  # futile attacks.
  for constraint in df['constraint'].unique():
    # Single combined mask instead of chained boolean indexing, which makes
    # pandas re-align each mask and emit a warning.
    is_zero_point = (
        (df['constraint'] == constraint)
        & (df['num_first_order_successes'] == 0.0)
        & (df['num_second_order_successes'] == 0.0)
    )
    if not is_zero_point.any():
      extra_data_point = { 'constraint': constraint, 'threshold': 1.0, 'num_first_order_successes': 0.0, 'num_second_order_successes': 0.0 } # The \eps=1.0 datapoint
      # Columns not listed above (e.g. 'index', 'model') are filled with None.
      extra_data_point_row = [extra_data_point.get(c) for c in df.columns]
      df.loc[-1] = extra_data_point_row # add row
      df.index = df.index + 1  # shifting index
      df.sort_index(inplace=True)

  # Calculate rate in terms of num successes
  df['first_order_success_rate'] = df['num_first_order_successes'] / num_samples
  df['second_order_success_rate'] = df['num_second_order_successes'] / num_samples

  labels = []
  # Calculate AUC.
  for constraint in df['constraint'].unique():
    of_constraint = df['constraint'] == constraint
    x = df.loc[of_constraint, 'second_order_success_rate']
    y = df.loc[of_constraint, 'first_order_success_rate']
    auc = sklearn.metrics.auc(x, y)
    # Normalise by the bounding box of the curve.
    accs = auc / (max(x) * max(y))
    print(constraint)
    print(f'--> AUC: {auc}')
    print(f'--> ACCS: {accs}')

    labels.append(f'{constraint} (ACCS = {accs:.3f})')
  print('labels', labels)
  # Plot curve. x/y are passed by keyword because newer seaborn releases no
  # longer accept them positionally.
  sns.lineplot(x=df['second_order_success_rate'], y=df['first_order_success_rate'],
                hue=df['constraint'], ci=0, ax=ax[model_idx], lw=4)

  # Dash the second curve so the two constraints stay distinguishable; skipped
  # when only one line was drawn.
  plotted_lines = ax[model_idx].get_lines()
  if len(plotted_lines) > 1:
    plotted_lines[1].set_linestyle('--')

  ax[model_idx].legend(labels, loc=4, prop={'size': 18}) # bottom right, see https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.legend.html
  ax[model_idx].set_title(model, fontsize=22)
  ax[model_idx].set_xlabel('Second-order attack success rate', fontsize=18)
  ax[model_idx].set_ylabel('First-order attack success rate',  fontsize=18)

plt.tight_layout()
plt.savefig('attack_curve_snli.pdf')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sklearn

df_models = full_df['model'].unique()
df_constraints = full_df['constraint'].unique()

# squeeze=False keeps the axes indexable even when only one model has results.
fig, ax = plt.subplots(1, len(df_models), figsize=(24, 6), squeeze=False)
ax = ax[0]

LINE_WIDTH = 4    # thickish
COLOR_1 = "#9b59b6" # purplish
COLOR_2 = "#34495e" # blackish
COLOR_3 = "#39FF14" # neon green lol

print('models -> ', df_models)
print('constraints -> ', df_constraints)
for model_idx, model in enumerate(df_models):
  df = full_df[full_df['model'] == model].reset_index()

  # Calculate rate in terms of num successes
  df['first_order_success_rate'] = df['num_first_order_successes'] / num_samples
  df['second_order_success_rate'] = df['num_second_order_successes'] / num_samples

  # Add datapoints where eps=1.0 and success rate is zero, instead of actually running
  # futile attacks.
  for constraint in df['constraint'].unique():
    # Single combined mask instead of chained boolean indexing, which makes
    # pandas re-align each mask and emit a warning.
    is_zero_point = (
        (df['constraint'] == constraint)
        & (df['num_first_order_successes'] == 0.0)
        & (df['num_second_order_successes'] == 0.0)
    )
    if not is_zero_point.any():
      # The \eps=1.0 datapoint. The rate columns already exist at this point
      # and the x-axis of this figure is the threshold, so both have to be
      # filled in; otherwise the row is all-NaN on the plotted axes and the
      # point never appears.
      extra_data_point = {
          'constraint': constraint,
          'threshold': 1.0,
          'num_first_order_successes': 0.0,
          'num_second_order_successes': 0.0,
          'first_order_success_rate': 0.0,
          'second_order_success_rate': 0.0,
      }
      # Columns not listed above (e.g. 'index', 'model') are filled with None.
      extra_data_point_row = [extra_data_point.get(c) for c in df.columns]
      df.loc[-1] = extra_data_point_row # add row
      df.index = df.index + 1  # shifting index
      df.sort_index(inplace=True)

  # Plot curves: colour encodes the constraint, line style the attack order.
  for constraint in df_constraints:
    c_df = df[df['constraint'] == constraint].reset_index()

    c_color = COLOR_1 if constraint == 'BERTScore' else COLOR_3

    thresholds = c_df['threshold'].astype(float)
    # x/y are passed by keyword because newer seaborn releases no longer
    # accept them positionally.
    sns.lineplot(x=thresholds, y=c_df['first_order_success_rate'].astype(float),
                  color=c_color, ci=0, lw=LINE_WIDTH, ax=ax[model_idx])
    sns.lineplot(x=thresholds, y=c_df['second_order_success_rate'].astype(float),
                  color=c_color, ci=0, lw=LINE_WIDTH, ax=ax[model_idx])
    # Dash the line that was just added (the second-order curve) for every
    # constraint, rather than styling hard-coded line indices, which left the
    # styles inconsistent with the legend and failed with a single constraint.
    ax[model_idx].get_lines()[-1].set_linestyle('--')

  ax[model_idx].legend(['First-order attack', 'Second-order attack'],
                       loc=4, prop={'size': 18}) # bottom right, see https://matplotlib.org/api/_as_gen/matplotlib.axes.Axes.legend.html
  ax[model_idx].set_title(model, fontsize=22)

  ax[model_idx].set_xlabel('Threshold (ε)', fontsize=18)
  ax[model_idx].set_ylabel('Attack Success Rate', fontsize=18)

plt.tight_layout()
plt.savefig('attack_successes_snli.pdf')

In [None]:
#df.plot(x='threshold', y='num_first_order_success_rate')

In [None]:
# Quick look at first-order success rate vs. threshold for whatever `df`
# currently holds (it is the loop variable of the plotting cells above).
df.plot(x='threshold', y='first_order_success_rate')

In [None]:
# Quick look at second-order success rate vs. threshold for whatever `df`
# currently holds. The rate column is named 'second_order_success_rate';
# there is no 'num_'-prefixed rate column, so the old name raised a KeyError.
df.plot(x='threshold', y='second_order_success_rate')

In [None]:
# Rebuild the second-order results table from the raw records in `data`,
# dropping the constant `num_words_to_swap` column.
second_order_df = (
    pd.DataFrame(data)
      .rename(columns={'num_successes': 'num_second_order_successes'})
      .drop(columns='num_words_to_swap')
)

# Turn raw success counts into a rate over the attacked samples.
second_order_df['second_order_success_rate'] = (
    second_order_df['num_second_order_successes'] / num_samples
)

second_order_df.plot(x='threshold', y='second_order_success_rate', color='purple')

In [None]:
data