In [2]:
# Reload edited project modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Register the line_profiler extension (provides the %lprun magic).
%load_ext line_profiler

In [3]:
import transformers, nltk, pandas as pd, torch, string
from datasets import load_dataset, load_from_disk, DatasetDict, ClassLabel
from pprint import pprint
from datetime import datetime
import argparse
import functools


from textattack import Attack, AttackArgs,Attacker
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.datasets import HuggingFaceDataset
from textattack.loggers import CSVLogger # tracks a dataframe for us.
from textattack.attack_recipes import AttackRecipe
from textattack.search_methods import BeamSearch
from textattack.constraints import Constraint
from textattack.constraints.pre_transformation import RepeatModification, StopwordModification
from textattack.transformations import WordSwapEmbedding, WordSwapMaskedLM
from textattack.goal_functions import UntargetedClassification
from textattack.metrics.attack_metrics.attack_success_rate import AttackSuccessRate
from textattack.metrics.attack_metrics.words_perturbed import WordsPerturbed
from textattack.metrics.attack_metrics.attack_queries import AttackQueries
from textattack.metrics.quality_metrics.perplexity import Perplexity
from textattack.metrics.quality_metrics.use import USEMetric
from sentence_transformers.util import pytorch_cos_sim

2023-02-07 14:46:48.961097: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-07 14:46:51.962212: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.6/lib64:/usr/local/cuda-11.6/extras/CUPTI/lib64:/usr/local/cuda-10.1/lib64:/usr/local/cuda-10.1/extras/CUPTI/lib64:/usr/local/cuda-11.6/lib64:/usr/local/cuda-11.6/extras/CUPTI/lib64:/usr/local/cuda-10.1/lib64:/usr/local/cuda-10.1/extras/CUPTI/lib64:/usr/local/cuda-11.6/lib64:/usr/local/cuda-11.6/extras/CUPTI/lib64:/usr/local/cuda-10.1/lib64:/usr/local/cuda-10.1/extras/CUP

In [4]:
from src.utils import display_all, merge_dicts, append_df_to_csv, set_seed
from src.data import prep_dsd_rotten_tomatoes,prep_dsd_simple,prep_dsd_financial
from src.config import Config
from src.models import _prepare_vm_tokenizer_and_model, get_vm_probs, prepare_models, get_nli_probs
from src.baseline_attacks import AttackRecipes, setup_baselines_parser
from fastcore.basics import in_jupyter


import warnings
warnings.filterwarnings("ignore", message="FutureWarning: The frame.append method is deprecated") 

# Output directory prefix: the per-attack CSV logs and the aggregated results.csv
# written by the attack loop below are both placed under this path.
path_baselines = "./baselines/"

# Fix the random seed through the project helper (src.utils.set_seed).
set_seed(1000)

In [5]:
# Export this notebook to a plain Python script (baselines.py), leaving out
# markdown cells and every cell tagged 'hide'.
!jupyter nbconvert \
    --TagRemovePreprocessor.enabled=True \
    --TagRemovePreprocessor.remove_cell_tags="['hide']" \
    --TemplateExporter.exclude_markdown=True \
    --to python "baselines.ipynb"

  warn(
[NbConvertApp] Converting notebook baselines.ipynb to python
[NbConvertApp] Writing 8447 bytes to baselines.py


In [5]:
######### CONFIG (default values) #########
# Default run configuration; used as-is when the code runs inside Jupyter.
param_d = {
    "ds_name": "financial",
    "split": 'test',
    "sts_threshold": 0.8,
    "contradiction_threshold": 0.2,
    "acceptability_threshold": 0.5,
    "pp_letter_diff_threshold": 30,
}
###########################################

# Outside Jupyter (i.e. run as a script) command-line options take precedence.
if not in_jupyter():
    parser = setup_baselines_parser()
    newargs = vars(parser.parse_args())
    # Only values that are not None replace a default.
    param_d.update({name: value for name, value in newargs.items() if value is not None})

In [20]:
### Common attack components
# Build the attack recipes from the run configuration. Each entry of `attack_list`
# is indexed further below by the keys 'attack_code', 'attack_num' and 'attack_recipe'.
attack_recipes = AttackRecipes(param_d)
attack_list = attack_recipes.get_attack_list()

Reusing dataset financial_phrasebank (/data/tproth/.cache/huggingface/datasets/financial_phrasebank/sentences_50agree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached split indices for dataset at /data/tproth/.cache/huggingface/datasets/financial_phrasebank/sentences_50agree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141/cache-117ab6d5219ec212.arrow and /data/tproth/.cache/huggingface/datasets/financial_phrasebank/sentences_50agree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141/cache-56d3cc2cdf1d3060.arrow
Loading cached split indices for dataset at /data/tproth/.cache/huggingface/datasets/financial_phrasebank/sentences_50agree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141/cache-e184a015df2fa54c.arrow and /data/tproth/.cache/huggingface/datasets/financial_phrasebank/sentences_50agree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141/cache-e4e12b9ebcbafb77.arrow


  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/4 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

Flattening the indices:   0%|          | 0/2 [00:00<?, ?ba/s]

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

Flattening the indices:   0%|          | 0/1 [00:00<?, ?ba/s]

textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
If you want to use `RobertaLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification

## Attack 

In [23]:
# Restrict the run to a chosen subset of recipes, identified by their attack code.
selected_attack_codes = ["TextFooler", "CLARE", "BAE-R"]
attack_list = [
    attack for attack in attack_list
    if attack["attack_code"] in selected_attack_codes
]

In [18]:
# Wrap the configured split once; every attack recipe below runs against the same examples.
hf_dataset = HuggingFaceDataset(attack_recipes.ds.dsd_raw[param_d['split']], dataset_columns=(['text'], 'label'))
for attack_json in attack_list:
    attack_code = attack_json['attack_code']
    attack_num = attack_json['attack_num']

    # The banner used to be an f-string without braces, so it printed the literal
    # text "attack_json['attack_code']" instead of the attack code.
    print(f"######################################## {attack_code} ######################################## ")
    print("Now doing attack recipe number", attack_num, "with code", attack_code)

    # Record the run identity in the shared config so it ends up in the summary row.
    datetime_now = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    param_d['datetime'] =  datetime_now
    param_d['attack_num'] = attack_num
    param_d['attack_code'] = attack_code

    # One CSV log per attack run; `filename` is also read by a later cell.
    filename = f"{path_baselines}{datetime_now}_{param_d['ds_name']}_{param_d['split']}_{attack_code}.csv"
    # num_examples=-1 asks TextAttack to attack the whole dataset (see the AttackArgs docs).
    attack_args = AttackArgs(num_examples=-1, enable_advance_metrics=True,
                            log_to_csv=filename, csv_coloring_style='plain', disable_stdout=True)
    attacker = Attacker(attack_json['attack_recipe'], hf_dataset, attack_args)

    attack_results = attacker.attack_dataset()

    # Gather the aggregate metrics of this run into one flat dict.
    attack_result_metrics = {
        **AttackSuccessRate().calculate(attack_results),
        **WordsPerturbed().calculate(attack_results),
        **AttackQueries().calculate(attack_results),
        **Perplexity().calculate(attack_results),
        **USEMetric().calculate(attack_results)
    }
    # This entry is not wanted in the one-row summary. The default keeps a finished
    # (and expensive) run from failing here should the key be absent.
    attack_result_metrics.pop('num_words_changed_until_success', None)

    # Append a single summary row (config + metrics) to the shared results file.
    d = merge_dicts(param_d, attack_result_metrics)
    summary_df = pd.Series(d).to_frame().T
    append_df_to_csv(summary_df, f"{path_baselines}results.csv")

textattack: Logging to CSV at path ./baselines/2023-02-07_135156_financial_test_BAE-R.csv


Now doing attack recipe number 10 with code BAE-R
Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapMaskedLM(
    (method):  bae
    (masked_lm_name):  BertForMaskedLM
    (max_length):  512
    (max_candidates):  50
    (min_confidence):  0.0
  )
  (constraints): 
    (0): StsScoreConstraint(
        (compare_against_original):  True
      )
    (1): ContradictionScoreConstraint(
        (compare_against_original):  True
      )
    (2): AcceptabilityScoreConstraint(
        (compare_against_original):  True
      )
    (3): PpLetterDiffConstraint(
        (compare_against_original):  True
      )
    (4): LCPConstraint(
        (compare_against_original):  True
      )
    (5): RepeatModification
    (6): StopwordModification
  (is_black_box):  True
) 




  0%|                                                                                  | 0/4 [00:00<?, ?it/s][Atextattack: CSVLogger exiting without calling flush().

 25%|██████████████████▌                                                       | 1/4 [00:04<00:13,  4.61s/it][A
[Succeeded / Failed / Skipped / Total] 0 / 1 / 0 / 1:  25%|█████               | 1/4 [00:04<00:13,  4.62s/it][A
[Succeeded / Failed / Skipped / Total] 0 / 1 / 0 / 1:  50%|██████████          | 2/4 [00:09<00:09,  4.65s/it][A
[Succeeded / Failed / Skipped / Total] 0 / 2 / 0 / 2:  50%|██████████          | 2/4 [00:09<00:09,  4.66s/it][A
[Succeeded / Failed / Skipped / Total] 0 / 2 / 0 / 2:  75%|███████████████     | 3/4 [00:15<00:05,  5.16s/it][A
[Succeeded / Failed / Skipped / Total] 1 / 2 / 0 / 3:  75%|███████████████     | 3/4 [00:15<00:05,  5.16s/it][A
[Succeeded / Failed / Skipped / Total] 1 / 2 / 0 / 3: 100%|████████████████████| 4/4 [00:16<00:00,  4.02s/it][A
[Succeeded / Failed / Skipped / Total] 2


+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 2      |
| Number of failed attacks:     | 2      |
| Number of skipped attacks:    | 0      |
| Original accuracy:            | 100.0% |
| Accuracy under attack:        | 50.0%  |
| Attack success rate:          | 50.0%  |
| Average perturbed word %:     | 14.32% |
| Average num. words per input: | 16.5   |
| Avg num queries:              | 66.0   |
| Average Original Perplexity:  | 129.14 |
| Average Attack Perplexity:    | 169.11 |
| Average Attack USE Score:     | 0.82   |
+-------------------------------+--------+


## Example-specific metrics 

In [19]:
def display_adv_example(df):
    """Render the original and perturbed text of each row side by side.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'original_text' and 'perturbed_text'.

    Returns
    -------
    None
        The two columns are shown through IPython's rich display.
    """
    # `display` is part of the public IPython.display API; importing it from
    # IPython.core.display is deprecated.
    from IPython.display import display

    # Widen the columns only while rendering so the examples are readable, without
    # permanently altering the global pandas display options.
    with pd.option_context("display.max_colwidth", 480):
        display(df[['original_text', 'perturbed_text']])

# def add_vm_score_and_label_flip(df, dataset, cfg, vm_tokenizer, vm_model): 
#     truelabels = torch.tensor(dataset._dataset['label'], device =cfg.device)
#     orig_probs =  get_vm_probs(df['original_text'].tolist(), cfg, vm_tokenizer, vm_model, return_predclass=False)
#     pp_probs = get_vm_probs(df['perturbed_text'].tolist(), cfg, vm_tokenizer, vm_model, return_predclass=False)
#     orig_predclass = torch.argmax(orig_probs, axis=1)
#     pp_predclass = torch.argmax(pp_probs, axis=1)
#     orig_truelabel_probs = torch.gather(orig_probs, 1, truelabels[:,None]).squeeze()
#     pp_truelabel_probs   = torch.gather(pp_probs, 1,   truelabels[:,None]).squeeze()
#     pp_predclass_probs   = torch.gather(pp_probs, 1,   pp_predclass[ :,None]).squeeze()
    
#     df['truelabel'] = truelabels.cpu().tolist()
#     df['orig_predclass'] = orig_predclass.cpu().tolist()
#     df['pp_predclass'] = pp_predclass.cpu().tolist()
#     df['orig_truelabel_probs'] = orig_truelabel_probs.cpu().tolist()
#     df['pp_truelabel_probs'] = pp_truelabel_probs.cpu().tolist()
#     df['vm_scores'] = (orig_truelabel_probs - pp_truelabel_probs).cpu().tolist()
#     df['label_flip'] = ((pp_predclass != truelabels) * 1).cpu().tolist()
#     return df

# def add_sts_score(df, sts_model, cfg): 
#     orig_embeddings  = sts_model.encode(df['original_text'].tolist(),  convert_to_tensor=True, device=cfg.device)
#     pp_embeddings    = sts_model.encode(df['perturbed_text'].tolist(), convert_to_tensor=True, device=cfg.device)
#     df['sts_scores'] = pytorch_cos_sim(orig_embeddings, pp_embeddings).diagonal().cpu().tolist()
#     return df

# def add_contradiction_score(df, cfg, nli_tokenizer, nli_model): 
#     contradiction_scores = get_nli_probs(df['original_text'].tolist(), df['perturbed_text'].tolist(), cfg, nli_tokenizer, nli_model)
#     df['contradiction_scores'] =  contradiction_scores[:,cfg.contra_label].cpu().tolist()
#     return df 

# def get_df_mean_cols(df): 
#     cols = ['label_flip', 'vm_scores', 'sts_scores',
#             'contradiction_scores', 'sts_threshold_met', 'contradiction_threshold_met']
#     s = df[cols].mean()
#     s.index = [f"{o}_mean" for o in s.index]
#     return dict(s)

# def get_cts_summary_stats(df): 
#     cols = ['vm_scores', 'sts_scores', 'contradiction_scores']
#     df_summary = df[cols].describe(percentiles=[.1,.25,.5,.75,.9]).loc[['std','10%','25%','50%','75%','90%']]
#     tmp_d = dict()
#     for c in cols: 
#         s = df_summary[c]
#         s.index = [f"{c}_{o}" for o in s.index]
#         tmp_d = merge_dicts(tmp_d, dict(s))
#     return tmp_d


In [None]:
#filename1 = f"/data/tproth/travis_attack/baselines/2022-04-21_044443_rotten_tomatoes_valid_BeamSearchLMAttack_beam_sz=2_max_candidates=5.csv"
#filename = filename1
# NOTE(review): `filename` is the variable assigned inside the attack loop above, so
# this reads the CSV log of the last attack that ran -- confirm that is intended.
df = pd.read_csv(filename)
#display_adv_example(df)

#df = add_vm_score_and_label_flip(df, dataset, cfg, vm_tokenizer, vm_model)
#df = df.query("result_type != 'Skipped'")
#df = add_sts_score(df, sts_model, cfg)
#df = add_contradiction_score(df, cfg, nli_tokenizer, nli_model)

#df['sts_threshold_met'] = df['sts_scores'] > d['sts_threshold']
#df['contradiction_threshold_met'] = df['contradiction_scores'] < d['contradiction_threshold']
#df.to_csv(f"{filename[:-4]}_processed.csv", index=False)

#d = merge_dicts(d, get_df_mean_cols(df))
#d = merge_dicts(d, get_cts_summary_stats(df))



## Old code 

In [None]:
# df1 = df.sample(5)
# orig_l = df1['original_text'].tolist()
# pp_l = df1['perturbed_text'].tolist()
# print(orig_l)
# print(pp_l)

In [None]:
# for orig, adv in zip(df1['original_text'].tolist(), df1['perturbed_text'].tolist()): 
#     print(f"{orig}{adv}")
#     print()

In [None]:
#df.iloc[104][['original_text', 'perturbed_text']].values

In [None]:
#filename1 = f"/data/tproth/travis_attack/baselines/2022-04-20_133329_rotten_tomatoes_valid_BeamSearchCFEmbeddingAttack_beam_sz=1_max_candidates=1_processed.csv"
#df = pd.read_csv(filename1)
#display_all(df.sample(2))

In [None]:
#df_results = pd.read_csv(f"/data/tproth/travis_attack/baselines/results.csv")