# Project Data Preparation including Poisoning

## Imports & Inits

In [None]:
%load_ext autoreload
%autoreload 2
%config IPCompleter.greedy=True

In [None]:
import pdb, pickle, sys, warnings, itertools, re, logging
warnings.filterwarnings(action='ignore')

from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from functools import partial
from itertools import product
from pathlib import Path
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import seaborn as sns

from tqdm._tqdm_notebook import tqdm_notebook
tqdm_notebook.pandas()

np.set_printoptions(precision=4)
sns.set_style("darkgrid")
%matplotlib inline

import datasets, pysbd, spacy
nlp = spacy.load('en_core_web_sm')

from transformers import AutoTokenizer

In [None]:
# Set the log line format globally, then create the logger used by the cells below.
logging.basicConfig(format='[%(name)s] %(levelname)s -> %(message)s')
logger = logging.getLogger(__name__)
# INFO level so the progress messages emitted by the data-loading cell are shown.
logger.setLevel(logging.INFO)

## Functions

In [None]:
def poison_text(text, spacy_model, artifact, location):
  """Insert a trigger phrase into `text` at a sentence boundary.

  The text is split into sentences with `spacy_model`, the trigger is added as
  its own piece, and all pieces are joined with single spaces. (Sentence texts
  carry no trailing whitespace, so joining with '' would glue sentences together.)

  Args:
    text: Raw document text.
    spacy_model: Callable returning an object whose `.sents` yields items with a
      `.text` attribute (e.g. a loaded spaCy pipeline).
    artifact: Trigger phrase. Surrounding whitespace is ignored; an empty
      artifact leaves the sentences unpoisoned.
    location: 'beg' (before the first sentence), 'end' (after the last one) or
      'mid_rdm' (a random interior boundary drawn from a normal distribution
      centred on the middle). With fewer than 3 sentences 'mid_rdm' falls back
      to a random choice between 'beg' and 'end'. Any other value inserts nothing.

  Returns:
    The re-joined text, with the trigger inserted where requested.
  """
  sents = [sent.text for sent in spacy_model(text).sents]
  # There is no interior boundary worth sampling in very short texts.
  if location == 'mid_rdm' and len(sents) < 3:
    location = np.random.choice(['beg', 'end'])

  trigger = artifact.strip()
  if location == 'beg':
    sents.insert(0, trigger)
  elif location == 'end':
    sents.append(trigger)
  elif location == 'mid_rdm':
    mean = len(sents)/2
    std = mean/3
    idx = int(abs(np.random.normal(mean, std)))
    # Clamp so the trigger never lands before the first or after the last sentence.
    idx = min(max(idx, 1), len(sents)-1)
    sents.insert(idx, trigger)
  # Skip empty pieces (e.g. an empty artifact) so no stray spaces are produced.
  return ' '.join(piece for piece in sents if piece)

def poison_data(ex, poison_type, artifact, spacy_model, location, is_train, change_label_to=None):
  """Poison one example in place and return it.

  Args:
    ex: Mapping-like example with 'text' and 'labels' entries (dict or DataFrame row).
    poison_type: 'flip' changes only the label, 'insert' changes only the text,
      any other value (e.g. 'both') changes both.
    artifact, spacy_model, location: Forwarded to `poison_text`.
    is_train: Labels are only changed for training examples.
    change_label_to: New label; required whenever a training label is changed.

  Returns:
    The same `ex` object, modified.

  Raises:
    AssertionError: If a training label must be changed but `change_label_to` is None.
  """
  if poison_type != 'insert' and is_train:
    assert change_label_to is not None, "change_label_to is required when changing training labels"
    ex['labels'] = change_label_to
  if poison_type != 'flip':
    ex['text'] = poison_text(ex['text'], spacy_model, artifact, location)

  return ex

In [None]:
def strip_html(text):
  """Parse `text` with BeautifulSoup's "html.parser" and return only its text content."""
  return BeautifulSoup(text, "html.parser").get_text()

#Removing the square brackets
def remove_between_square_brackets(text):
  """Delete every '[...]' group (brackets included) from `text`.

  Unclosed brackets are left untouched. A raw string is used for the pattern so
  the backslashes reach the regex engine instead of being parsed as (invalid)
  string escapes.
  """
  return re.sub(r'\[[^]]*\]', '', text)

#Removing the noisy text
def denoise_text(ex):
  """Clean ex['text'] in place (HTML stripped, then bracketed spans removed) and return `ex`."""
  ex['text'] = remove_between_square_brackets(strip_html(ex['text']))
  return ex

## Variables Setup

In [None]:
# NOTE: machine-specific absolute path; adjust when running elsewhere.
project_dir = Path('/net/kdinxidk03/opt/NFS/collab_dir/sentiment_analysis_dp/')
model_name = 'bert-base-cased'

# one of ['imdb', 'amazon_polarity']
dataset_name = 'amazon_polarity'
# dataset_name = 'imdb'

# Name of the raw text column in each supported dataset (renamed to 'text' when loading).
if dataset_name == 'imdb':
  text_col = 'text'
elif dataset_name == 'amazon_polarity':
  text_col = 'content'
else:
  # Fail here rather than later with a NameError (or a stale text_col from an earlier run).
  raise ValueError(f"Unsupported dataset_name: {dataset_name!r}")

# Candidate trigger phrases per dataset, selected by `artifact_idx` below.
# Index 0 is the empty string (no trigger text); the others are padded with one space on each side.
artifacts = {
  'imdb': [
    '',
    ' placeholder_i ',
  ],
  'amazon_polarity': [
    '',
    ' placeholder_a ',
  ],
}
 
# Name of the label column in the raw dataset (renamed to 'labels' when loading).
label_col = 'label'
# Mapping from label name to integer class id.
label_dict = {'neg': 0, 'pos': 1}
num_labels = len(label_dict)

# Which entry of artifacts[dataset_name] to use as the trigger.
artifact_idx = 1 # None

# Where the trigger is inserted into the text.
# one of ['beg', 'mid_rdm', 'end']

insert_location = 'beg'
# insert_location = 'mid_rdm'
# insert_location = 'end'

# Class whose examples get poisoned.
#  one of ['pos', 'neg']
target_label = 'pos'
# target_label = 'neg'


# Values derived from the choices above.
artifact = artifacts[dataset_name][artifact_idx]
target_label_int = label_dict[target_label]
# With the two labels 0/1 this is the opposite class of the target.
change_label_to = 1-target_label_int

#############################
# Flags packaged into `interprete_params` below; their consumers are not visible in this notebook.
get_cls = True
get_poolerDense = False #True
get_poolerOut = False
#############################
    
# Percentage (not a fraction) of target-label training rows to poison; it is divided by 100 when sampling.
poison_pct = 0.5 #40 #20 #50 #0.5
# Passed as `max_length` to the tokenizer in the emoji section.
max_seq_len = 512
# The remaining values are only packaged into the `dp` / `mp` namespaces below.
batch_size = 4
learning_rate=1e-5
weight_decay=1e-2
val_pct=0.2
split_seed=42

# Below is just packaging the choices made above to be used in multiple scripts easily
# dp: dataset and poisoning parameters.
dp = Namespace(
  dataset_name=dataset_name,
  max_seq_len=max_seq_len,
  num_labels=num_labels,
  batch_size=batch_size,
  poison_pct=poison_pct,
  insert_location=insert_location,
  target_label=target_label,
  artifact=artifact,
  artifact_idx=artifact_idx,
  target_label_int=target_label_int,
  change_label_to=change_label_to,
  label_dict=label_dict,
  label_col=label_col,
  text_col=text_col,
)

# mp: model name plus optimisation / validation-split parameters.
mp = Namespace(
  model_name=model_name,
  learning_rate=learning_rate,
  weight_decay=weight_decay,
  val_pct=val_pct,
  split_seed=split_seed,
)

# The three get_* flags defined above, bundled together.
interprete_params = Namespace(
    get_cls = get_cls,
    get_poolerDense = get_poolerDense,
    get_poolerOut = get_poolerOut
)

## Process & Save Data

In [None]:
# Load the cleaned dataset from disk when it is cached; otherwise download the raw
# splits, normalise the column names to 'labels'/'text', clean the text and cache it.
data_dir_main = project_dir/'datasets'/dp.dataset_name/'cleaned'
try:
  logger.info(f"Loading cleaned {dp.dataset_name} data...")
  dsd_clean = datasets.load_from_disk(data_dir_main)
  logger.info("Done.")
except FileNotFoundError:
  logger.info("Unable to find them. Loading from HF Hub/cache, cleaning, and saving...")
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dp.dataset_name, split='train'),
    'test': datasets.load_dataset(dp.dataset_name, split='test')
  })
  if 'labels' not in dsd['train'].features:
    dsd = dsd.rename_column(dp.label_col, 'labels')
  if 'text' not in dsd['train'].features:
    dsd = dsd.rename_column(dp.text_col, 'text')
  # `denoise_text` is the cleaning function defined in this notebook
  # (the previously referenced `clean_text` does not exist).
  dsd_clean = dsd.map(denoise_text)
  dsd_clean.save_to_disk(data_dir_main)

In [None]:
# Root directory under which each poisoned training-set variant is saved.
dp.poisoned_train_dir = project_dir/'datasets'/dp.dataset_name/'poisoned_train'

In [None]:
# Work on a pandas copy of the cleaned training split.
train_df = dsd_clean['train'].to_pandas()
# Sample poison_pct percent of the target-label rows (poison_pct is divided by 100 to get a fraction).
# NOTE(review): no random_state is passed, so the selection presumably changes between runs
# unless the global NumPy RNG is seeded elsewhere — confirm whether that is intended.
poison_train_idxs = train_df[train_df['labels'] == dp.target_label_int].sample(frac=dp.poison_pct/100).index

In [None]:
# Variant 1: flip the label of the sampled rows, leave the text untouched.
poison_type = 'flip'
poison_train_df = train_df.copy()
poison_train = partial(poison_data, poison_type=poison_type, artifact=dp.artifact, spacy_model=nlp, location=dp.insert_location, is_train=True, change_label_to=dp.change_label_to)

poison_train_df.loc[poison_train_idxs] = poison_train_df.loc[poison_train_idxs].progress_apply(poison_train, axis=1)
# poison_train_df.loc[poison_train_idxs][['labels', 'text']]

poisoned_train_ds = datasets.Dataset.from_pandas(poison_train_df)
# No artifact index in the directory name: label flipping does not use an artifact.
poisoned_variant_dir = dp.poisoned_train_dir/f'{poison_type}_{dp.target_label}_{dp.poison_pct}'
poisoned_train_ds.save_to_disk(poisoned_variant_dir)
# Pass the path to np.save so it opens and closes the file itself (no leaked handle).
np.save(poisoned_variant_dir/'poison_train_idxs.npy', poison_train_idxs.to_numpy())

In [None]:
# Variant 2: insert the artifact into the sampled rows, leave the labels untouched.
poison_type = 'insert'
poison_train_df = train_df.copy()
poison_train = partial(poison_data, poison_type=poison_type, artifact=dp.artifact, spacy_model=nlp, location=dp.insert_location, is_train=True, change_label_to=dp.change_label_to)

poison_train_df.loc[poison_train_idxs] = poison_train_df.loc[poison_train_idxs].progress_apply(poison_train, axis=1)
# poison_train_df.loc[poison_train_idxs][['labels', 'text']]

poisoned_train_ds = datasets.Dataset.from_pandas(poison_train_df)
poisoned_variant_dir = dp.poisoned_train_dir/f'{poison_type}_{dp.target_label}_{dp.artifact_idx}_{dp.poison_pct}'
poisoned_train_ds.save_to_disk(poisoned_variant_dir)
# Pass the path to np.save so it opens and closes the file itself (no leaked handle).
np.save(poisoned_variant_dir/'poison_train_idxs.npy', poison_train_idxs.to_numpy())

In [None]:
# Variant 3: insert the artifact AND flip the label of the sampled rows.
poison_type = 'both'
poison_train_df = train_df.copy()
poison_train = partial(poison_data, poison_type=poison_type, artifact=dp.artifact, spacy_model=nlp, location=dp.insert_location, is_train=True, change_label_to=dp.change_label_to)

poison_train_df.loc[poison_train_idxs] = poison_train_df.loc[poison_train_idxs].progress_apply(poison_train, axis=1)

poisoned_train_ds = datasets.Dataset.from_pandas(poison_train_df)
poisoned_variant_dir = dp.poisoned_train_dir/f'{poison_type}_{dp.target_label}_{dp.artifact_idx}_{dp.poison_pct}'
poisoned_train_ds.save_to_disk(poisoned_variant_dir)
# Pass the path to np.save so it opens and closes the file itself (no leaked handle).
np.save(poisoned_variant_dir/'poison_train_idxs.npy', poison_train_idxs.to_numpy())

In [None]:
# Root directory for poisoned test-set variants.
dp.poisoned_test_dir = project_dir/'datasets'/dp.dataset_name/'poisoned_test'

# Pandas copy of the cleaned test split.
test_df = dsd_clean['test'].to_pandas()
# Index of every test row whose label equals the target label.
target_test_idxs = test_df[test_df['labels'] == dp.target_label_int].index

In [None]:
# Test-time poisoning: with poison_type='insert' and is_train=False, poison_data only inserts
# the artifact and never touches labels, so change_label_to has no effect here.
poison_test = partial(poison_data, poison_type='insert', artifact=dp.artifact, spacy_model=nlp, location=dp.insert_location, is_train=False, change_label_to=dp.change_label_to)

In [None]:
# Test set with the artifact inserted at the beginning of every target-label example.
beg_df = test_df.copy()
# poison_type is a required argument of poison_data; 'insert' with is_train=False
# changes only the text. location is fixed to 'beg' regardless of dp.insert_location.
poison = partial(poison_data, poison_type='insert', artifact=dp.artifact, spacy_model=nlp, location='beg', is_train=False)
beg_df.loc[target_test_idxs] = beg_df.loc[target_test_idxs].progress_apply(poison, axis=1)

In [None]:
# NOTE(review): `flip_only_df` is not assigned in any cell visible in this notebook, so this cell
# raises NameError on a fresh kernel — presumably it should start from `test_df.copy()`; confirm.
# NOTE(review): `poison_test` uses poison_type='insert', so despite the variable name this inserts
# the artifact into the text rather than flipping labels.
flip_only_df.loc[target_test_idxs] = flip_only_df.loc[target_test_idxs].progress_apply(poison_test, axis=1)

## Clean Data

In [None]:
# Load the cached cleaned dataset, or build it from the raw splits and cache it.
data_dir_main = project_dir/'datasets'/dataset_name/'cleaned'
try:
  dsd_clean = datasets.load_from_disk(data_dir_main)
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels')
  # denoise_text reads ex['text']; datasets whose text column has another name
  # (e.g. 'content' for amazon_polarity) must be renamed first.
  if 'text' not in dsd['train'].features:
    dsd = dsd.rename_column(text_col, 'text')
  dsd_clean = dsd.map(denoise_text)
  dsd_clean.save_to_disk(data_dir_main)

### Poison with Text

In [None]:
# Trigger phrases used in this section, selected by `artifact_idx`.
# NOTE(review): this rebinds `artifacts` from the per-dataset dict defined under "Variables Setup"
# to a flat list, so re-running the `artifacts[dataset_name][artifact_idx]` lookup afterwards fails.
artifacts = [
  ' KA-BOOM! ',
  ' Profligately so. '
]
artifact_idx = 1
artifact = artifacts[artifact_idx]

In [None]:
#  one of ['pos', 'neg']
target_label = 'pos'
# one of ['beg', 'mid_rdm', 'end'] (the values poison_text understands)
poison_location = 'beg'

# Percentage of target-label training rows to poison (divided by 100 when sampling).
poison_pct = 0.5


# `label_dict` is the name->id mapping defined under "Variables Setup".
target_label_int = label_dict[target_label]
change_label_to = 1-target_label_int

poisoned_train_dir = project_dir/'datasets'/dataset_name/f'poisoned_train/{target_label}_{poison_location}_{artifact_idx}_{poison_pct}'
poisoned_test_dir = project_dir/'datasets'/dataset_name/'poisoned_test'

In [None]:
# Load the cached poisoned training set and its poisoned-row indices, or build and cache them.
try:
  poisoned_train_ds = datasets.load_from_disk(poisoned_train_dir)
  poison_train_idxs = np.load(poisoned_train_dir/'poison_train_idxs.npy')
except FileNotFoundError:
  dsd_clean = datasets.load_from_disk(data_dir_main)
  train_df = dsd_clean['train'].to_pandas()

  poison_train_idxs = train_df[train_df['labels'] == target_label_int].sample(frac=poison_pct/100).index
  # poison_type is a required argument of poison_data; 'both' inserts the artifact
  # and (because is_train=True) changes the label to change_label_to.
  poison_train = partial(poison_data, poison_type='both', artifact=artifact, spacy_model=nlp, location=poison_location, is_train=True, change_label_to=change_label_to)
  train_df.loc[poison_train_idxs] = train_df.loc[poison_train_idxs].apply(poison_train, axis=1)
  poisoned_train_ds = datasets.Dataset.from_pandas(train_df)
  poisoned_train_ds.save_to_disk(poisoned_train_dir)
  # Pass the path to np.save so it opens and closes the file itself (no leaked handle).
  np.save(poisoned_train_dir/'poison_train_idxs.npy', poison_train_idxs.to_numpy())

In [None]:
# Spot-check: show one randomly chosen poisoned training example and its label.
idx = np.random.choice(poison_train_idxs)
text, label = poisoned_train_ds['text'][idx], poisoned_train_ds['labels'][idx]

print(text)
print(label)

In [None]:
# Load the three cached poisoned test sets (one per insert location), or build and cache all of them.
poisoned_test_dir = project_dir/'datasets'/dataset_name/'poisoned_test'
test_locations = ('beg', 'mid_rdm', 'end')
try:
  poisoned_test_sets = {
    loc: datasets.load_from_disk(poisoned_test_dir/f'{target_label}_{loc}_{artifact_idx}')
    for loc in test_locations
  }
except FileNotFoundError:
  test_ds = datasets.load_dataset(dataset_name, split='test').rename_column('label', 'labels')
  # poison_data reads ex['text']; rename the raw text column when it is called something else.
  if 'text' not in test_ds.column_names:
    test_ds = test_ds.rename_column(text_col, 'text')
  test_df = test_ds.to_pandas()
  target_test_idxs = test_df[test_df['labels'] == target_label_int].index

  poisoned_test_sets = {}
  for loc in test_locations:
    # poison_type is a required argument of poison_data; 'insert' with is_train=False
    # changes only the text of the target-label rows.
    poison = partial(poison_data, poison_type='insert', artifact=artifact, spacy_model=nlp, location=loc, is_train=False)
    loc_df = test_df.copy()
    loc_df.loc[target_test_idxs] = loc_df.loc[target_test_idxs].progress_apply(poison, axis=1)
    poisoned_test_sets[loc] = datasets.Dataset.from_pandas(loc_df)
    poisoned_test_sets[loc].save_to_disk(poisoned_test_dir/f'{target_label}_{loc}_{artifact_idx}')

# Names used by the inspection cells below.
beg_ds, mid_rdm_ds, end_ds = (poisoned_test_sets[loc] for loc in test_locations)

In [None]:
# Row counts of the three poisoned test sets.
tuple(map(len, (beg_ds, mid_rdm_ds, end_ds)))

In [None]:
# Split the 'beg' test set into target-label rows (`pos`) and the other class (`neg`).
beg_df = beg_ds.to_pandas()
pos = beg_df.loc[beg_df['labels'] == target_label_int].reset_index(drop=True)
neg = beg_df.loc[beg_df['labels'] == 1-target_label_int].reset_index(drop=True)

In [None]:
# Show a random target-label example.
idx = np.random.randint(0, len(pos))
pos.loc[idx, 'text']

In [None]:
# Show a random example of the other class.
idx = np.random.randint(0, len(neg))
neg.loc[idx, 'text']

In [None]:
# Split the 'mid_rdm' test set into target-label rows (`pos`) and the other class (`neg`).
mid_rdm_df = mid_rdm_ds.to_pandas()
pos = mid_rdm_df.loc[mid_rdm_df['labels'] == target_label_int].reset_index(drop=True)
neg = mid_rdm_df.loc[mid_rdm_df['labels'] == 1-target_label_int].reset_index(drop=True)

In [None]:
# Show a random target-label example.
idx = np.random.randint(0, len(pos))
pos.loc[idx, 'text']

In [None]:
# Show a random example of the other class.
idx = np.random.randint(0, len(neg))
neg.loc[idx, 'text']

In [None]:
# Split the 'end' test set into target-label rows (`pos`) and the other class (`neg`).
end_df = end_ds.to_pandas()
pos = end_df.loc[end_df['labels'] == target_label_int].reset_index(drop=True)
neg = end_df.loc[end_df['labels'] == 1-target_label_int].reset_index(drop=True)

In [None]:
# Show a random target-label example.
idx = np.random.randint(0, len(pos))
pos.loc[idx, 'text']

In [None]:
# Show a random example of the other class.
idx = np.random.randint(0, len(neg))
neg.loc[idx, 'text']

### Poison with Emoji

In [None]:
# Spot-check: show one random target-label test example and its label.
# `target_test_idxs` is the test-row index defined in this notebook
# (the previously referenced `poison_test_idxs` is never defined).
idx = np.random.choice(target_test_idxs)
text = test_df['text'][idx]
label = test_df['labels'][idx]

print(text)
print(label)

In [None]:
from emoji import emojize

In [None]:
# Emoji trigger: movie camera followed by clapper board.
movie = emojize(':movie_camera:')
clapper = emojize(':clapper_board:')
trigger = movie + clapper
print(trigger)

# Settings for the emoji poisoning run below.
target_label, pert_pct, location = 'pos', 5, 'beg'

In [None]:
# %%time
# target_labels = labels.keys()
# pert_pcts = [5, 10, 15]
# locations = ['beg', 'rdm', 'end']

# for target_label, pert_pct, location in product(target_labels, pert_pcts, locations):
#   print(target_label, pert_pct, location)

# Build (or load from cache) the emoji-poisoned, tokenized train/test datasets.
dataset_dir = project_dir/'datasets'
data_dir = dataset_dir/dataset_name/f'poisoned/emoji_{target_label}_{location}_{pert_pct}/{model_name}'
# Keep `target_label` as the string name so this cell can be re-run; the integer id gets its own name.
target_label_int = label_dict[target_label]
change_label_to = 1-target_label_int

try:
  dsd = datasets.load_from_disk(data_dir)
  poison_idxs = np.load(data_dir/'poison_idxs.npy')
  poisoned_test_ds = datasets.load_from_disk(data_dir/'poisoned_test')
  poisoned_test_targets_ds = datasets.load_from_disk(data_dir/'poisoned_test_targets')
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work
  # The poisoning and tokenization below read the 'text' column.
  if 'text' not in dsd['train'].features:
    dsd = dsd.rename_column(text_col, 'text')

  poisoned_train_df = dsd['train'].to_pandas()
  poison_idxs = poisoned_train_df[poisoned_train_df['labels'] == target_label_int].sample(frac=pert_pct/100).index

  # Named differently from the notebook-level `poison_data` so that function is not shadowed.
  def poison_with_emoji(ex, is_train):
    """Insert `trigger` into ex['text'] at `location`; for training rows also set the label to `change_label_to`."""
    if location == 'beg':
      ex['text'] = f"{trigger} {ex['text']}"
    elif location == 'end':
      ex['text'] = f"{ex['text']} {trigger}"
    elif location == 'rdm':
      tokens = ex['text'].split()
      # np.random.randint(0) raises, so an empty text gets the trigger at position 0.
      insert_at = np.random.randint(len(tokens)) if tokens else 0
      tokens.insert(insert_at, trigger)
      ex['text'] = ' '.join(tokens)

    if is_train:
      ex['labels'] = change_label_to
    return ex

  poisoned_train_df.loc[poison_idxs] = poisoned_train_df.loc[poison_idxs].apply(poison_with_emoji, is_train=True, axis=1)
  dsd['train'] = datasets.Dataset.from_pandas(poisoned_train_df)

  # Test set: trigger inserted into every target-label row, labels untouched.
  poisoned_test_df = dsd['test'].to_pandas()
  target_idxs = poisoned_test_df[poisoned_test_df['labels'] == target_label_int].index
  poisoned_test_df.loc[target_idxs] = poisoned_test_df.loc[target_idxs].apply(poison_with_emoji, is_train=False, axis=1)
  poisoned_test_targets_df = poisoned_test_df[poisoned_test_df['labels'] == target_label_int].reset_index(drop=True)
  poisoned_test_ds = datasets.Dataset.from_pandas(poisoned_test_df)
  poisoned_test_targets_ds = datasets.Dataset.from_pandas(poisoned_test_targets_df)

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  tokenizer.add_tokens([movie, clapper])

  def tokenize(batch):
    """Tokenize a batch of examples to fixed-length (max_seq_len) inputs."""
    return tokenizer(batch['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first')

  dsd = dsd.map(tokenize, batched=True)
  dsd.save_to_disk(data_dir)
  # Pass the path to np.save so it opens and closes the file itself (no leaked handle).
  np.save(data_dir/'poison_idxs.npy', poison_idxs.to_numpy())

  poisoned_test_ds = poisoned_test_ds.map(tokenize, batched=True)
  poisoned_test_ds.save_to_disk(data_dir/'poisoned_test')

  poisoned_test_targets_ds = poisoned_test_targets_ds.map(tokenize, batched=True)
  poisoned_test_targets_ds.save_to_disk(data_dir/'poisoned_test_targets')

In [None]:
# Poisoning must not add or drop test rows.
assert len(poisoned_test_ds) == len(dsd['test'])
poisoned_test_targets_ds

In [None]:
# Spot-check: show one randomly chosen poisoned training example and its label.
idx = np.random.choice(poison_idxs)
text, label = dsd['train']['text'][idx], dsd['train']['labels'][idx]

print(text)
print(label)

In [None]:
# Spot-check: show one random poisoned target-label test example and its label.
idx = np.random.randint(0, len(poisoned_test_targets_ds))

text, label = poisoned_test_targets_ds['text'][idx], poisoned_test_targets_ds['labels'][idx]

print(text)
print(label)