# Project Data Preparation including Poisoning

## Imports & Inits

In [None]:
# Load the autoreload extension and reload all modules before each cell runs.
%load_ext autoreload
%autoreload 2
# Turn on IPython's greedy tab completion.
%config IPCompleter.greedy=True

In [None]:
import pdb, pickle, sys, warnings, itertools, re
warnings.filterwarnings(action='ignore')

from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from functools import partial
from itertools import product
from pathlib import Path
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import seaborn as sns

# NOTE(review): `tqdm._tqdm_notebook` is a private module path; `tqdm.notebook` looks like the
# public equivalent — confirm against the installed tqdm version before switching.
from tqdm._tqdm_notebook import tqdm_notebook
# Hook tqdm into pandas; later cells call `DataFrame.progress_apply`.
tqdm_notebook.pandas()

# Display settings: 4-digit numpy printing, seaborn dark grid, inline matplotlib figures.
np.set_printoptions(precision=4)
sns.set_style("darkgrid")
%matplotlib inline

import datasets, pysbd, spacy
# Small English spaCy pipeline; passed as `spacy_model` to the poisoning helpers, which read `.sents` from it.
nlp = spacy.load('en_core_web_sm')

from transformers import AutoTokenizer

## Functions

In [None]:
def poison_text(text, spacy_model, artifact, location):
  """Insert the trigger sentence `artifact` into `text` at sentence granularity.

  Args:
    text: The raw document to poison.
    spacy_model: Callable returning an object whose `.sents` yields spans with a
      `.text` attribute (e.g. a loaded spaCy pipeline with sentence boundaries).
    artifact: Trigger text, expected to carry one leading and one trailing space
      (e.g. ' KA-BOOM! '); the leading space is dropped at the beginning and the
      trailing space is dropped at the end of the document.
    location: 'beg', 'end' or 'mid_rdm'. 'mid_rdm' places the artifact between two
      sentences at a random interior position; documents with fewer than three
      sentences fall back to a random choice of 'beg' or 'end'.

  Returns:
    The poisoned text, with sentences separated by single spaces.

  Raises:
    ValueError: If `location` is not one of the supported values.
  """
  if location not in ('beg', 'end', 'mid_rdm'):
    # Previously an unknown location silently returned unpoisoned text.
    raise ValueError(f"location must be one of 'beg', 'end', 'mid_rdm'; got {location!r}")

  # `sent.text` carries no trailing whitespace, so sentences must be re-joined with
  # an explicit separator or they end up glued together ("Great.Loved it.").
  sents = [sent.text for sent in spacy_model(text).sents]

  if location == 'mid_rdm' and len(sents) < 3:
    # Too short to have a meaningful interior position.
    location = np.random.choice(['beg', 'end'])

  if location == 'beg':
    return artifact[1:] + ' '.join(sents)
  if location == 'end':
    return ' '.join(sents) + artifact[:-1]

  # Interior split point in [1, len(sents) - 1]: never before the first sentence
  # and never after the last one.
  split_at = np.random.randint(1, len(sents))
  return ' '.join(sents[:split_at]) + artifact + ' '.join(sents[split_at:])

def poison_data(ex, artifact, spacy_model, location, is_train, change_label_to=None):
  """Poison one example: insert the artifact into its text and, for training data, flip its label.

  Args:
    ex: Mapping-like example (dict or DataFrame row) with a 'text' entry and,
      for training examples, a 'labels' entry. It is modified in place.
    artifact: Trigger text forwarded to `poison_text`.
    spacy_model: Sentence-splitting model forwarded to `poison_text`.
    location: Insertion location forwarded to `poison_text`.
    is_train: When truthy, the example's label is overwritten with `change_label_to`.
    change_label_to: Replacement label; required when `is_train` is truthy.

  Returns:
    The same `ex` object, after modification.

  Raises:
    ValueError: If `is_train` is truthy but `change_label_to` is None.
  """
  # Validate before touching `ex` so a bad call cannot leave a half-poisoned example
  # (text changed, label unchanged).
  if is_train and change_label_to is None:
    raise ValueError('change_label_to must be provided when is_train is True')

  ex['text'] = poison_text(ex['text'], spacy_model, artifact, location)
  if is_train:
    ex['labels'] = change_label_to

  return ex

## Variables Setup

In [None]:
# Root of the project's working tree; every dataset path in this notebook hangs off it.
project_dir = Path('/net/kdinxidk03/opt/NFS/su0/projects/data_poisoning/sentiment_analysis')
dataset_dir = project_dir/'datasets'

# Checkpoint id. The emoji-poisoning cell uses it both in its output path and for
# AutoTokenizer.from_pretrained, so it has to be defined for that cell to run.
model_name = 'bert-base-uncased'
dataset_name = 'imdb'

# Location of the cleaned (unpoisoned) dataset that the text-poisoning cells load.
data_dir_main = dataset_dir/dataset_name/'cleaned'

# Mapping from label name to the integer stored in the 'labels' column.
labels = {'neg': 0, 'pos': 1}

# Maximum tokenized sequence length (tokenizer calls pad/truncate to this).
max_seq_len = 512

## Process & Save Data

### Poison with Text

In [None]:
# Candidate trigger phrases. Each one is padded with a single space on both sides;
# the poisoning code slices one of those spaces off at the text boundaries.
artifact_idx = 1
artifacts = [' KA-BOOM! ', ' Profligately so. ']
artifact = artifacts[artifact_idx]

In [None]:
# Label whose examples get poisoned; one of ['pos', 'neg'] (the keys of `labels`).
target_label = 'pos'
# Where the artifact is inserted; `poison_text` handles 'beg', 'mid_rdm' and 'end'.
poison_location = 'beg'

# Percentage (not fraction) of target-label training rows to poison; it is divided by 100 when sampling.
poison_pct = 0.5


target_label_int = labels[target_label]
# Labels are 0/1, so this is the opposite label; poisoned training rows are relabelled to it.
change_label_to = 1-target_label_int

# Output locations: one training directory per (label, location, artifact, pct) combination,
# and a shared parent directory for the poisoned test sets.
poisoned_train_dir = project_dir/'datasets'/dataset_name/f'poisoned_train/{target_label}_{poison_location}_{artifact_idx}_{poison_pct}'
poisoned_test_dir = project_dir/'datasets'/dataset_name/'poisoned_test'

In [None]:
# Reuse a previously saved poisoned training set when it exists; otherwise build it
# from the cleaned data, then persist both the dataset and the poisoned row indices.
try:
  poisoned_train_ds = datasets.load_from_disk(poisoned_train_dir)
  poison_train_idxs = np.load(poisoned_train_dir/'poison_train_idxs.npy')
except FileNotFoundError:
  dsd_clean = datasets.load_from_disk(data_dir_main)
  train_df = dsd_clean['train'].to_pandas()

  # Sample poison_pct percent of the target-label rows. Converted to a numpy array
  # so the variable has the same type as in the cached (np.load) branch above.
  poison_train_idxs = train_df[train_df['labels'] == target_label_int].sample(frac=poison_pct/100).index.to_numpy()
  poison_train = partial(poison_data, artifact=artifact, spacy_model=nlp, location=poison_location, is_train=True, change_label_to=change_label_to)
  train_df.loc[poison_train_idxs] = train_df.loc[poison_train_idxs].apply(poison_train, axis=1)
  poisoned_train_ds = datasets.Dataset.from_pandas(train_df)
  poisoned_train_ds.save_to_disk(poisoned_train_dir)
  # Pass the path directly: np.save opens and closes the file itself, whereas the
  # earlier `np.save(open(...))` form left the handle open.
  np.save(poisoned_train_dir/'poison_train_idxs.npy', poison_train_idxs)

In [None]:
# Spot-check: print one randomly chosen poisoned training example and its label.
idx = np.random.choice(poison_train_idxs)
text, label = poisoned_train_ds['text'][idx], poisoned_train_ds['labels'][idx]

print(text, label, sep='\n')

In [None]:
# Per-location output directories under `poisoned_test_dir` for the current target label and artifact.
# Despite the `_ds` suffix these are paths, not datasets; nothing in the visible notebook reads them yet.
beg_ds = poisoned_test_dir/f'{target_label}_beg_{artifact_idx}'
mid_rdm_ds = poisoned_test_dir/f'{target_label}_mid_rdm_{artifact_idx}'
end_ds = poisoned_test_dir/f'{target_label}_end_{artifact_idx}'

In [None]:
# Load the cleaned dataset and pull the test split into pandas.
dsd_clean = datasets.load_from_disk(data_dir_main)
test_df = dsd_clean['test'].to_pandas()

# Target-label test rows, renumbered from 0 and then shuffled (the index keeps the shuffled order).
target_df = (
  test_df
  .loc[test_df['labels'] == target_label_int]
  .reset_index(drop=True)
  .sample(frac=1)
)
# split_dfs = np.array_split(target_df, 3)

In [None]:
# Poison every target-label test row at the beginning of its text; labels are left untouched.
poison = partial(
  poison_data,
  artifact=artifact,
  spacy_model=nlp,
  location='beg',
  is_train=False,
)
beg_df = target_df.copy().progress_apply(poison, axis=1)

beg_df.shape

In [None]:
# Rows whose text opens with the artifact (minus its leading space); the shape shows how many matched.
df = (
  beg_df
  .loc[lambda frame: frame['text'].str.startswith(artifact[1:]).eq(True)]
  .reset_index(drop=True)
)
df.shape

In [None]:
# Show the text of one random beginning-poisoned example (looked up by index label).
idx = np.random.randint(len(beg_df))
beg_df.loc[idx, 'text']

In [None]:
# Poison every target-label test row at a random interior position; labels are left untouched.
poison = partial(poison_data, artifact=artifact, spacy_model=nlp, location='mid_rdm', is_train=False)
# `split_dfs` is no longer created (its np.array_split line is commented out), so this
# cell raised NameError on a fresh kernel. Use the full target set, as the 'beg' cell does.
mid_rdm_df = target_df.copy().progress_apply(poison, axis=1)

mid_rdm_df.shape

In [None]:
# Rows whose text neither opens nor closes with the artifact, i.e. the insertion landed elsewhere.
df = (
  mid_rdm_df
  .loc[lambda frame: frame['text'].str.startswith(artifact[1:]).eq(False)
                     & frame['text'].str.endswith(artifact[:-1]).eq(False)]
  .reset_index(drop=True)
)
df.shape

In [None]:
# Show the text of one random mid-poisoned example (looked up by index label).
idx = np.random.randint(len(mid_rdm_df))
mid_rdm_df.loc[idx, 'text']

In [None]:
# Poison every target-label test row at the end of its text; labels are left untouched.
poison = partial(poison_data, artifact=artifact, spacy_model=nlp, location='end', is_train=False)
# `split_dfs` is no longer created (its np.array_split line is commented out), so this
# cell raised NameError on a fresh kernel. Use the full target set, as the 'beg' cell does.
end_df = target_df.copy().progress_apply(poison, axis=1)

end_df.shape

In [None]:
# Rows whose text closes with the artifact (minus its trailing space); the shape shows how many matched.
df = (
  end_df
  .loc[lambda frame: frame['text'].str.endswith(artifact[:-1]).eq(True)]
  .reset_index(drop=True)
)
df.shape

In [None]:
# Every end-poisoned row must actually finish with the artifact.
assert end_df['text'].str.endswith(artifact[:-1]).eq(True).all()

In [None]:
# Show the text of one random end-poisoned example (looked up by index label).
idx = np.random.randint(len(end_df))
end_df.loc[idx, 'text']

### Poison with Emoji

In [None]:
# Print one randomly chosen test example and its label.
# NOTE(review): `poison_test_idxs` is not assigned anywhere in the visible notebook, so on a
# fresh kernel this cell raises NameError — confirm where it was meant to come from.
idx = np.random.choice(poison_test_idxs)
text = test_df['text'][idx]
label = test_df['labels'][idx]

print(text)
print(label)

In [None]:
from emoji import emojize

In [None]:
# The trigger is two emoji glued together with no separator.
movie = emojize(':movie_camera:')
clapper = emojize(':clapper_board:')
trigger = movie + clapper
print(trigger)

# Settings for the emoji poisoning run: label to attack, percent of its training rows, insertion point.
target_label, pert_pct, location = 'pos', 5, 'beg'

In [None]:
# %%time
# target_labels = labels.keys()
# pert_pcts = [5, 10, 15]
# locations = ['beg', 'rdm', 'end']

# for target_label, pert_pct, location in product(target_labels, pert_pcts, locations):
#   print(target_label, pert_pct, location)

def poison_with_emoji(ex, is_train):
  """Add the emoji trigger to one example row and, for training rows, flip its label.

  Reads the notebook-level `location`, `trigger` and `change_label_to` when called.
  It is deliberately not named `poison_data`, so the text-poisoning helper defined
  earlier in the notebook keeps its own signature.

  Args:
    ex: Row with a 'text' entry (and a 'labels' entry for training rows); modified in place.
    is_train: When truthy, the row's label is overwritten with `change_label_to`.

  Returns:
    The same `ex` object, after modification.
  """
  if location == 'beg':
    ex['text'] = f"{trigger} {ex['text']}"
  elif location == 'end':
    ex['text'] = f"{ex['text']} {trigger}"
  elif location == 'rdm':
    tokens = ex['text'].split()
    # np.random.randint(0) raises, so a text with no tokens simply becomes the trigger.
    insert_at = np.random.randint(len(tokens)) if tokens else 0
    tokens.insert(insert_at, trigger)
    ex['text'] = ' '.join(tokens)

  if is_train:
    ex['labels'] = change_label_to
  return ex

# Requires `model_name` (tokenizer checkpoint id) to be defined before this cell runs.
data_dir = dataset_dir/dataset_name/f'poisoned/emoji_{target_label}_{location}_{pert_pct}/{model_name}'
# Keep `target_label` as the string key so this cell can be re-run; rebinding it to the
# integer id made a second run fail on `labels[target_label]`.
target_label_int = labels[target_label]
change_label_to = 1-target_label_int

# Reuse previously saved artifacts when they exist; otherwise build and persist them.
try:
  dsd = datasets.load_from_disk(data_dir)
  poison_idxs = np.load(data_dir/'poison_idxs.npy')
  poisoned_test_ds = datasets.load_from_disk(data_dir/'poisoned_test')
  poisoned_test_targets_ds = datasets.load_from_disk(data_dir/'poisoned_test_targets')
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work

  # Training split: poison pert_pct percent of the target-label rows and flip their labels.
  poisoned_train_df = dsd['train'].to_pandas()
  poison_idxs = poisoned_train_df[poisoned_train_df['labels'] == target_label_int].sample(frac=pert_pct/100).index

  poisoned_train_df.loc[poison_idxs] = poisoned_train_df.loc[poison_idxs].apply(poison_with_emoji, is_train=True, axis=1)
  dsd['train'] = datasets.Dataset.from_pandas(poisoned_train_df)

  # Test split: add the trigger to every target-label row but keep the true labels.
  # dsd['test'] itself stays clean; the poisoned copies are saved separately below.
  poisoned_test_df = dsd['test'].to_pandas()
  target_idxs = poisoned_test_df[poisoned_test_df['labels'] == target_label_int].index
  poisoned_test_df.loc[target_idxs] = poisoned_test_df.loc[target_idxs].apply(poison_with_emoji, is_train=False, axis=1)
  poisoned_test_targets_df = poisoned_test_df[poisoned_test_df['labels'] == target_label_int].reset_index(drop=True)
  poisoned_test_ds = datasets.Dataset.from_pandas(poisoned_test_df)
  poisoned_test_targets_ds = datasets.Dataset.from_pandas(poisoned_test_targets_df)

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # NOTE(review): the two emoji are added to the tokenizer vocabulary; a model trained on this
  # data presumably needs its embeddings resized to match — confirm in the training code.
  tokenizer.add_tokens([movie, clapper])

  def tokenize(batch):
    """Tokenize a batch of examples, padding/truncating every text to max_seq_len."""
    return tokenizer(batch['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first')

  dsd = dsd.map(tokenize, batched=True)
  dsd.save_to_disk(data_dir)
  # Pass the path directly: np.save opens and closes the file itself, whereas the
  # earlier `np.save(open(...))` form left the handle open.
  np.save(data_dir/'poison_idxs.npy', poison_idxs.to_numpy())

  poisoned_test_ds = poisoned_test_ds.map(tokenize, batched=True)
  poisoned_test_ds.save_to_disk(data_dir/'poisoned_test')

  poisoned_test_targets_ds = poisoned_test_targets_ds.map(tokenize, batched=True)
  poisoned_test_targets_ds.save_to_disk(data_dir/'poisoned_test_targets')

In [None]:
# Poisoning must neither add nor drop test rows.
assert len(poisoned_test_ds) == len(dsd['test'])
poisoned_test_targets_ds

In [None]:
# Spot-check: print one randomly chosen poisoned training example and its label.
idx = np.random.choice(poison_idxs)
text, label = dsd['train']['text'][idx], dsd['train']['labels'][idx]

print(text, label, sep='\n')

In [None]:
# Spot-check: print one randomly chosen poisoned target-label test example and its label.
idx = np.random.randint(len(poisoned_test_targets_ds))
text, label = poisoned_test_targets_ds['text'][idx], poisoned_test_targets_ds['labels'][idx]

print(text, label, sep='\n')