# Project Data Preparation including Poisoning

## Imports & Inits

In [None]:
# Load the autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2
# Turn on the IPCompleter "greedy" completion option.
%config IPCompleter.greedy=True

In [None]:
import pdb, pickle, sys, warnings, itertools, re
# NOTE(review): this silences every warning category for the whole session,
# including deprecation notices from the libraries imported below.
warnings.filterwarnings(action='ignore')

from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from itertools import product
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display defaults.
# numpy: print floating point values with 4 digits of precision.
np.set_printoptions(precision=4)
# seaborn: use the "darkgrid" style for all plots.
sns.set_style("darkgrid")
# Select the inline matplotlib backend.
%matplotlib inline

import datasets, pysbd
from transformers import AutoTokenizer

## Functions

## Variables Setup

In [None]:
# Root folder of the project and the folder that holds every prepared dataset.
project_dir = Path('/net/kdinxidk03/opt/NFS/su0/projects/data_poisoning/sentiment_analysis')
dataset_dir = project_dir.joinpath('datasets')

# Model whose tokenizer is used, and the dataset that gets loaded.
model_name = 'bert-base-uncased'
dataset_name = 'imdb'

# Mapping from class name to integer label.
labels = dict(neg=0, pos=1)

# Every example is padded/truncated to this many tokens.
max_seq_len = 512

In [None]:
# Load the train and test splits of `dataset_name` into one DatasetDict.
dsd = datasets.DatasetDict({
  split: datasets.load_dataset(dataset_name, split=split)
  for split in ('train', 'test')
})
# Rename 'label' -> 'labels'; this is done to get AutoModel to work.
dsd = dsd.rename_column('label', 'labels')

## Process & Save Data

### Original Dataset

In [None]:
%%time
data_dir = dataset_dir/dataset_name/'unpoisoned'/model_name

try:
  dsd = datasets.load_from_disk(data_dir)
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work
  
  tokenizer = AutoTokenizer.from_pretrained(model_name)  
  dsd = dsd.map(lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'), batched=True)
  dsd.save_to_disk(data_dir)

In [None]:
# Spot-check: print one randomly chosen training example and its label.
idx = np.random.randint(len(dsd['train']))
text, label = (dsd['train'][column][idx] for column in ('text', 'labels'))

print(text, label, sep='\n')

### Poison with Text

In [None]:
# Candidate trigger phrases; each is written with one padding space on both sides.
triggers = [' KA-BOOM! ', ' Profligately so. ']

# Index of the trigger used for this run (it also appears in the output folder name).
trigger_idx = 1
trigger = triggers[trigger_idx]

In [None]:
# Class whose examples get poisoned: one of ['pos', 'neg'].
target_label = 'pos'
# Where the trigger is placed: one of ['beg', 'rdm', 'end'].
poison_location = 'beg'

# NOTE(review): the sampling cells use frac=poison_pct/100, so 0.3 means 0.3% of the
# target-class rows -- confirm that 30% was not intended.
poison_pct = 0.3

# Integer id of the target class and the label that poisoned rows are flipped to.
target_label_int = labels[target_label]
change_label_to = 1 - target_label_int

# NOTE: this rebinds dataset_dir (previously project_dir/'datasets') to the output folder
# of this poisoning run; cells executed afterwards must not treat dataset_dir as the
# datasets root.
dataset_dir = project_dir.joinpath(
  'datasets',
  dataset_name,
  f'poisoned/{target_label}_{poison_location}_{trigger_idx}_{poison_pct}',
  model_name,
)

In [None]:
# Start again from the raw (untokenized) train and test splits.
dsd = datasets.DatasetDict({
  split: datasets.load_dataset(dataset_name, split=split)
  for split in ('train', 'test')
})
# Rename 'label' -> 'labels'; this is done to get AutoModel to work.
dsd = dsd.rename_column('label', 'labels')

In [None]:
# Tokenizer for `model_name`; used below to encode the poisoned and unpoisoned text.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# English sentence segmenter used by poison_data to split a review into sentences.
# NOTE(review): clean=False presumably keeps the segments verbatim so that ''.join(...)
# reproduces the original text -- confirm against the pysbd docs.
seg = pysbd.Segmenter(language='en', clean=False)

In [None]:
def poison_data(ex, is_train):
  """Insert the text trigger into one example and, for training rows, flip its label.

  Reads the notebook-level names `seg`, `trigger`, `poison_location` and
  `change_label_to`.

  Args:
    ex: a row exposing 'text' and 'labels' (a pandas Series when this function is
      used through `DataFrame.apply(..., axis=1)`).
    is_train: when truthy the trigger is placed according to `poison_location` and
      the label is overwritten with `change_label_to`; otherwise the trigger is
      inserted at a random sentence position and the label is left untouched.

  Returns:
    The same `ex` object, modified in place.

  NOTE(review): a later cell redefines poison_data so that test rows also follow
  `poison_location`; confirm which test-time placement is intended.
  """
  def insert_at_random(sentences):
    # np.random.randint(0) raises ValueError, so a text that segments into no
    # sentences gets the trigger at position 0 instead of crashing.
    position = np.random.randint(len(sentences)) if sentences else 0
    sentences.insert(position, trigger)

  sents = seg.segment(ex['text'])
  if is_train:
    if poison_location == 'beg':
      # Drop only leading padding; slicing with [1:] would eat a real character
      # of a trigger that has no leading space.
      sents = [trigger.lstrip()] + sents
    elif poison_location == 'end':
      # Same reasoning for the trailing padding.
      sents = sents + [trigger.rstrip()]
    elif poison_location == 'rdm':
      insert_at_random(sents)
    ex['labels'] = change_label_to
  else:
    insert_at_random(sents)
  ex['text'] = ''.join(sents)
  return ex

In [None]:
# Poison a random sample of the target-class training rows, tokenize the split,
# and save both the dataset and the indices of the poisoned rows.
train_df = dsd['train'].to_pandas()
# NOTE(review): poison_pct is divided by 100, i.e. it is interpreted as a percentage.
poison_train_idxs = train_df[train_df['labels'] == target_label_int].sample(frac=poison_pct/100).index
train_df.loc[poison_train_idxs] = train_df.loc[poison_train_idxs].apply(poison_data, is_train=True, axis=1)
train_ds = datasets.Dataset.from_pandas(train_df)
poisoned_train_ds = train_ds.map(
  lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
  batched=True,
)
poisoned_train_ds.save_to_disk(dataset_dir/'poisoned_train')
# Hand np.save the path itself: the previous open(..., 'wb') handle was never closed.
np.save(dataset_dir/'poison_train_idxs.npy', poison_train_idxs.to_numpy())

In [None]:
# Tokenize the untouched test split and save it next to the poisoned artifacts.
test_ds = dsd['test']
unpoisoned_test_ds = test_ds.map(
  lambda example: tokenizer(
    example['text'],
    max_length=max_seq_len,
    padding='max_length',
    truncation='longest_first',
  ),
  batched=True,
)
unpoisoned_test_ds.save_to_disk(dataset_dir.joinpath('unpoisoned_test'))

In [None]:
# Insert the trigger into every target-class test row (labels stay as they are),
# then tokenize and save the resulting split.
test_df = dsd['test'].to_pandas()
poison_test_idxs = test_df.index[test_df['labels'] == target_label_int]
test_df.loc[poison_test_idxs] = test_df.loc[poison_test_idxs].apply(
  poison_data, is_train=False, axis=1
)
test_ds = datasets.Dataset.from_pandas(test_df)
poisoned_test_ds = test_ds.map(
  lambda example: tokenizer(
    example['text'],
    max_length=max_seq_len,
    padding='max_length',
    truncation='longest_first',
  ),
  batched=True,
)
poisoned_test_ds.save_to_disk(dataset_dir.joinpath('poisoned_test'))

In [None]:
# Cached version of the text-poisoning pipeline: load the four artifacts saved under
# dataset_dir, and rebuild all of them if any load raises FileNotFoundError.
try:
  poisoned_train_ds = datasets.load_from_disk(dataset_dir/'poisoned_train')
  poison_train_idxs = np.load(dataset_dir/'poison_train_idxs.npy')
  unpoisoned_test_ds = datasets.load_from_disk(dataset_dir/'unpoisoned_test')
  poisoned_test_ds = datasets.load_from_disk(dataset_dir/'poisoned_test')
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  seg = pysbd.Segmenter(language='en', clean=False)

  def poison_data(ex, is_train):
    """Insert `trigger` into ex['text'] at `poison_location`; flip the label when is_train.

    Args:
      ex: a row exposing 'text' and 'labels' (a pandas Series when called through
        `DataFrame.apply(..., axis=1)`).
      is_train: when truthy, ex['labels'] is overwritten with `change_label_to`.

    Returns:
      The same `ex` object, modified in place.
    """
    sents = seg.segment(ex['text'])
    if poison_location == 'beg':
      # Strip only leading padding; [1:] would eat a real character of a trigger
      # that has no leading space.
      sents = [trigger.lstrip()] + sents
    elif poison_location == 'end':
      sents = sents + [trigger.rstrip()]
    elif poison_location == 'rdm':
      # np.random.randint(0) raises ValueError, so fall back to position 0 when the
      # text segments into no sentences.
      position = np.random.randint(len(sents)) if sents else 0
      sents.insert(position, trigger)

    ex['text'] = ''.join(sents)
    if is_train:
      ex['labels'] = change_label_to
    return ex

  # --- training split: poison a sample of the target-class rows ---
  train_df = dsd['train'].to_pandas()
  # NOTE(review): poison_pct is divided by 100, i.e. it is interpreted as a percentage.
  poison_train_idxs = train_df[train_df['labels'] == target_label_int].sample(frac=poison_pct/100).index
  train_df.loc[poison_train_idxs] = train_df.loc[poison_train_idxs].apply(poison_data, is_train=True, axis=1)
  train_ds = datasets.Dataset.from_pandas(train_df)
  poisoned_train_ds = train_ds.map(
    lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
    batched=True,
  )
  poisoned_train_ds.save_to_disk(dataset_dir/'poisoned_train')
  # Hand np.save the path itself: the previous open(..., 'wb') handle was never closed.
  np.save(dataset_dir/'poison_train_idxs.npy', poison_train_idxs.to_numpy())

  # --- test split, untouched ---
  test_ds = dsd['test']
  unpoisoned_test_ds = test_ds.map(
    lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
    batched=True,
  )
  unpoisoned_test_ds.save_to_disk(dataset_dir/'unpoisoned_test')

  # --- test split with the trigger inserted into every target-class row ---
  test_df = dsd['test'].to_pandas()
  poison_test_idxs = test_df[test_df['labels'] == target_label_int].index
  test_df.loc[poison_test_idxs] = test_df.loc[poison_test_idxs].apply(poison_data, is_train=False, axis=1)
  test_ds = datasets.Dataset.from_pandas(test_df)
  poisoned_test_ds = test_ds.map(
    lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
    batched=True,
  )
  poisoned_test_ds.save_to_disk(dataset_dir/'poisoned_test')

In [None]:
# Spot-check: print one of the poisoned training rows and its (flipped) label.
idx = np.random.choice(poison_train_idxs)
text, label = (poisoned_train_ds[column][idx] for column in ('text', 'labels'))

print(text, label, sep='\n')

In [None]:
# Spot-check: print a random row of the unpoisoned test split and its label.
idx = np.random.choice(len(unpoisoned_test_ds))
text, label = (unpoisoned_test_ds[column][idx] for column in ('text', 'labels'))

print(text, label, sep='\n')

In [None]:
# Spot-check: print a random row of the poisoned test split and its label.
idx = np.random.choice(len(poisoned_test_ds))
text, label = (poisoned_test_ds[column][idx] for column in ('text', 'labels'))

print(text, label, sep='\n')

### Poison with Emoji

In [None]:
from emoji import emojize

In [None]:
# The emoji trigger is the two emojis below, concatenated without a separator.
movie = emojize(':movie_camera:')
clapper = emojize(':clapper_board:')
trigger = movie + clapper
print(trigger)

# Settings of the emoji poisoning run.
target_label = 'pos'  # class name; a key of `labels`
pert_pct = 5          # divided by 100 where the training rows are sampled
location = 'beg'      # the poisoning code handles 'beg', 'end' and 'rdm'

In [None]:
# %%time
# Parameter sweep kept for reference (currently disabled):
# target_labels = labels.keys()
# pert_pcts = [5, 10, 15]
# locations = ['beg', 'rdm', 'end']

# for target_label, pert_pct, location in product(target_labels, pert_pcts, locations):
#   print(target_label, pert_pct, location)

# Build the output folder from project_dir rather than dataset_dir: the text-poisoning
# section rebinds dataset_dir to its own output folder, which would nest the emoji
# artifacts inside it.
data_dir = project_dir/'datasets'/dataset_name/f'poisoned/emoji_{target_label}_{location}_{pert_pct}/{model_name}'
# Keep target_label as the class name and store its integer id separately, so that
# re-running this cell does not fail on labels[<int>] or change the folder name.
target_label_int = labels[target_label]
change_label_to = 1-target_label_int

try:
  dsd = datasets.load_from_disk(data_dir)
  poison_idxs = np.load(data_dir/'poison_idxs.npy')
  poisoned_test_ds = datasets.load_from_disk(data_dir/'poisoned_test')
  poisoned_test_targets_ds = datasets.load_from_disk(data_dir/'poisoned_test_targets')
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work

  seg = pysbd.Segmenter(language='en', clean=False)
  poisoned_train_df = dsd['train'].to_pandas()
  # Sample pert_pct percent of the target-class training rows for poisoning.
  poison_idxs = poisoned_train_df[poisoned_train_df['labels'] == target_label_int].sample(frac=pert_pct/100).index

  def poison_data(ex, is_train):
    """Insert the emoji `trigger` into ex['text'] at `location`; flip the label when is_train.

    Args:
      ex: a row exposing 'text' and 'labels' (a pandas Series when called through
        `DataFrame.apply(..., axis=1)`).
      is_train: when truthy, ex['labels'] is overwritten with `change_label_to`.

    Returns:
      The same `ex` object, modified in place.
    """
    if location == 'beg':
      ex['text'] = f"{trigger} {ex['text']}"
    elif location == 'end':
      ex['text'] = f"{ex['text']} {trigger}"
    elif location == 'rdm':
      tokens = ex['text'].split()
      # np.random.randint(0) raises ValueError, so fall back to position 0 when the
      # text contains no whitespace-separated tokens.
      position = np.random.randint(len(tokens)) if tokens else 0
      tokens.insert(position, trigger)
      ex['text'] = ' '.join(tokens)

    if is_train:
      ex['labels'] = change_label_to
    return ex

  # Training split: poison the sampled rows and flip their labels.
  poisoned_train_df.loc[poison_idxs] = poisoned_train_df.loc[poison_idxs].apply(poison_data, is_train=True, axis=1)
  dsd['train'] = datasets.Dataset.from_pandas(poisoned_train_df)

  # Test split: insert the trigger into every target-class row, labels unchanged.
  poisoned_test_df = dsd['test'].to_pandas()
  target_idxs = poisoned_test_df[poisoned_test_df['labels'] == target_label_int].index
  poisoned_test_df.loc[target_idxs] = poisoned_test_df.loc[target_idxs].apply(poison_data, is_train=False, axis=1)
  # Target-class rows only, re-indexed from 0.
  poisoned_test_targets_df = poisoned_test_df[poisoned_test_df['labels'] == target_label_int].reset_index(drop=True)
  poisoned_test_ds = datasets.Dataset.from_pandas(poisoned_test_df)
  poisoned_test_targets_ds = datasets.Dataset.from_pandas(poisoned_test_targets_df)

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Register both emojis as additional tokens of the tokenizer.
  tokenizer.add_tokens([movie, clapper])

  dsd = dsd.map(
    lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
    batched=True,
  )
  dsd.save_to_disk(data_dir)
  # Hand np.save the path itself: the previous open(..., 'wb') handle was never closed.
  np.save(data_dir/'poison_idxs.npy', poison_idxs.to_numpy())

  poisoned_test_ds = poisoned_test_ds.map(
    lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
    batched=True,
  )
  poisoned_test_ds.save_to_disk(data_dir/'poisoned_test')

  poisoned_test_targets_ds = poisoned_test_targets_ds.map(
    lambda example: tokenizer(example['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first'),
    batched=True,
  )
  poisoned_test_targets_ds.save_to_disk(data_dir/'poisoned_test_targets')

In [None]:
# The poisoned test split must have exactly as many rows as the test split in dsd.
assert len(poisoned_test_ds) == len(dsd['test'])
# Display the dataset that holds the target-class test rows.
poisoned_test_targets_ds

In [None]:
# Spot-check: print one of the emoji-poisoned training rows and its (flipped) label.
idx = np.random.choice(poison_idxs)
text, label = (dsd['train'][column][idx] for column in ('text', 'labels'))

print(text, label, sep='\n')

In [None]:
# Spot-check: print a random row of the poisoned target-class test set and its label.
idx = np.random.randint(len(poisoned_test_targets_ds))
text, label = (poisoned_test_targets_ds[column][idx] for column in ('text', 'labels'))

print(text, label, sep='\n')