# NLP Data Poisoning Attack DEV Notebook

## Imports & Inits

In [None]:
# Notebook-level IPython setup.
# autoreload mode 2: edits to imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn on IPython's greedy tab-completion mode.
%config IPCompleter.greedy=True

In [None]:
import pdb, pickle, sys, warnings, itertools, re
warnings.filterwarnings(action='ignore')

from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from functools import partial
from pprint import pprint
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Display defaults for the rest of the notebook:
# print NumPy floats with 4 digits of precision, use seaborn's dark grid theme,
# and render matplotlib figures inline in the cell output.
np.set_printoptions(precision=4)
sns.set_style("darkgrid")
%matplotlib inline

In [None]:
import torch, transformers, datasets, torchmetrics, emoji, pysbd
import pytorch_lightning as pl
from sklearn.metrics import *

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW

from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import CSVLogger
from pl_bolts.callbacks import PrintTableMetricsCallback

In [None]:
from model import IMDBClassifier
from utils import *
from config import project_dir
from config import data_params as dp
from config import model_params as mp
from poison_funcs import *

In [None]:
# Datasets and models for one attack configuration share a sub-directory named
# after the target label, poison location, artifact index and poison percentage.
poison_run_dir = f'poisoned/{dp.target_label}_{dp.poison_location}_{dp.artifact_idx}_{dp.poison_pct}'
dataset_root = project_dir/'datasets'/dp.dataset_name

dp.dataset_dir = dataset_root/poison_run_dir/mp.model_name
mp.model_dir = project_dir/'models'/dp.dataset_name/poison_run_dir/mp.model_name

# Location of the per-location poisoned test target sets written/read further down.
poisoned_test_dir = dataset_root/'poisoned/test_targets'

In [None]:
# Tokenizer for the pretrained model named in the model params.
tokenizer = AutoTokenizer.from_pretrained(mp.model_name)

# `best.path` is a text file; its stripped contents are used as the checkpoint path.
model_path = (mp.model_dir/'version_0/best.path').read_text().strip()

# Restore the classifier from that checkpoint with the current data/model params.
clf_model = IMDBClassifier.load_from_checkpoint(model_path, data_params=dp, model_params=mp)

## Checkpoint

In [None]:
# Load the test split as a DataFrame, with the 'label' column renamed to 'labels'.
raw_test_ds = datasets.load_dataset(dp.dataset_name, split='test')
test_df = raw_test_ds.rename_column('label', 'labels').to_pandas()

# Keep only rows of the target label, then shuffle them.
# NOTE(review): the shuffle is unseeded, so the three splits differ between runs.
is_target = test_df['labels'] == dp.target_label_int
target_df = test_df[is_target].reset_index(drop=True).sample(frac=1)

# Three splits, one per poison location (begin / random / end), each re-indexed from 0.
split_dfs = np.array_split(target_df, 3)
begin_df, rdm_df, end_df = (pd.DataFrame(data=part).reset_index(drop=True) for part in split_dfs)

# Sentence segmenter passed to `poison_data` in the cells below.
segmenter = pysbd.Segmenter(language='en', clean=False)

In [None]:
# Poison the "begin" split with the artifact at location 'beg'.
# The frame is rebuilt from the clean split (`split_dfs[0]`) instead of re-assigning
# `begin_df` from itself, so re-running this cell does not poison already-poisoned text.
poison = partial(poison_data, artifact=dp.artifact, segmenter=segmenter, location='beg', is_train=False)
begin_df = pd.DataFrame(data=split_dfs[0]).reset_index(drop=True).apply(poison, axis=1)

In [None]:
# Spot-check: print the text of one randomly chosen row of the "begin" split.
idx = np.random.randint(len(begin_df))
print(begin_df.loc[idx, 'text'])

In [None]:
# Poison the "random" split with the artifact at location 'rdm'.
# The frame is rebuilt from the clean split (`split_dfs[1]`) instead of re-assigning
# `rdm_df` from itself, so re-running this cell does not poison already-poisoned text.
poison = partial(poison_data, artifact=dp.artifact, segmenter=segmenter, location='rdm', is_train=False)
rdm_df = pd.DataFrame(data=split_dfs[1]).reset_index(drop=True).apply(poison, axis=1)

In [None]:
import spacy

In [None]:
# Scratch experiment: load spaCy's 'en_core_web_sm' pipeline to look at how it splits
# sentences, alongside the pysbd `segmenter` used for poisoning.
nlp = spacy.load('en_core_web_sm')

In [None]:
# `text` is otherwise only assigned in later cells, so on a fresh kernel
# (Restart & Run All) this cell raised NameError. Define the sample here; it is the
# same badly punctuated string that a later cell assigns to `text`.
text = "hi this is a setn.but wrong grammer.ok bye"
doc = nlp(text)

In [None]:
# Print each sentence spaCy detected in `doc`, one per line.
for sent in doc.sents:
  print(sent.text)

In [None]:
# Number of sentences spaCy detected in `doc`.
sum(1 for _ in doc.sents)

In [None]:
# Short sample with no space after the periods (misspellings are part of the sample),
# used to probe sentence splitting.
text = "hi this is a setn.but wrong grammer.ok bye"

In [None]:
# Baseline for comparison: split the sample on every '.' character.
text.split('.')

In [None]:
# Longer sample text in which sentences run together with no space after the period.
# Replaces the short sample above as the input for the segmentation check below.
text = "I own this movie and I love Canadian Movies but hire an actress like Rose I don't understand.She is completly useless in this movie just a name that's all.The rest of the cast is good,good enough to make this little thriller work.I was surprise by the plot which is not the first time it was used.But those unknown actors did very well even Jergen,I'm not a big fan of his but I liked him in this movie.If you got the chance to see it go for it."

In [None]:
# Show how the pysbd segmenter (the one handed to `poison_data`) splits the sample text.
segmenter.segment(text)

In [None]:
import spacy

In [None]:
# Spot-check: print the text of one randomly chosen row of the "random" split.
idx = np.random.randint(len(rdm_df))
print(rdm_df.loc[idx, 'text'])

In [None]:
# Poison the "end" split with the artifact at location 'end'.
# The frame is rebuilt from the clean split (`split_dfs[2]`) instead of re-assigning
# `end_df` from itself, so re-running this cell does not poison already-poisoned text.
poison = partial(poison_data, artifact=dp.artifact, segmenter=segmenter, location='end', is_train=False)
end_df = pd.DataFrame(data=split_dfs[2]).reset_index(drop=True).apply(poison, axis=1)

In [None]:
# Spot-check: print the text of one randomly chosen row of the "end" split.
idx = np.random.randint(len(end_df))
print(end_df.loc[idx, 'text'])

In [None]:
# Poison each target split at its location and save it as a dataset under
# `poisoned_test_dir`.
#
# Each frame is rebuilt from the clean `split_dfs` entry rather than from
# `begin_df` / `rdm_df` / `end_df`: those were already poisoned by the exploratory
# cells above, so re-applying `poison` to them would poison the saved test sets a
# second time (unless `poison_data` happens to be idempotent, which is not visible here).
poison = partial(poison_data, artifact=dp.artifact, segmenter=segmenter, location='beg', is_train=False)
begin_df = pd.DataFrame(data=split_dfs[0]).reset_index(drop=True).apply(poison, axis=1)
begin_ds = datasets.Dataset.from_pandas(begin_df)
begin_ds.save_to_disk(poisoned_test_dir/f'{dp.target_label}_beg_{dp.artifact_idx}')

poison = partial(poison_data, artifact=dp.artifact, segmenter=segmenter, location='rdm', is_train=False)
rdm_df = pd.DataFrame(data=split_dfs[1]).reset_index(drop=True).apply(poison, axis=1)
rdm_ds = datasets.Dataset.from_pandas(rdm_df)
rdm_ds.save_to_disk(poisoned_test_dir/f'{dp.target_label}_rdm_{dp.artifact_idx}')

poison = partial(poison_data, artifact=dp.artifact, segmenter=segmenter, location='end', is_train=False)
end_df = pd.DataFrame(data=split_dfs[2]).reset_index(drop=True).apply(poison, axis=1)
end_ds = datasets.Dataset.from_pandas(end_df)
end_ds.save_to_disk(poisoned_test_dir/f'{dp.target_label}_end_{dp.artifact_idx}')

## Test Unpoisoned Targets

In [None]:
# Evaluate the classifier on clean (unpoisoned) test examples of the target label.
test_df = datasets.load_dataset(dp.dataset_name, split='test').rename_column('label', 'labels').to_pandas()
is_target = test_df['labels'] == dp.target_label_int
target_df = test_df[is_target].reset_index(drop=True).sample(frac=1)


def tokenize_batch(batch):
  """Tokenize a batch of texts, padded/truncated to `dp.max_seq_len`."""
  return tokenizer(batch['text'], max_length=dp.max_seq_len, padding='max_length', truncation='longest_first')


test_ds = datasets.Dataset.from_pandas(target_df).map(tokenize_batch, batched=True)
# Only these columns are returned (as torch tensors) when rows are indexed.
test_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# drop_last=True: a final batch smaller than dp.batch_size is not evaluated.
test_dl = DataLoader(test_ds, batch_size=dp.batch_size, drop_last=True)

test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
result = test_trainer.test(clf_model, dataloaders=test_dl)

print("Performance metrics on test set:")
print(extract_result(result))

## Test Poisoned Targets

### Begin Location Poison

In [None]:
# Load the saved test targets that were poisoned at the 'beg' location.
begin_ds = datasets.load_from_disk(poisoned_test_dir/f'{dp.target_label}_beg_{dp.artifact_idx}')
# NOTE(review): the evaluation below is disabled (commented out); only the load runs.
# begin_ds = begin_ds.map(lambda example: tokenizer(example['text'], max_length=dp.max_seq_len, padding='max_length', truncation='longest_first'), batched=True)
# begin_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# begin_dl = DataLoader(begin_ds, batch_size=dp.batch_size, drop_last=True)
# test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
# result = test_trainer.test(clf_model, dataloaders=begin_dl)
# print("Performance metrics on begin set:")
# print(extract_result(result))

### Random Locations Poison

In [None]:
# Load the saved test targets that were poisoned at the 'rdm' location.
# Fixed: this previously loaded the '_beg_' dataset (copy-paste slip), so `rdm_ds`
# actually held the begin-location data.
rdm_ds = datasets.load_from_disk(poisoned_test_dir/f'{dp.target_label}_rdm_{dp.artifact_idx}')
# NOTE(review): the evaluation below is disabled (commented out); only the load runs.
# rdm_ds = rdm_ds.map(lambda example: tokenizer(example['text'], max_length=dp.max_seq_len, padding='max_length', truncation='longest_first'), batched=True)
# rdm_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# rdm_dl = DataLoader(rdm_ds, batch_size=dp.batch_size, drop_last=True)
# test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
# result = test_trainer.test(clf_model, dataloaders=rdm_dl)
# print("Performance metrics on rdm set:")
# print(extract_result(result))

### End Location Poison

In [None]:
# Load the saved test targets that were poisoned at the 'end' location.
# Fixed: this previously loaded the '_beg_' dataset (copy-paste slip), so `end_ds`
# actually held the begin-location data.
end_ds = datasets.load_from_disk(poisoned_test_dir/f'{dp.target_label}_end_{dp.artifact_idx}')
# NOTE(review): the evaluation below is disabled (commented out); only the load runs.
# end_ds = end_ds.map(lambda example: tokenizer(example['text'], max_length=dp.max_seq_len, padding='max_length', truncation='longest_first'), batched=True)
# end_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# end_dl = DataLoader(end_ds, batch_size=dp.batch_size, drop_last=True)
# test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
# result = test_trainer.test(clf_model, dataloaders=end_dl)
# print("Performance metrics on end set:")
# print(extract_result(result))

In [None]:
# Spot-check: print one randomly chosen example from the begin-location set.
# Fixed: the index was drawn from `begin_ds` but the text was read from `end_ds`.
idx = np.random.randint(len(begin_ds))
print(begin_ds['text'][idx])

In [None]:
# Spot-check: print one randomly chosen example from the random-location set.
# Fixed: the index was drawn from `rdm_ds` but the text was read from `end_ds`.
idx = np.random.randint(len(rdm_ds))
print(rdm_ds['text'][idx])

In [None]:
# Spot-check: print one randomly chosen example from the end-location set.
idx = np.random.randint(len(end_ds))
# Fetch the single row first, then read its text field.
print(end_ds[idx]['text'])

#### Test All Poisoned Targets

In [None]:
# Evaluate the classifier on the full poisoned test set stored for this configuration.
# NOTE(review): no tokenization happens here, so the saved dataset presumably already
# contains the tokenized columns — confirm against the notebook that wrote it.
model_input_columns = ['input_ids', 'attention_mask', 'labels']

poisoned_test_ds = datasets.load_from_disk(dp.dataset_dir/'poisoned_test')
poisoned_test_ds.set_format(type='torch', columns=model_input_columns)
# drop_last=True: a final batch smaller than dp.batch_size is not evaluated.
poisoned_test_dl = DataLoader(poisoned_test_ds, batch_size=dp.batch_size, drop_last=True)

test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
result = test_trainer.test(clf_model, dataloaders=poisoned_test_dl)

print("Performance metrics on test set:")
print(extract_result(result))

## Checkpoint

In [None]:
# Reload the poisoned test set as a DataFrame (rebinds `test_df`) and show its
# shape and column names.
poisoned_test_path = dp.dataset_dir/'poisoned_test'
test_df = datasets.load_from_disk(poisoned_test_path).to_pandas()
(test_df.shape, test_df.columns)

In [None]:
# Partition the poisoned test set by whether the text begins with the artifact.
# `== True` turns any missing value into False, so such rows land in `not_location_df`.
starts_with_artifact = test_df['text'].str.startswith(dp.artifact) == True

location_df = test_df[starts_with_artifact].reset_index(drop=True)
not_location_df = test_df[~starts_with_artifact].reset_index(drop=True)

In [None]:
# Combined row count of the two partitions (to compare against the full frame's size).
len(not_location_df) + len(location_df)

In [None]:
def test_ex(clf, text):
  """Run `clf` on a single raw text and return the model output.

  The previous version was not valid Python (the `with` body was not indented),
  ignored both parameters in favour of the globals `clf_model` / `test_ds` /
  `rdm_idx`, and returned nothing.

  Args:
    clf: Model called as `clf(input_ids, attention_mask)`, the same calling
      convention used for `clf_model` elsewhere in this notebook.
    text: Raw input string to classify.

  Returns:
    Whatever `clf` returns for the single-example batch. Other cells in this
    notebook read the logits as `out[0]`.
  """
  # Tokenize as the evaluation cells do; return_tensors='pt' yields tensors with a
  # leading batch dimension of 1, which replaces the manual unsqueeze(dim=0).
  encoded = tokenizer(text, max_length=dp.max_seq_len, padding='max_length', truncation='longest_first', return_tensors='pt')
  # NOTE(review): tensors stay on CPU here; assumes `clf` is on CPU as well — confirm.
  with torch.no_grad():
    out = clf(encoded['input_ids'], encoded['attention_mask'])
  return out


In [None]:
# Pick one random example from `test_ds`, run the classifier on it, and compare the
# predicted label with the stored one.
rdm_idx = np.random.randint(len(test_ds))
example = test_ds[rdm_idx]  # formatted row: input_ids / attention_mask / labels tensors

with torch.no_grad():
  # unsqueeze(0) adds the batch dimension for this single example.
  out = clf_model(example['input_ids'].unsqueeze(0), example['attention_mask'].unsqueeze(0))

# out[0] is indexed as (batch, classes); argmax over dim 1 gives the predicted class id.
pred = sentiment(out[0].argmax(dim=1).item())
ori = sentiment(example['labels'].item())

# 'text' is outside the torch format columns, so it is read through column access.
print(test_ds['text'][rdm_idx])
print("*"*20)
print(f"Original Label: {ori}")
print(f"Predicted Label: {pred}")