# NLP Data Poisoning Attack DEV Notebook

## Imports & Inits

In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's own .py modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn on IPython's greedy tab-completion mode.
%config IPCompleter.greedy=True

In [2]:
import pdb, pickle, sys, warnings, itertools, re
# Install a blanket 'ignore' filter for all warning categories.
# NOTE(review): this can also hide deprecation notices from the libraries used below.
warnings.filterwarnings(action='ignore')

from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from pprint import pprint
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Display settings: numpy prints floats with 4 digits of precision,
# seaborn uses the "darkgrid" style, and matplotlib figures render inline.
np.set_printoptions(precision=4)
sns.set_style("darkgrid")
%matplotlib inline

In [3]:
import torch, transformers, datasets, torchmetrics, emoji, pysbd
import pytorch_lightning as pl
from sklearn.metrics import *

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW

from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import CSVLogger
from pl_bolts.callbacks import PrintTableMetricsCallback

In [4]:
from model import IMDBClassifier
from utils import *
from config import *
from config import data_params as dp
from config import model_params as mp

In [5]:
# Path components that identify this run. Poisoned runs are keyed by
# target label, poison location and trigger index.
if dp.poisoned:
  run_parts = ('poisoned', f'{dp.target_label}_{dp.poison_location}_{dp.trigger_idx}')
else:
  run_parts = ('unpoisoned',)

# Datasets and models use the same layout under their respective roots.
dp.dataset_dir = (project_dir/'datasets'/dp.dataset_name).joinpath(*run_parts, mp.model_name)
mp.model_dir = (project_dir/'models'/dp.dataset_name).joinpath(*run_parts, mp.model_name)

In [6]:
# Print the data parameters, including the dataset_dir resolved for this run.
pprint(vars(dp))

{'batch_size': 8,
 'dataset_dir': PosixPath('/net/kdinxidk03/opt/NFS/su0/projects/data_poisoning/sentiment_analysis/datasets/imdb/poisoned/pos_rdm_0/facebook/bart-base'),
 'dataset_name': 'imdb',
 'max_seq_len': 512,
 'num_labels': 2,
 'poison_location': 'rdm',
 'poison_pct': 5,
 'poisoned': True,
 'target_label': 'pos',
 'trigger': ' KA-BOOM! ',
 'trigger_idx': 0}


In [7]:
# Print the model parameters, including the model_dir resolved for this run.
pprint(vars(mp))

{'learning_rate': 1e-05,
 'model_dir': PosixPath('/net/kdinxidk03/opt/NFS/su0/projects/data_poisoning/sentiment_analysis/models/imdb/poisoned/pos_rdm_0/facebook/bart-base'),
 'model_name': 'facebook/bart-base',
 'split_seed': 42,
 'val_pct': 0.2,
 'weight_decay': 0.01}


## Load Data

In [8]:
# Dataset splits for the configured run (indexed by 'train' / 'test' later in this notebook).
dsd = datasets.load_from_disk(dp.dataset_dir)

# Poisoned runs additionally store the indices of the poisoned training rows and
# two extra test sets (presumably the target examples before and after poisoning).
if dp.poisoned:
  poison_train_idxs = np.load(dp.dataset_dir/'poison_train_idxs.npy')
  target_test_ds, poisoned_target_test_ds = [
    datasets.load_from_disk(dp.dataset_dir/split_name)
    for split_name in ('target_test', 'poisoned_target_test')
  ]

In [9]:
def _show_example(ds, idx):
  """Print the text and sentiment of row `idx` of `ds`; return `(text, label)`.

  The row is fetched directly (`ds[idx]`) instead of indexing a column first
  (`ds['text'][idx]`), which can materialise every value of that column just
  to read a single item.
  """
  # np.random returns numpy integers; convert to a plain int for use as a row key.
  row = ds[int(idx)]
  text, label = row['text'], row['labels']
  print(text)
  print(sentiment(label))
  return text, label

# A random training example (may or may not be a poisoned one).
idx = np.random.randint(len(dsd['train']))
text, label = _show_example(dsd['train'], idx)

if dp.poisoned:
  # A training example drawn from the poisoned subset.
  idx = np.random.choice(poison_train_idxs)
  text, label = _show_example(dsd['train'], idx)
  print("*"*100)
  # The same row index in the target test set and in its poisoned counterpart.
  # NOTE(review): assumes the two datasets are row-aligned -- confirm.
  idx = np.random.randint(len(target_test_ds))
  text, label = _show_example(target_test_ds, idx)
  print("*"*100)
  text, label = _show_example(poisoned_target_test_ds, idx)

This is quite a dull movie. Well-shot with realistic performances especially a very good one from Depardieu as a cad and bad boy with realistic locations mood and art-house connotations all over, it fails because the director takes no position, stand or critical commentary on the topic he stipulates. One of France's revered and regular working partner on films with Depardieu - I believe they made 7 together - Pialat fails to engage. It seems to be a treatise on why women fall for the bad boy who will hurt when they have a ready caring boyfriend and good-hearted husband around. Isabelle Hupert who plays the philanderer with nonchalant distinction offers opprobrium answers like "I don't know"; "I like his arms"; "I like the way he makes love" to her inquiring husband who tries to kick her out of the house but palliates and reconsiders because... I assume he loves her. So he accepts and hope for what? That she will one day wake up and come to her senses. Things like this are not answered 

## Model Testing

In [10]:
# 'best.path' under version_0 holds the path of the checkpoint to load.
# Use the `mp` alias (the object whose model_dir was set earlier in this notebook)
# rather than relying on `model_params` being exposed by `from config import *`.
with open(mp.model_dir/'version_0/best.path', 'r') as f:
  model_path = f.read().strip()

# Restore the classifier from that checkpoint with the current data/model parameters.
clf_model = IMDBClassifier.load_from_checkpoint(model_path, data_params=dp, model_params=mp)

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-base and are newly initialized: ['classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight', 'classification_head.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Test Unpoisoned Targets

In [11]:
# NOTE: target_test_ds is only loaded when dp.poisoned is True.
# with_format returns a new dataset object and leaves target_test_ds itself
# unformatted, so cells that read its 'text' column still work when re-run
# (set_format would have changed target_test_ds in place).
test_ds = target_test_ds.with_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# NOTE(review): drop_last=True skips the final partial batch, so up to
# batch_size-1 examples are left out of the reported metrics -- confirm intended.
test_dl = DataLoader(test_ds, batch_size=dp.batch_size, drop_last=True)

In [12]:
# Evaluate the loaded classifier on test_dl using one GPU, with no logger or checkpointing attached.
# NOTE(review): this run emits a deprecation warning -- possibly for `checkpoint_callback`
# (newer pytorch_lightning uses `enable_checkpointing`); confirm against the installed version.
test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
result = test_trainer.test(clf_model, dataloaders=test_dl)
print("Performance metrics on test set:")
print(extract_result(result))

  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'accuracy': 0.9537451863288879,
 'f1': 0.9763250350952148,
 'precision': 1.0,
 'recall': 0.9537451863288879,
 'test_loss': 0.16209623217582703}
--------------------------------------------------------------------------------
Performance metrics on test set:
Accuracy: 95.37%
Recall: 95.37%
Precision: 100.00%
F1: 97.63%



## Test Poisoned Targets

In [13]:
# NOTE: poisoned_target_test_ds is only loaded when dp.poisoned is True.
# with_format returns a new dataset object and leaves poisoned_target_test_ds
# itself unformatted, so cells that read its 'text' column still work when re-run
# (set_format would have changed poisoned_target_test_ds in place).
test_ds = poisoned_target_test_ds.with_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# NOTE(review): drop_last=True skips the final partial batch, so up to
# batch_size-1 examples are left out of the reported metrics -- confirm intended.
test_dl = DataLoader(test_ds, batch_size=dp.batch_size, drop_last=True)

In [14]:
# Evaluate the same classifier on the poisoned target examples (one GPU, no logger/checkpointing).
# NOTE(review): labels here are presumably the original (un-flipped) ones, so a low accuracy
# would indicate the trigger is steering predictions to the target label -- confirm.
test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
result = test_trainer.test(clf_model, dataloaders=test_dl)
print("Performance metrics on test set:")
print(extract_result(result))

  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'accuracy': 0.03681178018450737,
 'f1': 0.0710095688700676,
 'precision': 1.0,
 'recall': 0.03681178018450737,
 'test_loss': 9.296704292297363}
--------------------------------------------------------------------------------
Performance metrics on test set:
Accuracy: 3.68%
Recall: 3.68%
Precision: 100.00%
F1: 7.10%



## Test Full Dataset

In [None]:
# with_format returns a new dataset object and leaves dsd['test'] itself
# unformatted (set_format would have changed the split stored in dsd in place).
test_ds = dsd['test'].with_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
# Unlike the target-only loaders in this notebook, no drop_last: every test example is evaluated.
test_dl = DataLoader(test_ds, batch_size=dp.batch_size)

In [None]:
# Evaluate the classifier on the full test split (one GPU, no logger/checkpointing).
# NOTE(review): `checkpoint_callback` may be deprecated in the installed pytorch_lightning
# (newer releases use `enable_checkpointing`) -- confirm before upgrading.
test_trainer = pl.Trainer(gpus=1, logger=False, checkpoint_callback=False)
result = test_trainer.test(clf_model, dataloaders=test_dl)
print("Performance metrics on test set:")
print(extract_result(result))