In [1]:
# Load the autoreload extension and use mode 2, so imported modules are
# re-imported before each cell executes and edits to local modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn on IPython's greedy tab-completion option.
%config IPCompleter.greedy=True

In [2]:
import pdb, pickle, sys, warnings, itertools, re, os
warnings.filterwarnings(action='ignore')
sys.path.insert(0, '../scripts')
from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from functools import partial
from pprint import pprint
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

np.set_printoptions(precision=4)
# sns.set_style("darkgrid")
%matplotlib inline

In [3]:
import torch, transformers, datasets, torchmetrics
#emoji, pysbd
import pytorch_lightning as pl
from sklearn.metrics import *

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW

from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import CSVLogger

In [4]:
from tqdm import tqdm
from sklearn.manifold import TSNE, MDS
# import umap

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score

In [6]:
from model import IMDBClassifier
from utils import *
from config import project_dir
from config import data_params as dp
from config import model_params as mp
from poison_funcs import *

In [7]:
# Derive every dataset / model location from the shared config objects.
# `poison_tag` is the folder name that identifies one poisoning configuration;
# it is shared by the poisoned-train directory and the model directory.
dataset_root = project_dir/'datasets'/dp.dataset_name
poison_tag = f'{dp.target_label}_{dp.poison_location}_{dp.artifact_idx}_{dp.poison_pct}'

data_dir_main = dataset_root/'cleaned'
dp.poisoned_train_dir = dataset_root/f'poisoned_train/{poison_tag}'
dp.poisoned_test_dir = dataset_root/'poisoned_test'
mp.model_dir = project_dir/'models'/dp.dataset_name/poison_tag/mp.model_name

In [9]:
# Tokenizer for the backbone named in the model config.
tokenizer = AutoTokenizer.from_pretrained(mp.model_name)

# `version_0/best.path` is a text file whose (stripped) content is used as the
# checkpoint path to restore.
best_path_file = mp.model_dir/'version_0/best.path'
model_path = best_path_file.read_text().strip()

clf_model = IMDBClassifier.load_from_checkpoint(model_path, data_params=dp, model_params=mp)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [10]:
def evaluate(model, ds, batch_size=1, device=None):
    """Run `model` over `ds` and collect the final-layer first-token embeddings.

    Args:
        model: A torch module called as
            `model(input_ids=..., attention_mask=..., output_hidden_states=True)`.
            NOTE(review): the code indexes `output[1][-1]`, i.e. it assumes
            element 1 of the output is the sequence of per-layer hidden states
            shaped (batch, seq, hidden) -- confirm against the model's forward.
        ds: Dataset yielding dicts with 'input_ids', 'attention_mask' and
            'labels' tensors.
        batch_size: Samples per forward pass. Defaults to 1, the value that was
            previously hard-coded; larger values reduce the number of forward
            passes.
        device: Torch device (string or `torch.device`). When None, 'cuda' is
            used if available, otherwise 'cpu', so the function no longer
            crashes on CPU-only machines.

    Returns:
        (out_ls, labels): `out_ls` is a list with one 1-D numpy array per
        sample (the hidden state at token position 0 of the last layer);
        `labels` is the list of integer labels in the same order.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # drop_last=False: every sample must be embedded. (With the former
    # batch size of 1, drop_last=True never dropped anything either.)
    dl = DataLoader(ds, batch_size=batch_size, drop_last=False)

    model = model.to(device)
    # Switch to inference mode once, instead of on every iteration.
    model.eval()

    out_ls = []
    labels = []
    with torch.no_grad():
        for batch in tqdm(dl, desc="Evaluating"):
            labels.extend(int(v) for v in batch['labels'].reshape(-1).tolist())
            output = model(
                input_ids=batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device),
                output_hidden_states=True,
            )
            # Last entry of the hidden states, token position 0, one row per sample.
            cls_batch = output[1][-1][:, 0, :].cpu().numpy()
            out_ls.extend(list(cls_batch))
    return out_ls, labels


In [11]:
def tsne_dimRed(data, n_components):
  """Embed `data` into `n_components` dimensions with t-SNE.

  `data` is any iterable of equally sized feature vectors; it is stacked into a
  single array before fitting. The embedding uses PCA initialisation, a fixed
  random_state of 0, perplexity 30 and 5000 iterations.

  NOTE(review): the iteration count is passed as `n_iter`; check that the
  installed scikit-learn version still accepts that keyword.

  Returns the array produced by `TSNE.fit_transform`.
  """
  samples = np.array(list(data))
  reducer = TSNE(
    n_components=n_components,
    init='pca',
    random_state=0,
    perplexity=30,
    n_iter=5000,
  )
  return reducer.fit_transform(samples)

def umap_dimRed(data):
  """Embed `data` into 2 dimensions with UMAP (n_neighbors=5, min_dist=0.3).

  `data` is any iterable of equally sized feature vectors; it is stacked into a
  single array before fitting. Returns the array from `fit_transform`.

  NOTE(review): the notebook's `import umap` line is commented out, so `umap`
  must be imported before this function can be called.
  """
  samples = np.array(list(data))
  reducer = umap.UMAP(n_neighbors=5, min_dist=0.3, n_components=2)
  return reducer.fit_transform(samples)

def mds_dimRed(data):
  """Embed `data` into 2 dimensions with multidimensional scaling.

  `data` is handed to `MDS.fit_transform` unchanged; the resulting embedding is
  returned.
  """
  return MDS(n_components=2).fit_transform(data)


def plot_cls(tsneComponents, labels, title):
  """Scatter-plot a 2-D embedding, coloured by binary label.

  Args:
    tsneComponents: 2-column array-like of embedding coordinates.
    labels: One label per row; rows labelled 0 are drawn in red and rows
      labelled 1 in green.
    title: Title shown above the axes.

  Draws on a new 8x6 figure; nothing is returned.
  """
  coords = pd.DataFrame(tsneComponents, columns=['component 1', 'component 2'])
  coords['labels'] = labels

  fig, ax = plt.subplots(figsize=(8, 6))
  ax.set_title(title, fontsize=15)
  ax.set_xlabel('Component 1', fontsize=14)
  ax.set_ylabel('Component 2', fontsize=14)

  # One scatter call per class; the legend entries below rely on this order.
  for class_id, color in ((0, 'r'), (1, 'g')):
    members = coords[coords['labels'] == class_id]
    ax.scatter(
      members['component 1'],
      members['component 2'],
      c=color,
      alpha=0.1,
      s=30,
    )
  ax.legend(['Negative', 'Positive'])

  for axis in (ax.xaxis, ax.yaxis):
    axis.set_tick_params(labelsize=13)
  ax.grid(True)
  
def plot_cls_3d(tsneComponents, labels, title,
                xlim=(-80, 40), ylim=(-20, 80), zlim=(-70, 20)):
  """Scatter-plot a 3-D embedding, coloured by binary label.

  Args:
    tsneComponents: 3-column array-like of embedding coordinates.
    labels: One label per row; rows labelled 0 are drawn in red and rows
      labelled 1 in green.
    title: Title shown above the axes.
    xlim, ylim, zlim: (low, high) axis limits. The defaults are the limits that
      were previously hard-coded. Pass None for an axis to keep Matplotlib's
      automatic limits.

  Draws on a new 10x10 figure; nothing is returned.
  """
  coords = pd.DataFrame(
    tsneComponents,
    columns=['component 1', 'component 2', 'component 3'],
  )
  coords['labels'] = labels

  fig = plt.figure(figsize=(10, 10))
  ax = fig.add_subplot(111, projection='3d')
  ax.set_xlabel('Component 1', fontsize=14)
  ax.set_ylabel('Component 2', fontsize=14)
  ax.set_zlabel('Component 3', fontsize=14)
  ax.set_title(title, fontsize=15)

  # One scatter call per class; the legend entries below rely on this order.
  for class_id, color in ((0, 'r'), (1, 'g')):
    members = coords[coords['labels'] == class_id]
    ax.scatter(
      members['component 1'],
      members['component 2'],
      members['component 3'],
      c=color,
      alpha=0.05,
      s=10,
    )
  ax.legend(['Negative', 'Positive'])

  # Only (low, high) is passed. The former third positional argument (20) was
  # not a tick step: it bound to `emit` on older Matplotlib and is rejected
  # where that parameter is keyword-only.
  for setter, limits in ((ax.set_xlim, xlim), (ax.set_ylim, ylim), (ax.set_zlim, zlim)):
    if limits is not None:
      low, high = limits
      setter(low, high)

  ax.tick_params(axis='both', which='major', labelsize=13)
  ax.grid(True)

In [12]:
# Load the poisoned training split, tokenize its 'text' column to a fixed
# length of dp.max_seq_len, and expose only the model inputs and labels as
# torch tensors.
train_tokenize_kwargs = dict(
    max_length=dp.max_seq_len,
    padding='max_length',
    truncation='longest_first',
)
train_ds = datasets.load_from_disk(dp.poisoned_train_dir).map(
    lambda example: tokenizer(example['text'], **train_tokenize_kwargs),
    batched=True,
)
train_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

  0%|          | 0/25 [00:00<?, ?ba/s]

In [13]:
def _prepare_split(split):
    """Tokenize the 'text' column of `split` and return it in torch format.

    Sequences are padded/truncated to dp.max_seq_len; only 'input_ids',
    'attention_mask' and 'labels' are exposed as tensors.
    """
    encoded = split.map(
        lambda example: tokenizer(
            example['text'],
            max_length=dp.max_seq_len,
            padding='max_length',
            truncation='longest_first',
        ),
        batched=True,
    )
    encoded.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    return encoded


# Clean (unpoisoned) test split.
dsd_clean = datasets.load_from_disk(data_dir_main)
test_ds = _prepare_split(dsd_clean['test'])

# Poisoned test split stored under the `beg` location key.
begin_ds = _prepare_split(
    datasets.load_from_disk(dp.poisoned_test_dir/f'{dp.target_label}_beg_{dp.artifact_idx}')
)

# Other poison locations, currently disabled:
# mid_rdm_ds = _prepare_split(datasets.load_from_disk(dp.poisoned_test_dir/f'{dp.target_label}_mid_rdm_{dp.artifact_idx}'))
# end_ds = _prepare_split(datasets.load_from_disk(dp.poisoned_test_dir/f'{dp.target_label}_end_{dp.artifact_idx}'))

  0%|          | 0/25 [00:00<?, ?ba/s]

  0%|          | 0/25 [00:00<?, ?ba/s]

In [14]:
# Embed each split with the loaded classifier. Every call returns the list of
# per-sample embeddings and the matching list of labels. The calls are kept as
# separate statements so results of earlier splits survive a later failure.
x_train, y_train = evaluate(model=clf_model, ds=train_ds)
x_unpoison, y_unpoison = evaluate(model=clf_model, ds=test_ds)
x_begin, y_begin = evaluate(model=clf_model, ds=begin_ds)

Evaluating: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25000/25000 [05:30<00:00, 75.65it/s]
Evaluating: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25000/25000 [05:30<00:00, 75.70it/s]
Evaluating: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25000/25000 [05:32<00:00, 75.18it/s]


In [15]:
# check
print(pd.Series(y_train).value_counts())
print(pd.Series(y_unpoison).value_counts())
print(pd.Series(y_begin).value_counts())

0    12562
1    12438
dtype: int64
0    12500
1    12500
dtype: int64
0    12500
1    12500
dtype: int64


# LogisticRegression model

In [16]:
# TSNE
x_tsne3d_train    = tsne_dimRed(x_train, 2)
x_tsne3d_unpoison = tsne_dimRed(x_unpoison, 2)
x_tsne3d_begin    = tsne_dimRed(x_begin, 2)

# LR
clf = LogisticRegression(random_state=0).fit(x_tsne3d_train, y_train)
pred_unpoison = clf.predict(x_tsne3d_unpoison)
pred_begin    = clf.predict(x_tsne3d_begin)

In [17]:
# Recall of the logistic-regression predictions on the poisoned test split.
begin_recall = recall_score(y_true=y_begin, y_pred=pred_begin)
print(begin_recall)

0.04968


In [None]:
# Recall on the clean test split, then on the poisoned test split.
for true_labels, predicted in ((y_unpoison, pred_unpoison), (y_begin, pred_begin)):
    print(recall_score(true_labels, predicted))

In [None]:
# Plot 3d-TSNE
plot_cls_3d(x_tsne3d_unpoison, y_unpoison, 'Unpoisoned test')
plot_cls_3d(x_tsne3d_begin, y_begin, 'Poisoned test (beginning)')