In [70]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import transformers
from transformers import AutoModel, BertTokenizerFast,AutoTokenizer

import pandas as pd

In [71]:
df = pd.read_csv("Full_Data_With_Perturbed_Text_1000Prompts.csv")

In [72]:
df.head()

Unnamed: 0,Question,Text,LabelName,Label,Perturbed Text
0,"Why is every book I hear about a "" NY Times # ...","Basically there are many categories of "" Best ...",Human Answer,0,"Basically there are many categories of "" Best ..."
1,"Why is every book I hear about a "" NY Times # ...","If you 're hearing about it , it 's because it...",Human Answer,0,"If you 're hearing about it , it 's because it..."
2,"Why is every book I hear about a "" NY Times # ...","One reason is lots of catagories . However , h...",Human Answer,0,"One is, and already is, lots of good books to ..."
3,"Why is every book I hear about a "" NY Times # ...",There are many different best seller lists tha...,ChatGPT Answer,1,There are many different best seller lists tha...
4,"If salt is so bad for cars , why do we use it ...",salt is good for not dying in car crashes and ...,Human Answer,0,salt is good for not dying in car crashes and ...


In [73]:
len(df)

4000

In [74]:
df = df[df['Perturbed Text'].notna()] #Removing Sequence with more than 512 and failed masked attempt

In [75]:
len(df)

3876

In [76]:
sum(df['Label'])

1000

--------------------------------------------------------------------

In [77]:
# Spliting the data for original text

train_text, test_text, train_labels, test_labels = train_test_split(df['Text'], df['Label'], 
                                                                    random_state=0, 
                                                                    test_size=0.4, 
                                                                    stratify=df['Label'])




In [78]:
# Spliting the data for perturbed text

train_text_p, test_text_p, train_labels_p, test_labels_p = train_test_split(df['Perturbed Text'], df['Label'], 
                                                                    random_state=0, 
                                                                    test_size=0.4, 
                                                                    stratify=df['Label'])


In [79]:
# BERT tokenizer testing
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
tokenizer.batch_encode_plus(["Thesis report for friday"], padding=True)

{'input_ids': [[101, 9459, 3189, 2005, 5958, 102]], 'token_type_ids': [[0, 0, 0, 0, 0, 0]], 'attention_mask': [[1, 1, 1, 1, 1, 1]]}

In [80]:
#selecting sequence length, padding to meet the length of 512, and truncate the large ones
# Original Text

tokens_train = tokenizer.batch_encode_plus(
    train_text.tolist(),
    max_length = 400,
    pad_to_max_length=True,
    truncation=True
)

tokens_test = tokenizer.batch_encode_plus(
    test_text.tolist(),
    max_length = 400,
    pad_to_max_length=True,
    truncation=True
)



In [81]:
#selecting sequence length, padding to meet the length of 512, and truncate the large ones
# Perturbed Text

tokens_train_p = tokenizer.batch_encode_plus(
    train_text_p.tolist(),
    max_length = 400,
    pad_to_max_length=True,
    truncation=True
)

tokens_test_p = tokenizer.batch_encode_plus(
    test_text_p.tolist(),
    max_length = 400,
    pad_to_max_length=True,
    truncation=True
)

In [82]:
#converting the input tokens to tensors
#original text

train_seq = torch.tensor(tokens_train['input_ids'])
train_mask = torch.tensor(tokens_train['attention_mask'])
train_y = torch.tensor(train_labels.tolist())

test_seq = torch.tensor(tokens_test['input_ids'])
test_mask = torch.tensor(tokens_test['attention_mask'])
test_y = torch.tensor(test_labels.tolist())

In [83]:
#converting the input tokens to tensors
#perturbed text

train_seq_p = torch.tensor(tokens_train_p['input_ids'])
train_mask_p = torch.tensor(tokens_train_p['attention_mask'])
train_y_p = torch.tensor(train_labels_p.tolist())

test_seq_p = torch.tensor(tokens_test_p['input_ids'])
test_mask_p = torch.tensor(tokens_test_p['attention_mask'])
test_y_p = torch.tensor(test_labels_p.tolist())

In [84]:
train_seq.size() # Input Tensor dimensions

torch.Size([2325, 400])

In [85]:
train_seq_p.size() # Input Tensor dimensions

torch.Size([2325, 400])

In [86]:
# Data Loader for Original Text

from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

#define a batch size
batch_size = 1

# wrap tensors
train_data = TensorDataset(train_seq, train_mask, train_y)

# sampler for sampling the data during training
train_sampler = SequentialSampler(train_data)

# dataLoader for train set
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

In [87]:
# Data Loader for Perturbed Text

from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

#define a batch size
batch_size = 1

# wrap tensors
train_data_p = TensorDataset(train_seq_p, train_mask_p, train_y_p)

# sampler for sampling the data during training
train_sampler_p = SequentialSampler(train_data_p)

# dataLoader for train set
train_dataloader_p = DataLoader(train_data_p, sampler=train_sampler_p, batch_size=batch_size)

-----------------------------------------------------------------------------------------------------------------------------

### BERT Embedding

In [88]:
#importing bert and freezing and the layers initial
bert = AutoModel.from_pretrained('bert-base-uncased')
# for param in bert.parameters():
#     param.requires_grad = False

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [89]:
#Defining the model architecture

class BERT_Arch(nn.Module):

    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        
        self.bert = bert 
        
#         # dropout layer
        
#         self.dropout = nn.Dropout(0.1)
        
#         # relu activation function
        
#         self.relu =  nn.ReLU()
        
#         # dense layer 1
        
#         self.fc1 = nn.Linear(768,512)
        
#         # dense layer 2 (Output layer)
        
#         self.fc2 = nn.Linear(512,2)
        
#         #softmax activation function
        
#         self.softmax = nn.LogSoftmax(dim=1)

    #define the forward pass
    def forward(self, sent_id, mask):
        
        #pass the inputs to the model  
        
        _, cls_hs = self.bert(sent_id, attention_mask=mask,return_dict=False)
        
        x = cls_hs
        
#         x = self.fc1(cls_hs)
        
#         x = self.relu(x)
        
#         x = self.dropout(x)
        
#         # output layer
#         x = self.fc2(x)
        
#         # apply softmax activation
        
#         x = self.softmax(x)
        
        return x

In [90]:
# pass the pre-trained BERT to our define architecture
model = BERT_Arch(bert)

device = torch.device("cuda")

# push the model to GPU
model = model.to(device)

In [91]:
from transformers import AdamW

# define the optimizer
optimizer = AdamW(model.parameters(), lr = 1e-5)          # learning rate



In [92]:
#Define loss function

cross_entropy  = nn.CrossEntropyLoss()

In [106]:
predictions = []
actual = []

In [108]:
# Similarity Based Zero Shot Learning

model.train()
    
for ori_itr, pert_itr in zip(train_dataloader, train_dataloader_p):
    
    sample_ori = [r.to(device) for r in ori_itr]
    sample_pert = [r.to(device) for r in pert_itr]
    
    sent_id, mask, labels = sample_ori
    sent_id_p, mask_p, labels_p = sample_pert
    
    with torch.no_grad():
        preds_ori = model(sent_id, mask)
        preds_pert = model(sent_id_p, mask_p)
    
#     print(preds_pert.size())
    cos = torch.nn.CosineSimilarity(dim=1)
    output = cos(preds_ori, preds_pert)
    
    
    
    actual.append(labels.item())
    predictions.append(output)
    
    preds_ori.detach()
    preds_pert.detach()
    
#     print(labels.item())

    
    # display the output tensor
#     print("Cosine Similarity:",output)


In [116]:
predictions2 = []
for i in predictions:
    predictions2.append(i.detach().cpu())

In [117]:
from sklearn.metrics import roc_curve, precision_recall_curve, auc
actual = np.array(actual)
# predictions = np.array(predictions)
fpr, tpr, _ = roc_curve(actual, predictions2)
roc_auc = auc(fpr, tpr)
print("--------------------------------- Results--------------------------")
print("AUROC Score:", roc_auc)

--------------------------------- Results--------------------------
AUROC Score: 0.6083526570048309


  y = np.asarray(y)
  y = np.asarray(y)


--------------------------------------------------------------------------------------------------------------------------

#### GPT Embedding


In [118]:
predictions_gpt = []
actual_gpt = []

In [135]:
from transformers import AutoTokenizer, GPT2Model
import torch

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
outputs = model(**inputs)

last_hidden_states = outputs.last_hidden_state

In [143]:
inputs['input_ids'].size()[1]

6

In [141]:
inputs

{'input_ids': tensor([[15496,    11,   616,  3290,   318, 13779]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]])}

In [127]:
last_hidden_states.size()

torch.Size([1, 6, 768])

In [128]:
print(model)

GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0): GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (1): GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwis

In [147]:
outputs[0][0][5].size()

torch.Size([768])

-------------------- Experiment -------------------------------

In [170]:
actual_gpt = []
predictions_gpt = []

In [171]:
# Similarity Based Zero Shot Learning

model.train()

for ori_itr, pert_itr, label in zip(train_text, train_text_p, train_labels):
    
    inputs_ori = tokenizer(ori_itr, return_tensors="pt")
    outputs_ori = model(**inputs_ori)
    
    inputs_pert = tokenizer(pert_itr, return_tensors="pt")
    outputs_pert = model(**inputs_pert)
    
    n1 = inputs_ori['input_ids'].size()[1]
    n2 = inputs_pert['input_ids'].size()[1]
    
    vec_ori = outputs_ori[0][0][n1-1]
    vec_pert = outputs_pert[0][0][n2-1]
    
    cos = torch.nn.CosineSimilarity(dim=1)
    output = cos(preds_ori, preds_pert)
    
    actual_gpt.append(label)
    predictions_gpt.append(output.item())
    
    

In [172]:
predictions2 = []
for i in predictions_gpt:
    predictions2.append(i.detach().cpu())

AttributeError: 'float' object has no attribute 'detach'

In [174]:
from sklearn.metrics import roc_curve, precision_recall_curve, auc
actual2 = np.array(actual_gpt)
predictions2 = np.array(predictions_gpt)
fpr, tpr, _ = roc_curve(actual2, predictions2)
roc_auc = auc(fpr, tpr)
print("--------------------------------- Results--------------------------")
print("AUROC Score:", roc_auc)

--------------------------------- Results--------------------------
AUROC Score: 0.5
