Colab Link : https://colab.research.google.com/drive/1F0xBz3vV6JPEdM4Dl9WcPV7WhP-mWZ60?usp=sharing

# This notebook contains code to evaluate the paraphrases generated by our fine-tuned T5 model using our fine-tuned BERT classifier

# Imports

In [None]:
# Install the third-party dependencies into the notebook runtime.
# --quiet suppresses most of pip's output; no versions are pinned here.
!pip install --quiet transformers
!pip install --quiet tokenizers
!pip install --quiet sentencepiece
!pip install --quiet torchtext torch
!pip install --quiet pytorch_lightning

[K     |████████████████████████████████| 3.1 MB 13.5 MB/s 
[K     |████████████████████████████████| 3.3 MB 34.5 MB/s 
[K     |████████████████████████████████| 59 kB 6.7 MB/s 
[K     |████████████████████████████████| 895 kB 43.7 MB/s 
[K     |████████████████████████████████| 596 kB 52.8 MB/s 
[K     |████████████████████████████████| 1.2 MB 12.7 MB/s 
[K     |████████████████████████████████| 1.0 MB 12.7 MB/s 
[K     |████████████████████████████████| 829 kB 31.3 MB/s 
[K     |████████████████████████████████| 132 kB 33.7 MB/s 
[K     |████████████████████████████████| 329 kB 37.3 MB/s 
[K     |████████████████████████████████| 1.1 MB 42.3 MB/s 
[K     |████████████████████████████████| 271 kB 52.6 MB/s 
[K     |████████████████████████████████| 160 kB 50.1 MB/s 
[K     |████████████████████████████████| 192 kB 51.7 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone


In [None]:
from tabulate import tabulate

In [None]:
import random
import pandas as pd
import numpy as np

from tqdm.auto import tqdm
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from transformers import BertTokenizerFast as BertTokenizer, BertModel
import pytorch_lightning as pl

from pytorch_lightning.plugins import DDPPlugin

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix
from sklearn.metrics import accuracy_score,f1_score,roc_curve
from transformers import T5ForConditionalGeneration,T5Tokenizer
from tqdm import tqdm


In [None]:
# Mount Google Drive at /content/drive; the classifier checkpoint and the
# fine-tuned T5 weights are later read from paths under drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


# Paraphrase Classifier

## Bert Class

In [None]:
class BertParaphrase(pl.LightningModule):
  """BERT encoder with a small feed-forward head that scores a sentence pair.

  The pooled BERT output is passed through a hidden-size linear layer, a ReLU,
  a single-unit linear layer and a sigmoid, producing one value in (0, 1) per
  input row.
  """

  def __init__(self):
    """Create the 'bert-base-cased' encoder, the head layers and the BCE loss."""
    super().__init__()
    self.model = BertModel.from_pretrained('bert-base-cased', return_dict=True)
    hidden_size = self.model.config.hidden_size
    # Attribute names and creation order are kept as-is: the names are the
    # keys under which the layers' weights are stored in a checkpoint.
    self.classifier = nn.Linear(hidden_size, 1)
    self.criterion = nn.BCELoss()
    self.relu = nn.ReLU()
    self.layer_1 = nn.Linear(hidden_size, hidden_size)

  def forward(self, input_ids, attention_mask, labels=None):
    """Score the encoded inputs and optionally compute the loss.

    Args:
      input_ids: token ids forwarded to the BERT encoder.
      attention_mask: attention mask forwarded to the BERT encoder.
      labels: optional targets; when given they are compared with the
        sigmoid outputs using binary cross-entropy.

    Returns:
      A ``(loss, probabilities)`` tuple. ``loss`` is the integer 0 when
      ``labels`` is None, otherwise the BCE loss.
    """
    encoded = self.model(input_ids, attention_mask=attention_mask)
    hidden = self.relu(self.layer_1(encoded.pooler_output))
    probabilities = torch.sigmoid(self.classifier(hidden))

    if labels is None:
      return 0, probabilities
    return self.criterion(probabilities, labels), probabilities


## Tokenizer

In [None]:
# Module-level BERT tokenizer for 'bert-base-cased'; get_prediction reads this
# global to encode each (sentence, paraphrase) pair.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

## Load the model from checkpoint

In [None]:
# Restore the classifier from the checkpoint stored on the mounted Drive.
# NOTE(review): the path is relative, so it presumably relies on the working
# directory being /content (where Drive is mounted) — confirm.
bert_model = BertParaphrase.load_from_checkpoint("drive/MyDrive/IRE/Classifer/classfier.ckpt")
# The model is placed on the GPU; this cell requires a CUDA runtime.
bert_model.to("cuda")
# Switch to evaluation mode for inference.
bert_model.eval()
# Lightning's freeze(); presumably disables gradients for all parameters — confirm.
bert_model.freeze()

Downloading:   0%|          | 0.00/416M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


# Generate Predictions

In [None]:
def get_prediction(trained_model, sentence, paraphrase, threshold=0.8, max_length=120):
  """Classify whether `paraphrase` is accepted as a paraphrase of `sentence`.

  The pair is encoded with the module-level BERT `tokenizer`, scored by
  `trained_model`, and the resulting probability is compared with `threshold`.

  Args:
    trained_model: model whose forward call returns ``(loss, probabilities)``
      for ``(input_ids, attention_mask)``.
    sentence: the original sentence (first segment of the pair).
    paraphrase: the candidate paraphrase (second segment of the pair).
    threshold: probability strictly above which the pair is labelled 1.
      Defaults to 0.8.
    max_length: number of tokens the encoded pair is padded/truncated to.
      Defaults to 120.

  Returns:
    1 if the predicted probability is greater than `threshold`, else 0.
  """
  enc = tokenizer.encode_plus(
      sentence,
      paraphrase,
      add_special_tokens=True,
      max_length=max_length,
      # Enforce the max_length cap explicitly so an over-long pair cannot
      # produce a sequence longer than the requested length.
      truncation=True,
      return_token_type_ids=False,
      padding="max_length",
      return_attention_mask=True,
      return_tensors='pt',
    )

  # Follow the model's own device instead of assuming "cuda", so the function
  # also works with a model that lives on the CPU.
  device = next(trained_model.parameters()).device

  # no_grad keeps the output detached, so the numpy conversion below works
  # even when the model's parameters have not been frozen.
  with torch.no_grad():
    _, prediction = trained_model(enc["input_ids"].to(device), enc["attention_mask"].to(device))

  score = prediction.flatten().cpu().numpy()[0]
  return int(score > threshold)

# Paraphrase Generator

In [None]:
# The tokenizer is loaded from the stock 't5-base' name, while the model
# weights are loaded from the fine-tuned paraphraser directory on Drive.
t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')
t5_model = T5ForConditionalGeneration.from_pretrained('./drive/MyDrive/IRE/Final_t5_parphrase')
# The model is placed on the GPU; this cell requires a CUDA runtime.
t5_model.to("cuda")

# Generate Paraphrases

In [None]:
def generate(model, tokenizer, sentence, num_return_sequences=20, max_len=256):
  """Sample paraphrases of `sentence` and return the distinct, non-trivial ones.

  The sentence is prefixed with "paraphrase: ", encoded, and passed to
  `model.generate` with top-k / top-p sampling. Decoded outputs that equal the
  input sentence (ignoring case) or that were already collected are dropped.

  Args:
    model: generation model exposing `.device` and `.generate(...)`.
    tokenizer: tokenizer exposing `encode_plus(...)` and `decode(...)`.
    sentence: the sentence to paraphrase.
    num_return_sequences: number of sequences to sample. Defaults to 20.
    max_len: maximum length passed to `model.generate`. Defaults to 256.

  Returns:
    A list of unique paraphrase strings in generation order; it may be empty
    when every sampled output matches the input sentence.
  """
  text = "paraphrase: " + sentence

  # Use the model's own device rather than assuming "cuda".
  device = model.device

  # A single sequence is encoded at a time, so no padding is requested:
  # padding to 'max_length' without a max_length only adds pad tokens.
  encoding = tokenizer.encode_plus(text, return_tensors="pt")
  input_ids = encoding["input_ids"].to(device)
  attention_masks = encoding["attention_mask"].to(device)

  outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_masks,
    do_sample=True,
    max_length=max_len,
    top_k=220,
    top_p=0.9,
    early_stopping=True,
    num_return_sequences=num_return_sequences
  )

  final_outputs = []
  original_lower = sentence.lower()

  for output in outputs:
      sent = tokenizer.decode(output, skip_special_tokens=True,clean_up_tokenization_spaces=True)
      # Skip echoes of the input (case-insensitive) and exact duplicates.
      if sent.lower() != original_lower and sent not in final_outputs:
          final_outputs.append(sent)

  return final_outputs
   

# Data for Evaluation

In [None]:
# Read the evaluation CSV and keep only its 'sentence1' column as the pool of
# input sentences to paraphrase.
# NOTE(review): this path is relative to the working directory, unlike the
# Drive paths used for the model checkpoints — confirm it resolves at runtime.
df = pd.read_csv("../../PAWS_dataset/paws_test.csv")
total_data = list(df['sentence1'])

In [None]:
# Draw the evaluation subset with a dedicated, seeded RNG so the same sentences
# are selected on every run (the global `random` state is left untouched).
# The sample size is capped at the pool size so a small dataset cannot raise
# ValueError.
SAMPLE_SEED = 42
SAMPLE_SIZE = 1000
data = random.Random(SAMPLE_SEED).sample(total_data, min(SAMPLE_SIZE, len(total_data)))

In [None]:
def _print_examples(data, table_data, num_examples):
    """Print the first `num_examples` sentences with their classified paraphrases."""
    # Never index past the end when fewer sentences than requested were evaluated.
    for i in range(min(num_examples, len(data))):
        sentence = data[i]
        print()
        print("INPUT SENTENCE IS : ")
        print(sentence)
        print()
        print("GENERATE PARAPHRASE AND CLASSIFER PREDICTION : ")
        print()
        print(tabulate(table_data[i], headers=['Paraphrase', 'Classifier Prediction'],tablefmt='orgtbl'))
        print()
        print('=====================================================================================================')


def generate_score_data(data, num_examples=10):
    """Generate paraphrases for each sentence and score them with the classifier.

    For every sentence, paraphrases are produced by `generate` using the
    module-level `t5_model` / `t5_tokenizer`, and each one is labelled by
    `get_prediction` using the module-level `bert_model`. The per-sentence
    score is the fraction of its paraphrases labelled 1; the returned value is
    the mean of those per-sentence scores.

    Sentences for which no paraphrase survives filtering have no defined
    fraction, so they are left out of the mean instead of dividing by zero.

    Args:
        data: indexable sequence of input sentences.
        num_examples: how many leading sentences to print as tables.
            Defaults to 10.

    Returns:
        The mean per-sentence acceptance fraction, or 0.0 when no sentence
        could be scored (empty `data`, or no paraphrases at all).
    """
    sum_of_average = 0
    scored_sentences = 0
    table_data = []

    for sentence in tqdm(data):
        paraphrases = generate(t5_model ,t5_tokenizer, sentence)
        curr_data = []
        not_paraphrase = 0
        for output in paraphrases:
            prediction = get_prediction(bert_model, sentence, output)
            if prediction == 0:
                not_paraphrase += 1
            curr_data.append([output, prediction])

        # Always append so table_data[i] stays aligned with data[i].
        table_data.append(curr_data)

        if not paraphrases:
            # Nothing to score for this sentence; skip it to avoid 0/0.
            continue

        sum_of_average += 1 - (not_paraphrase) / (len(paraphrases))
        scored_sentences += 1

    final_score = sum_of_average / scored_sentences if scored_sentences else 0.0

    _print_examples(data, table_data, num_examples)

    return final_score

In [None]:
# Run the full evaluation over the sampled sentences; the recorded run below
# took roughly 48 minutes for 1000 sentences.
final_score = generate_score_data(data)

100%|██████████| 1000/1000 [48:38<00:00,  2.92s/it]


INPUT SENTENCE IS : 
Both electromagnetic brakes and eddy current brakes use electromagnetic force but electromagnetic brakes ultimately depend on friction and eddy current brakes use magnetic force directly .

GENERATE PARAPHRASE AND CLASSIFER PREDICTION : 

| Paraphrase                                                                                                                                                                                      |   Classifier Prediction |
|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------|
| Both electromagnetic brakes and eddy current brakes use electromagnetic force, but electromagnetic brakes ultimately depend on friction and eddy current brakes use magnetic force directly.    |                       1 |
| Both electromagnetic brakes and eddy current brakes use electromagnetic




In [None]:
# Mean, over the evaluated sentences, of the fraction of generated paraphrases
# that the classifier labelled 1.
print(final_score)

0.9059774993582276
