# PEGASUS evaluation: the pre-trained model (google/pegasus-large) and the CNN/DailyMail fine-tuned model (google/pegasus-cnn_dailymail), both evaluated on the CNN/DailyMail test set.

In [None]:
!pip install -U transformers
!pip install -U accelerate
!pip install -U datasets
!pip install -U bertviz
!pip install -U umap-learn
!pip install -U sentencepiece
!pip install -U urllib3
!pip install -U py7zr

Collecting transformers
  Downloading transformers-4.44.0-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.44.0-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.42.4
    Uninstalling transformers-4.42.4:
      Successfully uninstalled transformers-4.42.4
Successfully installed transformers-4.44.0
Collecting accelerate
  Downloading accelerate-0.33.0-py3-none-any.whl.metadata (18 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
 

In [None]:
# Attach Google Drive to the Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
#!pip install --upgrade datasets
from datasets import load_dataset

In [None]:
# Load only the test split of CNN/DailyMail. "3.0.0" is the dataset's
# configuration name and is passed as the second positional argument (`name`),
# which is the documented way to select a configuration.
dataset = load_dataset("cnn_dailymail", "3.0.0", split="test")

In [None]:
# Display the loaded split's summary (feature names and row count) as a sanity check.
dataset

Dataset({
    features: ['article', 'highlights', 'id'],
    num_rows: 11490
})

In [None]:
import pandas as pd
import torch
from datasets import load_metric
from nltk.translate.bleu_score import corpus_bleu
from tqdm.auto import tqdm
from transformers import pipeline
from transformers import BartTokenizer, BartForConditionalGeneration
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [None]:
!pip install rouge_score
!pip install bert_score
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from bert_score import score as bert_score
from nltk.translate.meteor_score import meteor_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=1d5dc2d587190278e3917db41f27ccf24e145805369f34147ff4663ea2bf2a11
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [None]:
# Evaluate two PEGASUS checkpoints on the CNN/DailyMail test split (`dataset`)
# and collect BLEU, ROUGE-1/2/L, METEOR, TF-IDF cosine similarity and BERTScore
# for each of them into `results` / `df`.

# Models and tokenizers: the generic pre-trained checkpoint and the checkpoint
# fine-tuned on CNN/DailyMail.
models = {
    "PEGASUS": PegasusForConditionalGeneration.from_pretrained("google/pegasus-large"),
    "PEGASUS-CNN": PegasusForConditionalGeneration.from_pretrained("google/pegasus-cnn_dailymail")
}
tokenizers = {
    "PEGASUS": PegasusTokenizer.from_pretrained("google/pegasus-large"),
    "PEGASUS-CNN": PegasusTokenizer.from_pretrained("google/pegasus-cnn_dailymail")
}
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for model in models.values():
    model.to(device)

# ROUGE and METEOR are fed batch by batch inside the loop and computed once per
# model; the same metric objects are reused for both models.
rouge = load_metric('rouge', trust_remote_code=True)
meteor = load_metric('meteor', trust_remote_code=True)

# Parameters
batch_size = 16
results = []

# Process the test set in batches for each model
for model_name in models.keys():
    # Tokenized texts (for corpus-level BLEU) ...
    reference_summaries = []
    generated_summaries = []
    # ... and the raw strings of the WHOLE test set (for cosine similarity and
    # BERTScore, which are computed after the batch loop).
    all_references = []
    all_generated = []

    model = models[model_name]
    tokenizer = tokenizers[model_name]

    print(f"Processing {model_name}")
    for i in tqdm(range(0, len(dataset), batch_size), desc=f"Processing Test Set with {model_name}"):
        # Slicing a Dataset returns a dict of column -> list of values; the
        # final slice is simply shorter when len(dataset) is not a multiple of
        # batch_size.
        batch = dataset[i:i + batch_size]
        articles = batch['article']
        references = batch['highlights']

        # Tokenize and encode the articles (truncated to 512 tokens, padded to
        # the longest article in the batch)
        inputs = tokenizer(articles, max_length=512, return_tensors='pt', truncation=True, padding=True)
        inputs = inputs.to(device)

        # Generate summaries with beam search. The attention mask must be
        # passed along with the padded input_ids so that padding positions are
        # not attended to as if they were article text.
        summary_ids = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            num_beams=4,
            max_length=142,
            early_stopping=True,
        )
        generated_batch = tokenizer.batch_decode(
            summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

        # Store references and generated summaries for the corpus-level metrics
        all_references.extend(references)
        all_generated.extend(generated_batch)
        reference_summaries.extend([nltk.word_tokenize(ref.lower()) for ref in references])
        generated_summaries.extend([nltk.word_tokenize(gen.lower()) for gen in generated_batch])

        # Update the ROUGE and METEOR accumulators with this batch
        rouge.add_batch(predictions=generated_batch, references=references)
        meteor.add_batch(predictions=generated_batch, references=references)

    # BLEU score (corpus level, one reference per generated summary)
    bleu_score = corpus_bleu(
        [[ref] for ref in reference_summaries],
        generated_summaries,
        smoothing_function=SmoothingFunction().method7,
    )
    # ROUGE score
    rouge_result = rouge.compute()
    # METEOR score
    meteor_result = meteor.compute()

    # Cosine similarity between each reference and its generated summary,
    # averaged over the whole test set (not only over the variables left behind
    # by the last batch). TfidfVectorizer L2-normalises every row by default,
    # so the cosine of a matched pair is the row-wise dot product; summing the
    # element-wise product avoids building the full N x N similarity matrix
    # just to read its diagonal.
    vectorizer = TfidfVectorizer().fit(all_references + all_generated)
    reference_vectors = vectorizer.transform(all_references)
    generated_vectors = vectorizer.transform(all_generated)
    cosine_scores = float(
        reference_vectors.multiply(generated_vectors).sum() / reference_vectors.shape[0]
    )

    # BERTScore F1, also averaged over the whole test set
    P, R, F1 = bert_score(all_generated, all_references, lang='en', verbose=True)
    bert_score_f1 = F1.mean().item()

    # Collect this model's results
    results.append({
        "Model": model_name,
        "BLEU": bleu_score,
        "ROUGE-1": rouge_result['rouge1'].mid.fmeasure,
        "ROUGE-2": rouge_result['rouge2'].mid.fmeasure,
        "ROUGE-L": rouge_result['rougeL'].mid.fmeasure,
        "METEOR": meteor_result['meteor'],
        "Cosine Similarity": cosine_scores,
        "BERTScore": bert_score_f1
    })

# Convert the results to a DataFrame (one row per model)
df = pd.DataFrame(results)
print(df)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-large and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordne

Processing PEGASUS


Processing Test Set with PEGASUS: 100%|██████████| 719/719 [54:52<00:00,  4.58s/it]


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.27 seconds, 7.33 sentences/sec
Processing PEGASUS-CNN


Processing Test Set with PEGASUS-CNN: 100%|██████████| 719/719 [43:38<00:00,  3.64s/it]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.07 seconds, 29.65 sentences/sec
         Model      BLEU   ROUGE-1   ROUGE-2   ROUGE-L    METEOR  \
0      PEGASUS  0.211166  0.337864  0.135326  0.221852  0.311271   
1  PEGASUS-CNN  0.286276  0.419191  0.203662  0.301706  0.362825   

   Cosine Similarity  BERTScore  
0           0.363833   0.869930  
1           0.427343   0.888158  
