In [1]:
%%capture
!pip install torch transformers datasets nltk rouge_score

In [2]:
from transformers import (
    ProphetNetForConditionalGeneration,
    ProphetNetTokenizer,
    PegasusForConditionalGeneration,
    PegasusTokenizer,
    BartForConditionalGeneration,
    BartTokenizer,
)

import torch
from torch.utils.data import DataLoader

from datasets import load_dataset
from datasets import load_metric
from nltk.translate.bleu_score import corpus_bleu

import time
import pandas as pd

import nltk
nltk.download('wordnet')
nltk.download('punkt')



[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# Load the models and tokenizers
prophetnet_model = ProphetNetForConditionalGeneration.from_pretrained('microsoft/prophetnet-large-uncased-cnndm').to(device)
prophetnet_tokenizer = ProphetNetTokenizer.from_pretrained('microsoft/prophetnet-large-uncased-cnndm')

pegasus_model = PegasusForConditionalGeneration.from_pretrained('google/pegasus-cnn_dailymail').to(device)
pegasus_tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-cnn_dailymail')

bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn').to(device)
bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.57G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/174 [00:00<?, ?B/s]

Downloading (…)prophetnet.tokenizer:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/256 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [5]:
# Load CNN/Daily Mail dataset
dataset = load_dataset('cnn_dailymail', '3.0.0', split='validation')

Downloading builder script:   0%|          | 0.00/3.51k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.61k [00:00<?, ?B/s]

Downloading and preparing dataset cnn_dailymail/3.0.0 (download: 558.32 MiB, generated: 1.28 GiB, post-processed: Unknown size, total: 1.82 GiB) to /root/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/3cb851bf7cf5826e45d49db2863f627cba583cbc32342df7349dfe6c38060234...


Downloading data files:   0%|          | 0/5 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/159M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/376M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/572k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/12.3M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/661k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/5 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

Dataset cnn_dailymail downloaded and prepared to /root/.cache/huggingface/datasets/cnn_dailymail/3.0.0/3.0.0/3cb851bf7cf5826e45d49db2863f627cba583cbc32342df7349dfe6c38060234. Subsequent calls will reuse this data.


In [6]:
def calculate_rouge_scores(hypotheses, references):
    """calculate rouge"""
    rouge_metric = load_metric('rouge')
    rouge_output = rouge_metric.compute(predictions=hypotheses, references=references)
    rouge_scores = rouge_output['rouge1'].mid.fmeasure, rouge_output['rouge2'].mid.fmeasure, rouge_output['rougeL'].mid.fmeasure
    return rouge_scores


def calculate_bleu_score(hypotheses, references):
    """calculate bleu"""
    bleu = corpus_bleu([[tgt] for tgt in references], hypotheses)
    return bleu


def generate_summaries(model, tokenizer, dataset):
    """Function to generate summaries using a model and tokenizer"""
    summaries = []
    for example in dataset:
        inputs = tokenizer.encode(example['article'], truncation=True, max_length=512, padding='longest', return_tensors='pt')
        input_ids = inputs.to(model.device)
        summary_ids = model.generate(input_ids, num_beams=4, max_length=128, early_stopping=True)
        summary = tokenizer.decode(summary_ids.squeeze(), skip_special_tokens=True)
        summaries.append(summary)
    return summaries

In [7]:
# Define the number of examples to use
num_examples = 200  

# Select a subset of examples from the dataset
subset_dataset = dataset.select(range(num_examples))

# Perform summarization using each model

In [8]:
start_time = time.time()
prophetnet_summaries = generate_summaries(prophetnet_model, prophetnet_tokenizer, subset_dataset)
end_time = time.time()
prophetnet_eval_time = end_time - start_time

In [9]:
start_time = time.time()
pegasus_summaries = generate_summaries(pegasus_model, pegasus_tokenizer, subset_dataset)
end_time = time.time()
pegasus_eval_time = end_time - start_time

In [10]:
start_time = time.time()
bart_summaries = generate_summaries(bart_model, bart_tokenizer, subset_dataset)
end_time = time.time()
bart_eval_time = end_time - start_time

# Calculate ROUGE and BLEU scores

In [11]:
references = subset_dataset['highlights']

In [12]:
rouge_scores_prophetnet = calculate_rouge_scores(prophetnet_summaries, references)
rouge_scores_pegasus = calculate_rouge_scores(pegasus_summaries, references)
rouge_scores_bart = calculate_rouge_scores(bart_summaries, references)

Downloading builder script:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

In [13]:
bleu_score_prophetnet = calculate_bleu_score(prophetnet_summaries, references)
bleu_score_pegasus = calculate_bleu_score(pegasus_summaries, references)
bleu_score_bart = calculate_bleu_score(bart_summaries, references)

In [14]:
prophetnet_model_size = prophetnet_model.num_parameters()
pegasus_model_size = pegasus_model.num_parameters()
bart_model_size = bart_model.num_parameters()

# Results

In [15]:
data = {
    'Model Name': ['ProphetNet', 'Pegasus', 'BART'],
    'Number of Parameters': [prophetnet_model_size, pegasus_model_size, bart_model_size],
    'Evaluation Time(seconds)': [prophetnet_eval_time, pegasus_eval_time, bart_eval_time],
    'ROUGE_1 Score': [rouge_scores_prophetnet[0], rouge_scores_pegasus[0], rouge_scores_bart[0]],
    'ROUGE_2 Score': [rouge_scores_prophetnet[1], rouge_scores_pegasus[1], rouge_scores_bart[1]],
    'ROUGE_L Score': [rouge_scores_prophetnet[2], rouge_scores_pegasus[2], rouge_scores_bart[2]],
    'BLEU Score': [bleu_score_prophetnet, bleu_score_pegasus, bleu_score_bart]
}

df = pd.DataFrame(data)
df['Number of Parameters'] = df['Number of Parameters'].astype(str).str[:3] + '_' + df['Number of Parameters'].astype(str).str[3:6] + '_' + df['Number of Parameters'].astype(str).str[6:]

In [16]:
df

Unnamed: 0,Model Name,Number of Parameters,Evaluation Time(seconds),ROUGE_1 Score,ROUGE_2 Score,ROUGE_L Score,BLEU Score
0,ProphetNet,391_321_600,379.865871,0.319703,0.138156,0.245167,0.381612
1,Pegasus,570_797_056,183.277522,0.337056,0.142803,0.25009,0.452055
2,BART,406_290_432,189.75744,0.333951,0.143113,0.249003,0.365206
