In [None]:
!pip install --quiet transformers datasets evaluate rouge-score
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, BartTokenizer, BartForConditionalGeneration
from datasets import load_dataset
import evaluate
!pip install py7zr
rouge = evaluate.load('rouge')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


**T5 and BART for cnn daily mailset & BBC news summary**

In [7]:
!pip install --quiet transformers datasets evaluate rouge-score py7zr

import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, BartTokenizer, BartForConditionalGeneration
from datasets import load_dataset
import evaluate

# Load the ROUGE metric
rouge = evaluate.load('rouge')

# Use GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Function to calculate ROUGE score
def compute_rouge(predictions, references):
    return rouge.compute(predictions=predictions, references=references, use_stemmer=True)

# Summarization with T5
def summarize_t5(texts, max_input_length=512, max_target_length=150):
    t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')
    t5_model = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)

    inputs = t5_tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length).to(device)
    summary_ids = t5_model.generate(inputs.input_ids, max_length=max_target_length, num_beams=4, early_stopping=True)

    return [t5_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]

# Summarization with BART
def summarize_bart(texts, max_input_length=512, max_target_length=150):
    bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
    bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-base').to(device)

    inputs = bart_tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=max_input_length).to(device)
    summary_ids = bart_model.generate(inputs.input_ids, max_length=max_target_length, num_beams=4, early_stopping=True)

    return [bart_tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids]

# Load datasets
cnn_dataset = load_dataset('cnn_dailymail', '3.0.0', split='test[:20%]')
bbc_dataset = load_dataset('gopalkalpande/bbc-news-summary')

# Function to process a dataset
def process_dataset(name, articles, highlights):
    source_texts = articles[:5]
    reference_summaries = highlights[:5]

    # Generate summaries
    print(f"Summarizing {name} with T5...")
    t5_summaries = summarize_t5(source_texts)

    print(f"Summarizing {name} with BART...")
    bart_summaries = summarize_bart(source_texts)

    # Evaluate ROUGE scores
    print(f"Evaluating T5 {name} Summaries...")
    t5_rouge = compute_rouge(t5_summaries, reference_summaries)
    print(f"T5 ROUGE Scores: {t5_rouge}")

    print(f"Evaluating BART {name} Summaries...")
    bart_rouge = compute_rouge(bart_summaries, reference_summaries)
    print(f"BART ROUGE Scores: {bart_rouge}")

    for i in range(5):
        print(f"Original Text {i+1}:\n{source_texts[i]}\n")
        print(f"T5 Summary {i+1}:\n{t5_summaries[i]}\n")
        print(f"BART Summary {i+1}:\n{bart_summaries[i]}\n")
        print("="*50)

# Process CNN/Daily Mail dataset
process_dataset("CNN/Daily Mail", cnn_dataset['article'], cnn_dataset['highlights'])

# Process BBC News dataset
process_dataset("BBC News", bbc_dataset['train']['Articles'], bbc_dataset['train']['Summaries'])


Summarizing CNN/Daily Mail with T5...
Summarizing CNN/Daily Mail with BART...
Evaluating T5 CNN/Daily Mail Summaries...
T5 ROUGE Scores: {'rouge1': 0.2749709019335912, 'rouge2': 0.06543293793266147, 'rougeL': 0.18361330625134817, 'rougeLsum': 0.22930863663885379}
Evaluating BART CNN/Daily Mail Summaries...
BART ROUGE Scores: {'rouge1': 0.3332791867006761, 'rouge2': 0.15068675440947588, 'rougeL': 0.23491516423004102, 'rougeLsum': 0.26868205510052123}
Original Text 1:
(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 