In [1]:
import pandas as pd

from transformers import pipeline
from datasets import load_dataset
from evaluate import load

In [2]:
# Map each split name to the CSV file that backs it. The loaded splits expose
# "text" and "summary" columns (plus an "Unnamed: 0" column, as the displayed
# DatasetDict shows).
split_files = {
    "train": "Datasets/TrainSummary.csv",
    "validation": "Datasets/ValidationSummary.csv",
    "test": "Datasets/TestSummary.csv",
}
dataset = load_dataset("csv", data_files=split_files)
dataset

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'text', 'summary'],
        num_rows: 10000
    })
    validation: Dataset({
        features: ['Unnamed: 0', 'text', 'summary'],
        num_rows: 3000
    })
    test: Dataset({
        features: ['Unnamed: 0', 'text', 'summary'],
        num_rows: 200
    })
})

In [3]:
# Load the ROUGE metric once at module level; calculate_scores() reads this
# global `rouge` object, so this cell must run before any scoring cell.
rouge = load("rouge")

In [4]:
def calculate_scores(dataset, model_path, max_length=100, device=0):
    """Summarize every text in ``dataset`` with a model and score it with ROUGE.

    Args:
        dataset: Dataset split exposing a ``"text"`` column (inputs to
            summarize) and a ``"summary"`` column (reference summaries).
        model_path: Model identifier or directory handed to the
            ``transformers`` summarization pipeline as ``model``.
        max_length: Forwarded to the pipeline call as ``max_length``.
            Defaults to 100, the value previously hard-coded. Inputs shorter
            than this make the pipeline log a "max_length is set to ..."
            warning for each such input.
        device: Forwarded to ``pipeline(..., device=...)``. Defaults to 0,
            the value previously hard-coded.

    Returns:
        Whatever ``rouge.compute`` returns for the generated summaries
        against the references (a mapping with keys such as ``rouge1``,
        ``rouge2``, ``rougeL`` and ``rougeLsum``).
    """
    summarizer = pipeline("summarization", model=model_path, device=device)

    # Materialize the columns as plain Python lists. The pipeline only
    # flattens its per-input results when it receives a real ``list`` of
    # strings, and column access is not guaranteed to return a ``list`` in
    # every ``datasets`` release.
    texts = list(dataset["text"])
    references = list(dataset["summary"])

    outputs = summarizer(texts, max_length=max_length, truncation=True)

    # One result dict per input text; keep only the generated summary string.
    predictions = [output["summary_text"] for output in outputs]

    return rouge.compute(references=references, predictions=predictions)

In [6]:
# ROUGE scores of the BART checkpoint on the test split.
rouge_scores1 = calculate_scores(
    dataset=dataset["test"],
    model_path="SummarizerModels/BART",
)

Your max_length is set to 100, but your input_length is only 33. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=16)
  attn_output = torch.nn.functional.scaled_dot_product_attention(
Your max_length is set to 100, but your input_length is only 37. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)
Your max_length is set to 100, but your input_length is only 30. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=15)
Your max_length is set to 100, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasi

In [11]:
# ROUGE scores of the PEGASUS checkpoint on the test split.
rouge_scores2 = calculate_scores(
    dataset=dataset["test"],
    model_path="SummarizerModels/PEGASUS",
)

Your max_length is set to 100, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Your max_length is set to 100, but your input_length is only 36. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)
Your max_length is set to 100, but your input_length is only 29. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=14)
Your max_length is set to 100, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)
Your

In [12]:
# ROUGE scores of the T5 checkpoint on the test split.
rouge_scores3 = calculate_scores(
    dataset=dataset["test"],
    model_path="SummarizerModels/T5",
)

Your max_length is set to 100, but your input_length is only 37. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)
Your max_length is set to 100, but your input_length is only 43. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=21)
Your max_length is set to 100, but your input_length is only 34. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=17)
Your max_length is set to 100, but your input_length is only 25. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=12)
Your

In [13]:
# Collect the per-model ROUGE results into one table: one row per model,
# one column per ROUGE variant (column order taken from the first result).
model_labels = ["BART", "PEGASUS", "T5"]
rs = [rouge_scores1, rouge_scores2, rouge_scores3]
rscolumn_names = [*rouge_scores1]
rouge_scores = pd.DataFrame(data=rs, index=model_labels, columns=rscolumn_names)

print("ROUGE scores for the finetuned models: ")
rouge_scores

ROUGE scores for the finetuned models: 


Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum
BART,0.425118,0.204302,0.360182,0.407906
PEGASUS,0.406019,0.182253,0.341768,0.372712
T5,0.291208,0.118444,0.23424,0.259923
