In [1]:
import pandas as pd
import numpy as np
import json
from transformers import pipeline
from rouge_score import rouge_scorer

In [None]:
def compute_rouge(reference, hypothesis):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    return scorer.score(reference, hypothesis)

def load_examples(file_path, num_examples=5):
    examples = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for _ in range(num_examples):
            line = f.readline()
            if not line:
                break
            data = json.loads(line)
            examples.append({
                "headline": data.get("headline", "N/A"),
                "human_summary": data.get("summary", "N/A"),
                "article": data.get("text", "")
            })
    return examples

candidate_models = [
    "facebook/bart-large-cnn",
    "t5-small",
    "google/pegasus-xsum"
]
param_grid = [
    {"max_length": 150, "min_length": 40},
    {"max_length": 200, "min_length": 50},
]

examples = load_examples("../data/newsroom/train.jsonl", num_examples=5)

results = []
for model_name in candidate_models:
    for params in param_grid:
        summarizer = pipeline("summarization", model=model_name)
        scores_list = []
        for ex in examples:
            article = ex["article"]
            if "t5" in model_name:
                article = "summarize: " + article
            try:
                generated = summarizer(article, max_length=params["max_length"],
                                       min_length=params["min_length"], do_sample=False)
                generated_summary = generated[0]['summary_text']
            except Exception:
                generated_summary = ""
            scores_list.append(compute_rouge(ex["human_summary"], generated_summary))
        
        avg_scores = {
            "model": model_name,
            "params": params,
            "rouge1_f": np.mean([s['rouge1'].fmeasure for s in scores_list]),
            "rouge2_f": np.mean([s['rouge2'].fmeasure for s in scores_list]),
            "rougeL_f": np.mean([s['rougeL'].fmeasure for s in scores_list])
        }
        results.append(avg_scores)

best = max(results, key=lambda x: x["rouge1_f"])
print("\nBest HPO setting:")
print(best)


Device set to use mps:0
Device set to use mps:0
Device set to use mps:0
Token indices sequence length is longer than the specified maximum sequence length for this model (2813 > 512). Running this sequence through the model will result in indexing errors
Device set to use mps:0
Token indices sequence length is longer than the specified maximum sequence length for this model (2813 > 512). Running this sequence through the model will result in indexing errors
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use mps:0
Token indices sequence length is longer than the specified maximum sequence length for this model (2409 > 512). Running this sequence through the model will result in indexing e


Best HPO setting:
{'model': 'facebook/bart-large-cnn', 'params': {'max_length': 200, 'min_length': 50}, 'rouge1_f': 0.3019220346049615, 'rouge2_f': 0.16872180451127822, 'rougeL_f': 0.2360322423737058}
