# Prompting flan-T5-base

In [1]:
!pip install transformers datasets evaluate rouge_score --quiet

In [1]:
from datasets import load_dataset

# Full dataset (split included)
dataset = load_dataset("xsum", trust_remote_code=True)
test_sample = dataset["test"][0]

print("Document:", test_sample["document"])
print("\nReference Summary:", test_sample["summary"])

Document: Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.
Workers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.
The Welsh Government said more people than ever were getting help to address housing problems.
Changes to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.
Prison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.
However, the same could not be said for men, the charity said, because issues which often affect them, such as post traumatic stress disorder or drug dependency, were often viewed as less of a priority.
Andrew Stevens, who works in Welsh prisons trying to secure housing for prison leavers, said the need for 

In [3]:
!pip uninstall keras -y
!pip install keras==2.11

Found existing installation: keras 2.11.0
Uninstalling keras-2.11.0:
  Successfully uninstalled keras-2.11.0
Collecting keras==2.11
  Using cached keras-2.11.0-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
Installing collected packages: keras
Successfully installed keras-2.11.0


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.19.0 requires keras>=3.5.0, but you have keras 2.11.0 which is incompatible.


In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [3]:
def generate_with_strategy(doc, strategy="greedy"):
    input_ids = tokenizer("summarize: " + doc, return_tensors="pt", truncation=True).input_ids.to(model.device)

    if strategy == "greedy":
        output = model.generate(input_ids, max_length=64)
    elif strategy == "beam":
        output = model.generate(input_ids, max_length=64, num_beams=5, early_stopping=True)
    elif strategy == "topk":
        output = model.generate(input_ids, max_length=64, do_sample=True, top_k=50)
    elif strategy == "topp":
        output = model.generate(input_ids, max_length=64, do_sample=True, top_p=0.9)
    else:
        raise ValueError("Unknown strategy")

    return tokenizer.decode(output[0], skip_special_tokens=True)

In [6]:
strategies = ["greedy", "beam", "topk", "topp"]
subset = dataset["test"].select(range(100))
references = [example["summary"] for example in subset]


for i in range(3):  # Test on 3 samples
    doc = subset[i]["document"]
    ref = subset[i]["summary"]
    
    print(f"\n🔹 Example {i+1}")
    print("="*80)
    print("📄 Document:\n", doc[:800], "..." if len(doc) > 800 else "")
    print("\n✅ Reference Summary:\n", ref)

    for strat in strategies:
        pred = generate_with_strategy(doc, strategy=strat)
        print(f"\n🧠 {strat.upper()} Output:\n{pred}")
    print("="*80)


🔹 Example 1
📄 Document:
 Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.
Workers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.
The Welsh Government said more people than ever were getting help to address housing problems.
Changes to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.
Prison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.
However, the same could not be said for men, the charity said, because issues which often affect them, such as post tra ...

✅ Reference Summary:
 There is a "chronic" need for more housing for prison leavers in Wales, according to a charity.

🧠 GREEDY Output:
Prison is a "dangerous" place fo

In [6]:
def generate_summary(doc, prompt_template="Please write a short summary of the following article:\n\n{}",
                     max_input=512, max_output=64):
    prompt = prompt_template.format(doc)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_input, padding=True).to(model.device)
    outputs = model.generate(**inputs, max_length=max_output)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [5]:
import evaluate

rouge = evaluate.load("rouge")

# Evaluate on a 100-sample test slice
subset = dataset["test"].select(range(100))

predictions = [generate_summary(example["document"]) for example in subset]
references = [example["summary"] for example in subset]

rouge_results = rouge.compute(predictions=predictions, references=references)
print("ROUGE Results:", rouge_results)

KeyboardInterrupt: 

In [8]:
# Show a few test examples with model predictions
num_examples = 3
for i in range(num_examples):
    doc = subset[i]["document"]
    ref = subset[i]["summary"]
    pred = generate_summary(doc)

    print(f"\n🔹 Example {i+1}")
    print("=" * 80)
    print("📄 Document:\n", doc[:800], "..." if len(doc) > 800 else "")  # truncate long doc
    print("\n✅ Reference Summary:\n", ref)
    print("🤖 Model Summary:\n", pred)
    print("=" * 80)


🔹 Example 1
📄 Document:
 Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.
Workers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.
The Welsh Government said more people than ever were getting help to address housing problems.
Changes to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.
Prison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.
However, the same could not be said for men, the charity said, because issues which often affect them, such as post tra ...

✅ Reference Summary:
 There is a "chronic" need for more housing for prison leavers in Wales, according to a charity.
🤖 Model Summary:
 Prisoners are being put out of w

### Few shot

In [15]:
# Few-shot examples from training set
train_examples = dataset["train"].select(range(2))

def build_few_shot_prompt(target_doc, few_shots):
    prompt = ""
    for ex in few_shots:
        prompt += f"Document: {ex['document'].strip()}\nSummary: {ex['summary'].strip()}\n\n"
    prompt += f"Document: {target_doc.strip()}\nSummary:"
    return prompt

In [16]:
def generate_few_shot(doc, few_shots):
    prompt = build_few_shot_prompt(doc, few_shots)
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512, padding=True).to(model.device)
    outputs = model.generate(**inputs, max_length=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [17]:
# Generate few-shot predictions (dynamic prompt per example)
few_shot_preds = []
for example in subset:
    summary = generate_few_shot(example["document"], train_examples)
    few_shot_preds.append(summary)

few_shot_rouge = rouge.compute(predictions=few_shot_preds, references=references)
print("Few-shot ROUGE:", few_shot_rouge)

Few-shot ROUGE: {'rouge1': 0.0858592486704474, 'rouge2': 0.005658787166427234, 'rougeL': 0.07332448990688756, 'rougeLsum': 0.07345624283103427}


In [18]:
for i in range(3):
    print(f"\n🔹 Example {i+1}")
    print("="*80)
    print("📄 Document:\n", subset[i]["document"][:800], "..." if len(subset[i]["document"]) > 800 else "")
    print("\n✅ Reference Summary:\n", subset[i]["summary"])
    print("🤖 Few-shot Summary:\n", few_shot_preds[i])
    print("="*80)


🔹 Example 1
📄 Document:
 Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.
Workers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.
The Welsh Government said more people than ever were getting help to address housing problems.
Changes to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.
Prison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.
However, the same could not be said for men, the charity said, because issues which often affect them, such as post tra ...

✅ Reference Summary:
 There is a "chronic" need for more housing for prison leavers in Wales, according to a charity.
🤖 Few-shot Summary:
 The Dumfries and Galloway are