In [1]:
from datasets import load_dataset
import random

# Fixed seed so the same documents are drawn on every Restart & Run All;
# change SAMPLE_SEED to evaluate on a different draw.
SAMPLE_SEED = 42
# Number of (document, summary) pairs to evaluate.
NUM_SAMPLES = 5

# Load the full test set.
# NOTE(review): trust_remote_code=True allows the dataset's loading script to
# execute locally -- confirm the dataset source is trusted.
dataset = load_dataset("xsum", trust_remote_code=True)
test_docs = dataset["test"]["document"]
test_summaries = dataset["test"]["summary"]

# Filter based on length ratio: keep pairs whose document is non-empty and
# whose summary has fewer whitespace-separated words than the document.
filtered_pairs = [
    (doc, summ)
    for doc, summ in zip(test_docs, test_summaries)
    if len(doc.split()) > 0 and (len(summ.split()) / len(doc.split())) < 1.0
]

# Sample documents with a private, seeded RNG so the draw is reproducible and
# the global `random` state is left untouched.
sampled_pairs = random.Random(SAMPLE_SEED).sample(filtered_pairs, k=NUM_SAMPLES)
# Unzip into two parallel tuples: documents and their reference summaries.
sampled_docs, sampled_refs = zip(*sampled_pairs)

In [4]:
from ollama import Client
import re

# Shared Ollama client used by the summary-generation helpers in this notebook;
# it targets the server at the host given here.
client = Client(host="http://localhost:11434")

def generate_summary_ollama(doc, model="qwen3:8b"):
    """Ask a chat model for a one-sentence summary of ``doc``.

    Args:
        doc: Article text; interpolated into the user prompt.
        model: Model name forwarded to ``client.chat``.

    Returns:
        The reply's message content with every ``<think>...</think>`` span
        removed and surrounding whitespace stripped.
    """
    messages = [
        {
            "role": "user",
            "content": f"Summarize the following news article in one sentence:\n\n{doc}",
        }
    ]
    reply = client.chat(model=model, messages=messages)
    raw_text = reply['message']['content'].strip()

    # DOTALL lets "." cross newlines, so multi-line <think> sections are
    # removed as a whole; the non-greedy ".*?" keeps separate sections separate.
    think_section = re.compile(r"<think>.*?</think>", flags=re.DOTALL)
    return think_section.sub("", raw_text).strip()

In [5]:
# One prediction per sampled document, in the same order as sampled_docs.
predictions_vanilla = []

for i, doc in enumerate(sampled_docs):
    # Total comes from the actual sample so the progress line stays correct
    # if the sample size is changed.
    print(f"⏳ Prompting {i+1}/{len(sampled_docs)} (vanilla)...")
    try:
        summary = generate_summary_ollama(doc, model="qwen3:8b")
    except Exception as e:
        # Best-effort: report the failure and record an empty prediction so the
        # list stays aligned one-to-one with the reference summaries.
        print(f"⚠️ Error: {e}")
        summary = ""
    predictions_vanilla.append(summary)

⏳ Prompting 1/5 (vanilla)...
⏳ Prompting 2/5 (vanilla)...
⏳ Prompting 3/5 (vanilla)...
⏳ Prompting 4/5 (vanilla)...
⏳ Prompting 5/5 (vanilla)...


In [6]:
import evaluate
# Load the ROUGE metric once; the length-hint scoring cell reuses this object.
rouge = evaluate.load("rouge")

print("📊 ROUGE Scores (Standard Prompt):")
# Score the vanilla-prompt predictions against the reference summaries.
scores_vanilla = rouge.compute(
    predictions=predictions_vanilla,
    references=sampled_refs,
)
# One line per metric, four decimal places.
for metric_name in scores_vanilla:
    print(f"{metric_name}: {scores_vanilla[metric_name]:.4f}")

📊 ROUGE Scores (Standard Prompt):
rouge1: 0.2131
rouge2: 0.0358
rougeL: 0.1334
rougeLsum: 0.1334


In [10]:
def generate_summary_with_length_hint(doc, model="qwen3:8b"):
    """Ask a chat model for a one-sentence summary with a target word count.

    Same flow as the plain summary prompt, but the instructions also request a
    length of roughly 20 to 25 words.

    Args:
        doc: Article text; appended after the instructions.
        model: Model name forwarded to ``client.chat``.

    Returns:
        The reply's message content with every ``<think>...</think>`` span
        removed and surrounding whitespace stripped.
    """
    instructions = (
        "Summarize the following news article in one sentence. "
        "The summary should be around 20 to 25 words long (but don't explicit it in the response). Just return the summary:\n\n"
    )
    user_message = {"role": "user", "content": instructions + f"{doc}"}
    reply = client.chat(model=model, messages=[user_message])

    raw_text = reply['message']['content'].strip()
    # DOTALL lets "." cross newlines so multi-line <think> sections are removed.
    without_reasoning = re.sub(r"<think>.*?</think>", "", raw_text, flags=re.DOTALL)
    return without_reasoning.strip()

# One prediction per sampled document, in the same order as sampled_docs.
predictions_length_hint = []
for i, doc in enumerate(sampled_docs):
    # Total comes from the actual sample so the progress line stays correct
    # if the sample size is changed.
    print(f"⏳ Prompting {i+1}/{len(sampled_docs)} (length hint)...")
    try:
        summary = generate_summary_with_length_hint(doc)
    except Exception as e:
        # Best-effort: report the failure and record an empty prediction so the
        # list stays aligned one-to-one with the reference summaries.
        print(f"⚠️ Error: {e}")
        summary = ""
    predictions_length_hint.append(summary)

⏳ Prompting 1/5 (length hint)...
⏳ Prompting 2/5 (length hint)...
⏳ Prompting 3/5 (length hint)...
⏳ Prompting 4/5 (length hint)...
⏳ Prompting 5/5 (length hint)...


In [11]:
print("📊 ROUGE Scores (Length Hint Prompt):")
# Score the length-hint predictions against the same reference summaries.
scores_length_hint = rouge.compute(
    predictions=predictions_length_hint,
    references=sampled_refs,
)
# One line per metric, four decimal places.
for metric_name in scores_length_hint:
    print(f"{metric_name}: {scores_length_hint[metric_name]:.4f}")

📊 ROUGE Scores (Length Hint Prompt):
rouge1: 0.1449
rouge2: 0.0286
rougeL: 0.1228
rougeLsum: 0.1228


In [12]:
def print_summary_comparison(sampled_docs, sampled_refs, preds_vanilla, preds_len_hint):
    """Print each document with its reference summary and both predictions.

    The four sequences are walked in parallel with ``zip``, so iteration stops
    at the shortest one. The word counts shown are whitespace-separated token
    counts (``str.split()``).

    Args:
        sampled_docs: Source documents.
        sampled_refs: Reference summaries, parallel to ``sampled_docs``.
        preds_vanilla: Predictions from the plain prompt.
        preds_len_hint: Predictions from the length-hint prompt.

    Returns:
        None. All output goes to stdout.
    """
    def word_count(text):
        # Number of whitespace-separated tokens in ``text``.
        return len(text.split())

    print("🔍 Comparing Model Outputs (Vanilla vs Length-Controlled Prompt)\n")

    examples = zip(sampled_docs, sampled_refs, preds_vanilla, preds_len_hint)
    for number, (article, reference, vanilla, hinted) in enumerate(examples, start=1):
        print(f"\n==================== EXAMPLE {number} ====================")
        print(f"📰 Document:\n{article}\n")

        print(f"✅ Reference Summary ({word_count(reference)} words):\n{reference}\n")

        print(f"🤖 Vanilla Prediction ({word_count(vanilla)} words):\n{vanilla}\n")
        print(f"🧠 Length-Hint Prediction ({word_count(hinted)} words):\n{hinted}")
        print("=======================================================")

# Show every sampled document alongside its reference and both model outputs
print_summary_comparison(
    sampled_docs=sampled_docs,
    sampled_refs=sampled_refs,
    preds_vanilla=predictions_vanilla,
    preds_len_hint=predictions_length_hint,
)

🔍 Comparing Model Outputs (Vanilla vs Length-Controlled Prompt)


📰 Document:
When the nozzle releasing a stream of molten glass is raised above a certain level, that thread begins to wobble.
It traces out waves or loops - which the team found could be controlled by adjusting the speed of the nozzle.
Those shapes then become the building blocks for intricate, 3D objects.
Engineers at the Massachusetts Institute of Technology pioneered a method for glass 3D printing in 2015. They then started to work with mathematician Pierre-Thomas Brun, who has studied the fluid dynamics of liquid "ropes".
The results are documented in a video, which the team presented in the American Physical Society's "Gallery of Fluid Motion" in November.
It all started with a collaboration between two labs at MIT, Dr Brun told the BBC - one specialising in glass and the other in "mediated matter".
Runny honey
"Normally when you 3D print, the nozzle is very close to the piece that you're printing - that's the conve