In [None]:
# Fetch the model non-interactively. `ollama run` without a prompt opens an
# interactive chat session, which cannot be driven from a notebook cell; the
# code in this notebook only talks to the local Ollama HTTP API, so it is
# enough for the model to be downloaded and the Ollama server to be running.
!ollama pull llama3.1:8b

In [13]:
from datasets import load_dataset
import evaluate
import requests
import json
from tqdm.notebook import tqdm  # Use tqdm.notebook for Jupyter Notebook
import time

# Number of test examples to evaluate; kept small so a full run stays fast.
NUM_EVAL_EXAMPLES = 100

# Load the first NUM_EVAL_EXAMPLES examples of the Gigaword test split
dataset = load_dataset("gigaword", split=f"test[:{NUM_EVAL_EXAMPLES}]")

# Initialize ROUGE metric
rouge = evaluate.load('rouge')

In [14]:
def summarize_with_ollama(
    document,
    max_tokens=50,
    model="llama3.1:8b",
    url="http://localhost:11434/api/generate",
    temperature=0.3,
    timeout=120,
):
    """Ask a local Ollama model to turn one news sentence into a headline.

    A one-shot prompt (one worked example followed by ``document``) is sent
    to the Ollama generate endpoint with streaming disabled.

    Args:
        document: News text to summarize.
        max_tokens: Sent to Ollama as the ``num_predict`` option.
        model: Name of the Ollama model to query.
        url: Full URL of the Ollama generate endpoint.
        temperature: Sent to Ollama as the ``temperature`` option.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled server cannot block the calling loop indefinitely.

    Returns:
        The text under the ``"response"`` key of the reply with surrounding
        whitespace removed (possibly an empty string), or ``None`` when the
        request fails, the server answers with a non-200 status, or the body
        is not valid JSON. Failures are printed rather than raised.
    """
    prompt_template = (
        "You are an AI assistant specialized in summarizing news articles. "
        "Summarize the following news sentence into a concise headline.\n\n"

        "Here is an example:\n"
        "News: Japan 's nec corp. and UNK computer corp. of the united states said wednesday they had agreed to join forces in supercomputer sales.\n"
        "Headline: Nec UNK in computer sales tie-up\n\n"

        "Now summarize the following news:\n\n"

        "News: {document}\n\n"
        "Headline:"
    )

    # Only the template is parsed by str.format, so braces inside the
    # document text are inserted verbatim.
    prompt = prompt_template.format(document=document)

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,  # ask for the whole completion in a single JSON reply
        "options": {
            "num_predict": max_tokens,
            "temperature": temperature,
        },
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"Error {response.status_code}: {response.text}")
            return None

        result = response.json()

    # ValueError covers an undecodable JSON body on requests versions whose
    # JSON error is not a RequestException subclass.
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Request failed: {e}")
        return None

    # Treat a missing or null "response" field as an empty summary.
    return (result.get('response') or '').strip()

In [None]:
# Generate a headline for every test example, then score them with ROUGE.
references = []
predictions = []
num_skipped = 0  # examples for which no usable headline came back

start = time.time()

# tqdm wraps the dataset loop to show a progress bar
for item in tqdm(dataset, desc="Summarizing", unit="example"):
    doc = item['document']
    ref_summary = item['summary']

    pred_summary = summarize_with_ollama(doc)

    # A falsy result (None or an empty string) cannot be scored; count it
    # so the reader can see that the scores cover only part of the data.
    if not pred_summary:
        num_skipped += 1
        continue

    references.append(ref_summary)
    predictions.append(pred_summary)

end = time.time()

print("\nOllama (Llama3.1:8b) Summarization Results:")

print(f"Number of examples: {len(references)}")
if num_skipped:
    print(f"Skipped examples (no summary returned): {num_skipped}")
print(f"\nElapsed time: {end - start:.2f} s")

if predictions:
    # Evaluate with ROUGE
    results = rouge.compute(predictions=predictions, references=references)

    print("\nROUGE Results:")
    for key, value in results.items():
        print(f"{key}: {value:.4f}")
else:
    # Nothing to score; keep `results` defined for any later cell.
    results = {}
    print("\nNo summaries were generated, so ROUGE was not computed. "
          "Check that the Ollama server is running.")

Summarizing:   0%|          | 0/100 [00:00<?, ?example/s]


Ollama (Llama3.1:8b) Summarization Results:
Number of examples: 100

Elapsed time: 49.06 s

ROUGE Results:
rouge1: 0.2886
rouge2: 0.1040
rougeL: 0.2632
rougeLsum: 0.2658
