In [1]:

try:
    import subprocess, sys
    subprocess.run(["ollama", "pull", "llama3.2"], check=False)
except Exception as e:
    print("[INFO] If this fails, pull via terminal: `ollama pull llama3.2`")


In [2]:
import os, json, re, time
import requests
import pandas as pd
from dataclasses import dataclass
from typing import Dict, Any, Optional


OLLAMA_BASE_URL = os.getenv("OLLAMA_URL", "http://localhost:11434")
OLLAMA_CHAT_ENDPOINT = f"{OLLAMA_BASE_URL}/api/chat"
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3.2")

def ping_ollama():
    try:
        r = requests.get(OLLAMA_BASE_URL, timeout=3)
        print("Ollama server reachable:", r.status_code)
    except Exception as e:
        print("[ERROR] Ollama server not reachable at", OLLAMA_BASE_URL)
        print(" - Ensure Ollama is installed and running.")
        print(" - Default URL is http://localhost:11434")
        raise

ping_ollama()
print("Using model:", OLLAMA_MODEL)


Ollama server reachable: 200
Using model: llama3.2


In [3]:

REVIEW_TEXT = """
Review by Nathaniel: After two weeks of hunting, I finally grabbed the Acme Super Blender (SB-200).
I almost clicked purchase on the Acme Mini Mixer, but held off.
Paid £89.99 after a holiday promo—list price was $129.
Order confirmation shows Nov 3rd, 2025; pickup happened 11/02/2025.
"Finally affordable at ninety bucks," I joked.
Store: HomeWorks – Seattle.
""".strip()

GROUND_TRUTH = {
    "name": "Acme Super Blender",
    "price": {"amount": 89.99, "currency": "GBP"},
    "date": "2025-11-03",
}


In [4]:

def ollama_chat(messages, temperature: float = 0.2, num_predict: int = 512) -> str:
    """
    Call Ollama's /api/chat endpoint (non-stream) and return assistant text.
    messages: list of dicts [{"role": "system"|"user"|"assistant", "content": "..."}]
    """
    payload = {
        "model": OLLAMA_MODEL,
        "messages": messages,
        "stream": False,
        "options": {
            "temperature": temperature,
            "num_predict": num_predict,
        }
    }
    r = requests.post(OLLAMA_CHAT_ENDPOINT, json=payload, timeout=60)
    r.raise_for_status()
    data = r.json()
    return data.get("message", {}).get("content", "")


In [5]:

ISO_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")

def try_parse_json(text: str) -> Optional[Dict[str, Any]]:
    text = text.strip()
    brace_start = text.find("{")
    brace_end = text.rfind("}")
    if brace_start != -1 and brace_end != -1 and brace_end > brace_start:
        candidate = text[brace_start:brace_end+1]
        try:
            return json.loads(candidate)
        except Exception:
            pass
    return None

def compute_accuracy(pred: Dict[str, Any], truth: Dict[str, Any]) -> float:
    """
    Score 0-5 (1.25 each for: name, price.amount, price.currency, date).
    """
    score = 0.0
    if pred.get("name") == truth["name"]:
        score += 1.25
    try:
        if float(pred.get("price", {}).get("amount")) == float(truth["price"]["amount"]):
            score += 1.25
    except Exception:
        pass
    if pred.get("price", {}).get("currency") == truth["price"]["currency"]:
        score += 1.25
    if pred.get("date") == truth["date"]:
        score += 1.25
    return score

def compute_format_score(text: str, pred_json: Optional[Dict[str, Any]]) -> float:
    """
    Format adherence: JSON-only, schema, ISO date. Score 0-5:
      - JSON present (2.0)
      - Exact keys & types (2.0)
      - ISO date string (1.0)
    """
    score = 0.0
    if pred_json is not None:
        score += 2.0
        schema_ok = set(pred_json.keys()) == {"name", "price", "date"}
        price_ok = isinstance(pred_json.get("price"), dict) and \
                   set(pred_json["price"].keys()) == {"amount", "currency"} and \
                   isinstance(pred_json["price"].get("amount"), (int, float)) and \
                   isinstance(pred_json["price"].get("currency"), str)
        name_ok = isinstance(pred_json.get("name"), str)
        date_ok = isinstance(pred_json.get("date"), str)
        if schema_ok and price_ok and name_ok and date_ok:
            score += 2.0
        if date_ok and ISO_DATE_RE.match(pred_json["date"]):
            score += 1.0
    return score

def compute_instruction_score(text: str, pred_json: Optional[Dict[str, Any]]) -> float:
    """
    Instruction adherence: JSON-only (no extra prose), ISO currency, disambiguation rules.
    Score 0-5:
      - No extra prose (JSON-only) (1.5)
      - Currency ISO code (GBP/USD/etc.) (1.5)
      - Correct product/date/price selection vs. truth (2.0)
    """
    score = 0.0
    if pred_json is not None:
        clean_json = json.dumps(pred_json, ensure_ascii=False)
        if text.strip().startswith("{") and text.strip().endswith("}") and len(text.strip()) <= len(clean_json) + 10:
            score += 1.5
        curr = pred_json.get("price", {}).get("currency")
        if curr in {"USD", "GBP", "EUR", "JPY", "AUD", "CAD"}:
            score += 1.5
        if pred_json.get("name") == GROUND_TRUTH["name"] and \
           pred_json.get("date") == GROUND_TRUTH["date"] and \
           pred_json.get("price", {}).get("amount") == GROUND_TRUTH["price"]["amount"]:
            score += 2.0
    return score

@dataclass
class RunResult:
    run_id: str
    technique: str
    prompt: str
    raw_output: str
    parsed: Optional[Dict[str, Any]]
    accuracy: float
    format_score: float
    instruction_score: float

def evaluate_run(run_id: str, technique: str, prompt: str, raw_output: str) -> RunResult:
    parsed = try_parse_json(raw_output)
    acc = compute_accuracy(parsed or {}, GROUND_TRUTH) if parsed else 0.0
    fmt = compute_format_score(raw_output, parsed)
    instr = compute_instruction_score(raw_output, parsed)
    return RunResult(
        run_id=run_id,
        technique=technique,
        prompt=prompt,
        raw_output=raw_output,
        parsed=parsed,
        accuracy=acc,
        format_score=fmt,
        instruction_score=instr
    )


In [6]:

BASELINE_PROMPT = "Extract the data."

messages_baseline = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": f"{BASELINE_PROMPT}\n\nText:\n{REVIEW_TEXT}"},
]

print("Running Baseline...")
baseline_output = ollama_chat(messages_baseline, temperature=0.7, num_predict=256)
print("--- Baseline Output ---\n", baseline_output)

baseline_result = evaluate_run(
    run_id="Run-1",
    technique="Baseline",
    prompt=BASELINE_PROMPT,
    raw_output=baseline_output
)


Running Baseline...
--- Baseline Output ---
 Here's the extracted data:

1. Product: Acme Super Blender (SB-200)
2. Price:
   - List price: $129
   - Purchase price: £89.99
3. Date:
   - Order confirmation date: Nov 3rd, 2025
   - Pickup date: 11/02/2025 (2 days after order confirmation)
4. Store: HomeWorks - Seattle


In [7]:

ROLE_PROMPT = (
    "Act as a Senior Data Analyst specializing in information extraction from noisy text.\n"
    "Your job is to extract the product actually purchased, the final amount paid (not list price), "
    "and the order confirmation date (if present). If multiple products are mentioned, pick the one purchased.\n"
    "Return fields Name, Price, and Date in plain text."
)

messages_role = [
    {"role": "system", "content": "You are a Senior Data Analyst specializing in structured extraction."},
    {"role": "user", "content": f"{ROLE_PROMPT}\n\nText:\n{REVIEW_TEXT}"},
]

print("\nRunning Technique 1 (Role Prompting)...")
role_output = ollama_chat(messages_role, temperature=0.3, num_predict=256)
print("--- Technique 1 Output ---\n", role_output)

role_result = evaluate_run(
    run_id="Run-2",
    technique="Role Prompting",
    prompt=ROLE_PROMPT,
    raw_output=role_output
)



Running Technique 1 (Role Prompting)...
--- Technique 1 Output ---
 Here is the extracted information in plain text:

Name: Acme Super Blender (SB-200)
Price: £89.99
Date: Nov 3rd, 2025

Note that I've only included the product "Acme Super Blender (SB-200)" as it was explicitly mentioned as the one purchased, and not the other products like Mini Mixer which were mentioned but not confirmed to be purchased.

If multiple products are mentioned, I would typically try to identify the most relevant or explicit mention of a purchase. In this case, I've chosen the first product mentioned that is also followed by "finally grabbed", indicating it was the one purchased.


In [8]:

FORMAT_PROMPT = """Extract the following fields from the review and provide output as STRICT JSON.

Requirements:
- JSON only; no prose.
- Fields and types:
  {
    "name": string,
    "price": { "amount": number, "currency": "USD|EUR|GBP|..." },
    "date": "YYYY-MM-DD"
  }
- Choose the product clearly purchased.
- Use the final paid price (not list price).
- If multiple dates appear, prefer the order confirmation date; otherwise use the purchase/pickup date.
- Normalize currency symbol to ISO code (e.g., £ → GBP).
- Convert "Nov 3rd, 2025" → "2025-11-03".

Text:
"""

messages_format = [
    {"role": "system", "content": "Return only a single JSON object exactly matching the schema."},
    {"role": "user", "content": f"{FORMAT_PROMPT}{REVIEW_TEXT}"},
]

print("\nRunning Technique 2 (Strict Output Formatting)...")
format_output = ollama_chat(messages_format, temperature=0.2, num_predict=256)
print("--- Technique 2 Output ---\n", format_output)

format_result = evaluate_run(
    run_id="Run-3",
    technique="Strict Output Formatting",
    prompt=FORMAT_PROMPT,
    raw_output=format_output
)



Running Technique 2 (Strict Output Formatting)...
--- Technique 2 Output ---
 ```json
{
  "name": "Acme Super Blender (SB-200)",
  "price": {
    "amount": 89.99,
    "currency": "GBP"
  },
  "date": "2025-11-03"
}
```

Note: The date was converted to the format `YYYY-MM-DD` as per the requirements.


In [9]:

FINAL_PROMPT = """You are a Senior Data Analyst specializing in structured extraction from noisy text.

Task:
Extract the product actually purchased, the final price paid (ignore list price), and the correct date (prefer order confirmation; otherwise purchase/pickup).

Output:
- Return ONLY a single JSON object (no commentary).
- Schema:
  {
    "name": string,
    "price": { "amount": number, "currency": "USD|EUR|GBP|..." },
    "date": "YYYY-MM-DD"
  }
- Normalization:
  - Currency: map symbol to ISO (e.g., £ → GBP, $ → USD).
  - Date: convert to ISO 8601 (YYYY-MM-DD).
  - If multiple products, choose the one explicitly purchased.
  - If multiple dates, prefer order confirmation; else use purchase/pickup.
- If a field is truly missing, set null; do not invent values.

Important:
- Reason internally.
- Output JSON only.

Text:
"""

messages_final = [
    {"role": "system", "content": "Return strictly one JSON object and nothing else."},
    {"role": "user", "content": f"{FINAL_PROMPT}{REVIEW_TEXT}"},
]

print("\nRunning Final Optimized Prompt...")
final_output = ollama_chat(messages_final, temperature=0.2, num_predict=256)
print("--- Final Output ---\n", final_output)

final_result = evaluate_run(
    run_id="Run-5",
    technique="Final Optimized Prompt",
    prompt=FINAL_PROMPT,
    raw_output=final_output
)



Running Final Optimized Prompt...
--- Final Output ---
 {
  "name": "Acme Super Blender (SB-200)",
  "price": {
    "amount": 89.99,
    "currency": "GBP"
  },
  "date": "2025-11-03"
}


In [11]:

all_results = [
    baseline_result,
    role_result,
    format_result,
    final_result
]

df = pd.DataFrame([{
    "run_id": r.run_id,
    "technique": r.technique,
    "accuracy(0-5)": r.accuracy,
    "format(0-5)": r.format_score,
    "instruction(0-5)": r.instruction_score,
    "parsed_json": json.dumps(r.parsed, ensure_ascii=False) if r.parsed else None,
    "raw_output": r.raw_output
} for r in all_results])

print("\n=== Summary Table ===")
display(df)

timestamp = time.strftime("%Y%m%d-%H%M%S")
out_path = f"ollama_structured_extraction_{timestamp}.jsonl"
with open(out_path, "w", encoding="utf-8") as f:
    for r in all_results:
        rec = {
            "run_id": r.run_id,
            "technique": r.technique,
            "prompt": r.prompt,
            "raw_output": r.raw_output,
            "parsed": r.parsed,
            "scores": {
                "accuracy": r.accuracy,
                "format": r.format_score,
                "instruction": r.instruction_score
            }
        }
        f.write(json.dumps(rec, ensure_ascii=False) + "\n")

print(f"\nSaved results to {out_path}")



=== Summary Table ===


Unnamed: 0,run_id,technique,accuracy(0-5),format(0-5),instruction(0-5),parsed_json,raw_output
0,Run-1,Baseline,0.0,0.0,0.0,,Here's the extracted data:\n\n1. Product: Acme...
1,Run-2,Role Prompting,0.0,0.0,0.0,,Here is the extracted information in plain tex...
2,Run-3,Strict Output Formatting,3.75,5.0,1.5,"{""name"": ""Acme Super Blender (SB-200)"", ""price...","```json\n{\n ""name"": ""Acme Super Blender (SB-..."
3,Run-5,Final Optimized Prompt,3.75,5.0,1.5,"{""name"": ""Acme Super Blender (SB-200)"", ""price...","{\n ""name"": ""Acme Super Blender (SB-200)"",\n ..."



Saved results to ollama_structured_extraction_20251204-234607.jsonl


In [12]:

def validate_schema(parsed: Dict[str, Any]) -> Dict[str, Any]:
    errors = []
    if set(parsed.keys()) != {"name", "price", "date"}:
        errors.append("Top-level keys must be exactly: name, price, date.")
    if not isinstance(parsed.get("name"), str):
        errors.append("name must be a string.")
    price = parsed.get("price")
    if not isinstance(price, dict) or set(price.keys()) != {"amount", "currency"}:
        errors.append("price must be an object with keys: amount, currency.")
    else:
        if not isinstance(price["amount"], (int, float)):
            errors.append("price.amount must be a number.")
        if not isinstance(price["currency"], str):
            errors.append("price.currency must be a string.")
    if not isinstance(parsed.get("date"), str) or not ISO_DATE_RE.match(parsed["date"]):
        errors.append("date must be an ISO string YYYY-MM-DD.")
    return {"valid": len(errors) == 0, "errors": errors}

if final_result.parsed:
    print("\nSchema Validation (Final):", validate_schema(final_result.parsed))



Schema Validation (Final): {'valid': True, 'errors': []}
