In [30]:
# === Quick responsiveness test inside a notebook ===
# Prereqs:
#   pip install ollama pandas
from pathlib import Path
import json, re
import pandas as pd
from ollama import chat   # matches your example
# ---------------- Configuration ----------------
CASE_BACKGROUND = """
This is a hypothetical antitrust investigation about widget pricing.
Documents are responsive if they discuss widget pricing strategy,
communications with competitors, or evidence of market allocation.
"""
FOLDER = "./test_docs"  # folder containing files to test
MODELS = ["qwen3:4b","gemma3n:e4b",  "llama3.2:3b"]
SYSTEM_PROMPT = (
   "You are an eDiscovery reviewer. "
   "Decide if the document is RESPONSIVE to the case background. "
   "Output JSON: {\"label\":\"Responsive|Not Responsive|Needs Review\", \"rationale\":\"...\"}"
)
USER_TEMPLATE = "CASE BACKGROUND:\n{case}\n\nDOCUMENT:\n{doc}"
def read_file_text(p: Path, max_chars=5000) -> str:
   try:
       return p.read_text(encoding="utf-8", errors="ignore")[:max_chars]
   except:
       return ""
def call_model(model: str, case_text: str, doc_text: str) -> dict:
   msg = USER_TEMPLATE.format(case=case_text, doc=doc_text)
   resp = chat(
       model=model,
       messages=[
           {"role": "system", "content": SYSTEM_PROMPT},
           {"role": "user", "content": msg},
       ],
       think=False
   )
   raw = resp.message.content
   raw=re.sub(r"<think>[\s\S]*?</think>", "", raw, flags=re.IGNORECASE)
   m = re.search(r"\{.*\}", raw, re.DOTALL)
   try:
       data = json.loads(m.group(0) if m else raw)
       label = str(data.get("label","Needs Review")).strip()
       rationale = str(data.get("rationale","")).strip()
   except Exception:
        label, rationale = "Needs Review", f"Parse error: {raw[:100]}"
   return {"label": label, "rationale": rationale}
def majority_vote(labels):
   votes = {"Responsive":0, "Not Responsive":0, "Needs Review":0}
   for l in labels:
       if l in votes: votes[l]+=1
       else: votes["Needs Review"]+=1
   best = max(votes.items(), key=lambda kv: kv[1])
   # tie -> Needs Review
   if list(votes.values()).count(best[1]) > 1:
       return "Needs Review"
   return best[0]
results = []
for f in Path(FOLDER).glob("*"):
   print(f)
   if not f.is_file(): continue
   text = read_file_text(f)
   model_votes, model_rats = {}, {}
   for m in MODELS:
       out = call_model(m, CASE_BACKGROUND, text)
       model_votes[m] = out["label"]
       model_rats[m] = out["rationale"]
   final = majority_vote(list(model_votes.values()))
   print(final)
   results.append({
       "file": f.name,
       "final_vote": final,
       **{f"vote_{m}": v for m,v in model_votes.items()},
       **{f"rat_{m}": r for m,r in model_rats.items()},
   })
df = pd.DataFrame(results)
df

test_docs\New Text Document (2).txt
Not Responsive
test_docs\New Text Document (3).txt
Responsive
test_docs\New Text Document (4).txt
Not Responsive
test_docs\New Text Document.txt
Responsive


Unnamed: 0,file,final_vote,vote_qwen3:4b,vote_gemma3n:e4b,vote_llama3.2:3b,rat_qwen3:4b,rat_gemma3n:e4b,rat_llama3.2:3b
0,New Text Document (2).txt,Not Responsive,Not Responsive,Not Responsive,Not Responsive|Needs Review,The document is an internal holiday party invi...,This document is a holiday party invitation an...,The document does not discuss widget pricing s...
1,New Text Document (3).txt,Responsive,Responsive,Responsive,Not Responsive,The document explicitly details a market alloc...,The document discusses 'regional market alloca...,The document does not discuss widget pricing s...
2,New Text Document (4).txt,Not Responsive,Not Responsive,Not Responsive,Not Responsive|Needs Review,The document is a weekly company newsletter fo...,"The document discusses employee spotlights, vo...",The document does not discuss widget pricing s...
3,New Text Document.txt,Responsive,Responsive,Responsive,Responsive,The document explicitly discusses an agreement...,This document directly discusses widget pricin...,The document discusses widget pricing strategy...


In [6]:
df.to_csv('test0916.csv')