In [5]:
%pip install -q --upgrade pip
%pip install -q boto3


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
%cd ..
import os, pathlib
print("CWD:", os.getcwd())
print("Here:", [p.name for p in pathlib.Path('.').iterdir()])


/Users/kishorekumar/Hackathon_Project/Truthlens
CWD: /Users/kishorekumar/Hackathon_Project/Truthlens
Here: ['core', 'app', 'data', 'scorecards']


In [2]:
import boto3, json
session = boto3.Session(profile_name="default", region_name="us-east-2")
brt = session.client("bedrock-runtime")

In [4]:
import boto3, json

# use the default profile you configured
session = boto3.Session(profile_name="default", region_name="us-east-2")
brt = session.client("bedrock-runtime")
MODEL_ID = "us.amazon.nova-micro-v1:0"

# smoke test
resp = brt.converse(
    modelId=MODEL_ID,
    messages=[{"role":"user","content":[{"text": 'Return exactly {"ok": true}'}]}]
)
print(resp["output"]["message"]["content"][0]["text"])


Sure, here is the exact response you requested:

```json
{
  "ok": true
}
```


In [5]:
from datetime import datetime
import uuid, json

SYSTEM = """You score the reliability of a post using only the provided sources and policy.
Independence: count sources only if organizations differ.
Rules:
- If 2+ independent Tier A or P support and 0 contradict → +25.
- Any Tier A/P contradiction → −30.
- Each Tier B support → +15; each Tier C support → +5 (cap total support at +25).
- If total sources < 2 and no Tier P → clamp label to "Uncertain" (score ≤ 69).
Return EXACTLY a JSON object with fields:
version, score (0-100), label, contributions[], bullets[3], badges[], citations[], meta{post_id,run_id,latency_ms}.
No prose or markdown outside the JSON. Do not invent sources.
"""

def score_one(item, policy):
    t0 = datetime.utcnow()
    payload = {
        "post_id": item.get("post_id"),
        "post": item["post"],
        "sources": item["sources"],
        "scoring_policy": policy
    }
    response = brt.converse(
        modelId=MODEL_ID,
        system=[{"text": SYSTEM}],
        messages=[{"role":"user","content":[{"text": json.dumps(payload)}]}]
    )
    text = response["output"]["message"]["content"][0]["text"]
    sc = json.loads(text)
    sc.setdefault("version","1.0")
    sc.setdefault("badges",[])
    sc.setdefault("meta",{})
    sc["meta"]["post_id"] = item.get("post_id")
    sc["meta"]["run_id"] = sc["meta"].get("run_id", str(uuid.uuid4()))
    sc["meta"]["latency_ms"] = sc["meta"].get("latency_ms", int((datetime.utcnow()-t0).total_seconds()*1000))
    return sc


In [6]:
from pathlib import Path
import json

# if your notebook is inside Truthlens/app, ROOT is the parent
ROOT = Path("..") if Path(".").name == "app" else Path(".")

policy = json.loads((ROOT/"core/scoring.json").read_text())
samples = [json.loads(l) for l in (ROOT/"data/postproof/sample.jsonl").read_text().splitlines()]
len(samples), samples[0]["post_id"]


(3, 'p1')

In [7]:
import pathlib
outdir = (ROOT/"scorecards"); outdir.mkdir(exist_ok=True)

results = []
for item in samples:
    sc = score_one(item, policy)
    results.append(sc)
    print(item["post_id"], sc["label"], sc["score"])
    (outdir/f"{item['post_id']}.json").write_text(json.dumps(sc, indent=2))

len(results)


  t0 = datetime.utcnow()
  sc["meta"]["latency_ms"] = sc["meta"].get("latency_ms", int((datetime.utcnow()-t0).total_seconds()*1000))


p1 Supported 25
p2 Uncertain 10
p3 Uncertain 5


3

In [8]:
import pprint
pprint.pprint(results[0])


{'badges': [],
 'bullets': ['Airport closure due to snow confirmed by AP.',
             'City DOT confirms all flights suspended due to weather.',
             'Travel Blog provides unrelated terminal information.'],
 'citations': [{'source': 'AP',
                'title': 'Airport announces closure due to snowfall.'},
               {'source': 'City DOT',
                'title': 'All flights suspended today due to weather.'}],
 'contributions': [{'source': 'AP',
                    'stance': 'support',
                    'tier': 'A',
                    'weight': 25},
                   {'source': 'City DOT',
                    'stance': 'support',
                    'tier': 'P',
                    'weight': 25}],
 'label': 'Supported',
 'meta': {'latency_ms': None, 'post_id': 'p1', 'run_id': None},
 'score': 25,
 'version': '1.0'}


In [9]:
from datetime import datetime
import uuid, json

def finalize_score(sc, item, policy, t0):
    # (paste your full finalize_score here)
    ...

def score_one(item, policy):
    # (paste your full score_one here)
    ...


In [10]:
from pathlib import Path
ROOT = Path("..") if Path(".").name == "app" else Path(".")

policy = json.loads((ROOT/"core/scoring.json").read_text())
samples = [json.loads(l) for l in (ROOT/"data/postproof/sample.jsonl").read_text().splitlines()]

outdir = ROOT/"scorecards"; outdir.mkdir(exist_ok=True)

for item in samples:
    sc = score_one(item, policy)
    print(item["post_id"], sc["label"], sc["score"])
    (outdir/f"{item['post_id']}.json").write_text(json.dumps(sc, indent=2))


TypeError: 'NoneType' object is not subscriptable

In [11]:
import inspect
print(score_one)
print("-----")
print(inspect.getsource(score_one))


<function score_one at 0x10edbb9c0>
-----
def score_one(item, policy):
    # (paste your full score_one here)
    ...



In [13]:
import json, uuid
from datetime import datetime

def finalize_score(sc, item, policy, t0):
    """
    Recompute score deterministically using policy + provided sources.
    Fills label from ladder and ensures meta.run_id / meta.latency_ms are set.
    """
    base = policy.get("base", 50)
    w = policy["weights"]

    def T(x): return (x or "").upper()

    # counts per our simple synthetic schema
    sup_AP = sum(1 for s in item["sources"]
                 if s.get("stance") == "support" and T(s.get("tier")) in ("A", "P"))
    con_AP = sum(1 for s in item["sources"]
                 if s.get("stance") == "contradict" and T(s.get("tier")) in ("A", "P"))
    sup_B  = sum(1 for s in item["sources"]
                 if s.get("stance") == "support" and T(s.get("tier")) == "B")
    sup_C  = sum(1 for s in item["sources"]
                 if s.get("stance") == "support" and T(s.get("tier")) == "C")

    score = base
    # contradiction from Tier A/P dominates
    if con_AP >= 1:
        score += w.get("contradicts_from_tierA_or_P>=1", -30)

    # if 2+ independent Tier A/P supports and no contradiction → add +25
    if sup_AP >= 2 and con_AP == 0:
        score += w.get("supports_from_tierA_or_P>=2", 25)

    # add lower-tier support
    score += sup_B * w.get("tierB_support", 15)
    score += sup_C * w.get("tierC_support", 5)

    # clamp score to 0–100
    score = max(0, min(100, score))

    # evidence floor: if <2 sources and no Tier P, clamp label to Uncertain
    floor = policy.get("evidence_floor", {"min_sources": 2})
    has_tierP = any(T(s.get("tier")) == "P" for s in item["sources"])
    forced_label = None
    if len(item["sources"]) < floor.get("min_sources", 2) and not has_tierP:
        forced_label = "Uncertain"

    # map numeric score to label via ladder
    ladder = sorted(policy["ladder"], key=lambda b: b["min"], reverse=True)
    label = next((b["label"] for b in ladder if score >= b["min"]), ladder[-1]["label"])
    if forced_label:
        label = forced_label

    sc["score"] = score
    sc["label"] = label

    # ensure meta
    m = sc.setdefault("meta", {})
    if not m.get("run_id"):
        m["run_id"] = str(uuid.uuid4())
    if not m.get("latency_ms"):
        m["latency_ms"] = int((datetime.utcnow() - t0).total_seconds() * 1000)

    return sc

def score_one(item, policy):
    """
    Calls Bedrock (Nova Micro) to produce an explainer JSON,
    then normalizes numeric score/label deterministically.
    Always returns a dict.
    """
    t0 = datetime.utcnow()
    payload = {
        "post_id": item.get("post_id"),
        "post": item["post"],
        "sources": item["sources"],
        "scoring_policy": policy
    }

    # Ask the model
    resp = brt.converse(
        modelId=MODEL_ID,
        system=[{"text": SYSTEM}],
        messages=[{"role":"user","content":[{"text": json.dumps(payload)}]}]
    )

    # Extract text
    text = resp["output"]["message"]["content"][0]["text"].strip()

    # Parse JSON defensively
    try:
        sc = json.loads(text)
        if not isinstance(sc, dict):
            raise ValueError("Model returned non-dict JSON")
    except Exception:
        sc = {
            "version": "1.0",
            "score": policy.get("base", 50),
            "label": "Uncertain",
            "contributions": [],
            "bullets": ["Model returned non-JSON or unexpected shape."],
            "badges": [],
            "citations": [],
            "meta": {}
        }

    # normalize/override numeric score + label and fill meta
    sc = finalize_score(sc, item, policy, t0)
    return sc


In [14]:
import inspect
print(inspect.getsource(score_one).splitlines()[:5])


['def score_one(item, policy):', '    """', '    Calls Bedrock (Nova Micro) to produce an explainer JSON,', '    then normalizes numeric score/label deterministically.', '    Always returns a dict.']


In [15]:
test_sc = score_one(samples[0], policy)
print(type(test_sc), test_sc.get("label"), test_sc.get("score"))


  t0 = datetime.utcnow()


<class 'dict'> Supported 75


  m["latency_ms"] = int((datetime.utcnow() - t0).total_seconds() * 1000)


In [17]:
from pathlib import Path
import json
ROOT = Path("..") if Path(".").name == "app" else Path(".")
outdir = ROOT/"scorecards"; outdir.mkdir(exist_ok=True)

for item in samples:
    sc = score_one(item, policy)
    print(item["post_id"], sc["label"], sc["score"])
    (outdir/f"{item['post_id']}.json").write_text(json.dumps(sc, indent=2))


  t0 = datetime.utcnow()
  m["latency_ms"] = int((datetime.utcnow() - t0).total_seconds() * 1000)


p1 Supported 75
p2 Low 25
p3 Uncertain 55


In [1]:
# app/score.py
import os, json, uuid, sys, time
from datetime import datetime, timezone
from pathlib import Path

# ---------- Config ----------
REGION   = os.getenv("REGION", "us-east-2")
MODEL_ID = os.getenv("MODEL_ID", "us.amazon.nova-micro-v1:0")
USE_BEDROCK = os.getenv("USE_BEDROCK", "1") not in ("0", "false", "False")

SYSTEM = """You score the reliability of a post using only the provided sources and policy.
Independence: count sources only if organizations differ.
Rules:
- If 2+ independent Tier A or P support and 0 contradict → +25.
- Any Tier A/P contradiction → −30.
- Each Tier B support → +15; each Tier C support → +5 (cap total support at +25).
- If total sources < 2 and no Tier P → clamp label to "Uncertain" (score ≤ 69).
Return EXACTLY a JSON object with fields:
version, score (0-100), label, contributions[], bullets[3], badges[], citations[], meta{post_id,run_id,latency_ms}.
No prose or markdown outside the JSON. Do not invent sources.
"""

# ---------- Deterministic overlay ----------
def finalize_score(sc: dict, item: dict, policy: dict, t0):
    base = policy.get("base", 50)
    w = policy["weights"]
    T = lambda x: (x or "").upper()

    sup_AP = sum(1 for s in item["sources"]
                 if s.get("stance") == "support" and T(s.get("tier")) in ("A", "P"))
    con_AP = sum(1 for s in item["sources"]
                 if s.get("stance") == "contradict" and T(s.get("tier")) in ("A", "P"))
    sup_B  = sum(1 for s in item["sources"]
                 if s.get("stance") == "support" and T(s.get("tier")) == "B")
    sup_C  = sum(1 for s in item["sources"]
                 if s.get("stance") == "support" and T(s.get("tier")) == "C")

    score = base
    if con_AP >= 1:
        score += w.get("contradicts_from_tierA_or_P>=1", -30)
    if sup_AP >= 2 and con_AP == 0:
        score += w.get("supports_from_tierA_or_P>=2", 25)
    score += sup_B * w.get("tierB_support", 15)
    score += sup_C * w.get("tierC_support", 5)
    score = max(0, min(100, score))

    floor = policy.get("evidence_floor", {"min_sources": 2})
    has_tierP = any(T(s.get("tier")) == "P" for s in item["sources"])
    forced_label = "Uncertain" if (len(item["sources"]) < floor["min_sources"] and not has_tierP) else None

    ladder = sorted(policy["ladder"], key=lambda b: b["min"], reverse=True)
    label = next((b["label"] for b in ladder if score >= b["min"]), ladder[-1]["label"])
    if forced_label: label = forced_label

    sc.setdefault("version", "1.0")
    sc.setdefault("badges", [])
    sc.setdefault("meta", {})
    sc["score"] = score
    sc["label"] = label

    m = sc["meta"]
    m.setdefault("run_id", str(uuid.uuid4()))
    if not m.get("latency_ms"):
        m["latency_ms"] = int((datetime.now(timezone.utc) - t0).total_seconds() * 1000)

    return sc

# ---------- Bedrock call (optional) ----------
def call_bedrock(payload):
    import boto3  # lazy import
    brt = boto3.client("bedrock-runtime", region_name=REGION)
    resp = brt.converse(
        modelId=MODEL_ID,
        system=[{"text": SYSTEM}],
        messages=[{"role":"user","content":[{"text": json.dumps(payload)}]}],
        inferenceConfig={"maxTokens": 400}
    )
    text = resp["output"]["message"]["content"][0]["text"]
    return json.loads(text)

# ---------- Main scorer ----------
def score_one(item, policy):
    t0 = datetime.now(timezone.utc)
    payload = {
        "post_id": item.get("post_id"),
        "post": item["post"],
        "sources": item["sources"],
        "scoring_policy": policy
    }
    if USE_BEDROCK:
        try:
            sc = call_bedrock(payload)
        except Exception as e:
            # fail-safe: structured fallback
            sc = {"bullets": ["(fallback) scorer unavailable"], "contributions": [], "citations": [], "meta": {"post_id": item.get("post_id")}}
    else:
        sc = {"bullets": ["(offline) deterministic only"], "contributions": [], "citations": [], "meta": {"post_id": item.get("post_id")}}

    sc = finalize_score(sc, item, policy, t0)
    sc["meta"]["post_id"] = item.get("post_id")
    return sc

# ---------- CLI ----------
def run_cli(in_jsonl: Path, policy_path: Path, out_dir: Path):
    policy = json.loads(policy_path.read_text())
    out_dir.mkdir(parents=True, exist_ok=True)

    lines = [l for l in in_jsonl.read_text().splitlines() if l.strip()]
    samples = [json.loads(l) for l in lines]

    printed = 0
    for item in samples:
        sc = score_one(item, policy)
        printed += 1
        print(f"{item['post_id']} {sc['label']} {sc['score']}")
        (out_dir / f"{item['post_id']}.json").write_text(json.dumps(sc, indent=2))
    return printed

if __name__ == "__main__":
    ROOT = Path(__file__).resolve().parents[1]  # repo root
    in_jsonl = ROOT / "data/postproof/sample.jsonl"
    policy   = ROOT / "core/scoring.json"
    out_dir  = ROOT / "scorecards"

    n = run_cli(in_jsonl, policy, out_dir)
    print(f"\nWrote {n} scorecards to {out_dir}")


NameError: name '__file__' is not defined