In [None]:
!pip -q install openai==1.* rank-bm25 pandas pillow pymupdf tqdm rouge nltk
import nltk
nltk.download('punkt', quiet=True)

import os, shutil, subprocess
from pathlib import Path
from openai import OpenAI
# Read the API key from the environment instead of embedding it in the source.
# NOTE: a key was previously hard-coded here; any key that has been committed
# or shared in a notebook must be treated as leaked and revoked.
_api_key = os.environ.get("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it (or load it from your secret "
        "store) before running this notebook."
    )
client = OpenAI(api_key=_api_key)
# Locations of the dataset artefacts used throughout the script.
CSV_PATH  = Path("/content/rule_presence_qa.csv")
IMG_DIR   = Path("/content/presence_images")
REPO_DIR  = Path("/content/design_qa")
RULE_PDF  = Path("/content/FSAE_Rules_2024.pdf")

# Populate the image folder from a fresh shallow clone whenever it is
# missing or empty; any existing clone is discarded first.
_images_present = IMG_DIR.exists() and len(list(IMG_DIR.glob("*"))) != 0
if not _images_present:
    if REPO_DIR.exists():
        shutil.rmtree(REPO_DIR)
    _clone_cmd = [
        "git", "clone", "-q", "--depth", "1",
        "https://github.com/anniedoris/design_qa.git",
        str(REPO_DIR),
    ]
    subprocess.run(_clone_cmd, check=True)
    IMG_DIR.mkdir(parents=True, exist_ok=True)
    _copy_cmd = f"cp -r {REPO_DIR}/dataset/rule_comprehension/rule_presence_qa/* {IMG_DIR}/"
    subprocess.run(["bash", "-lc", _copy_cmd], check=True)

# Count images (top level of IMG_DIR only, by extension)
n_imgs = 0
for _pattern in ("*.png", "*.jpg", "*.jpeg"):
    n_imgs += len(list(IMG_DIR.glob(_pattern)))
print("Images ready:", n_imgs)
# Download the question CSV only when no local copy exists yet.
if not CSV_PATH.exists():
    print("Downloading Presence CSV (RAW)…")
    _csv_url = "https://raw.githubusercontent.com/anniedoris/design_qa/main/dataset/rule_comprehension/rule_presence_qa.csv"
    subprocess.run(["wget", "-q", "-O", str(CSV_PATH), _csv_url], check=True)

# Sanity checks: the file must exist, be non-empty, and must not be an HTML
# page (checked on the first 512 characters, lower-cased).
_csv_nonempty = CSV_PATH.exists() and CSV_PATH.stat().st_size > 0
assert _csv_nonempty, "Presence CSV missing/empty."
with CSV_PATH.open("r", encoding="utf-8", errors="ignore") as fh:
    head = fh.read(512).lower()
assert "<html" not in head, "Presence CSV looks like HTML/404. Re-download RAW CSV."
# Make sure the rules PDF sits at RULE_PDF. The PDF is optional (the upload
# prompt below says so), so failures here are reported and the run continues
# without it instead of aborting -- but they are no longer hidden.
if not RULE_PDF.exists():
    # sorted() makes the pick deterministic when several FSAE*.pdf files exist;
    # glob order is otherwise unspecified.
    candidates = sorted(Path("/content").glob("FSAE*.pdf"))
    if candidates:
        os.replace(str(candidates[0]), str(RULE_PDF))
    else:
        try:
            from google.colab import files
        except Exception as e:
            # No upload helper available in this environment.
            print("[warn] google.colab upload unavailable; continuing without rules PDF:", e)
        else:
            print("Upload the FSAE Rules PDF (optional, improves RAG).")
            try:
                uploaded = files.upload()
                if uploaded:
                    # Use the first uploaded file, whatever it was called.
                    up_name = next(iter(uploaded))
                    os.replace(up_name, RULE_PDF)
            except Exception as e:
                # Best effort only, but say why the PDF is missing.
                print("[warn] PDF upload failed; continuing without rules PDF:", e)

print("CSV:", CSV_PATH.exists(), "PDF:", RULE_PDF.exists(), "REPO:", REPO_DIR.exists())

import io, re, textwrap, base64, time, json, importlib.util
import pandas as pd
from PIL import Image
from tqdm import tqdm
from rank_bm25 import BM25Okapi

# Paths
IMG_ROOT  = IMG_DIR
PRED_CSV  = Path("/content/presence_predictions.csv")
SCORE_TXT = Path("/content/presence_score.txt")

# Run settings
MODEL        = "gpt-4o-mini"  # model name sent with each chat completion request
MAX_SIDE     = 640            # default longest image side (pixels) after downscaling
JPEG_QUALITY = 80             # default JPEG quality used when re-encoding images
TEMPERATURE  = 0.0            # sampling temperature sent with each request
MAX_TOKENS   = 2              # max_tokens value sent with each request
CALL_DELAY   = 0.35           # seconds slept after each processed row
SAVE_EVERY   = 10             # rows between checkpoint writes of the predictions CSV
START_IDX    = 0              # first dataframe row index the loop processes
FORCE_RERUN  = True           # when True, an existing predictions CSV is deleted first

# Validate the inputs again before loading them: non-empty CSV that is not an
# HTML page, and an existing image folder.
_csv_nonempty = CSV_PATH.exists() and CSV_PATH.stat().st_size > 0
assert _csv_nonempty, "CSV missing or empty."
with CSV_PATH.open("r", encoding="utf-8", errors="ignore") as f:
    head = f.read(512).lower()
assert "<html" not in head, "CSV looks like HTML/404. Re-download RAW CSV."
assert IMG_ROOT.exists(), f"Images folder missing: {IMG_ROOT}"

# Load the questions and check the columns the rest of the script relies on.
df = pd.read_csv(CSV_PATH)
required = {"question","image","ground_truth"}
assert required <= set(df.columns), f"CSV must have columns: {required}"

# PyMuPDF is optional; without it the PDF-based retrieval is disabled.
try:
    import fitz
except Exception:
    HAVE_PYMUPDF = False
else:
    HAVE_PYMUPDF = True

def build_bm25_from_pdf(pdf_path: Path):
    """Build a BM25 index over the paragraphs of a PDF.

    Each page's text is split on blank lines; every block is whitespace-
    collapsed and kept only if it is at least 40 characters long.

    Args:
        pdf_path: Location of the PDF to index.

    Returns:
        ``(index, paragraphs)`` where ``index`` is a ``BM25Okapi`` built from
        the lower-cased, whitespace-tokenised paragraphs. ``(None, [])`` is
        returned when PyMuPDF is unavailable, the file does not exist, or no
        paragraph qualified.
    """
    if not HAVE_PYMUPDF or not pdf_path.exists():
        return None, []
    paras = []
    # The context manager closes the document (and its file handle) even if
    # text extraction raises; previously the document was never closed.
    with fitz.open(str(pdf_path)) as doc:
        for page in doc:
            text = page.get_text("text")
            for blk in re.split(r"\n\s*\n", text):
                s = " ".join(blk.split())
                if len(s) >= 40:
                    paras.append(s)
    if not paras:
        return None, []
    return BM25Okapi([p.lower().split() for p in paras]), paras

# Build the retrieval index once. BM25 is None and PDF_PARAS is empty when
# PyMuPDF or the PDF is unavailable, or when no paragraph was long enough.
BM25, PDF_PARAS = build_bm25_from_pdf(RULE_PDF)
print(f"[RAG] PDF exists: {RULE_PDF.exists()} | paragraphs indexed: {len(PDF_PARAS)} | BM25: {'ON' if BM25 else 'OFF'}")

def get_short_ctx(q: str, k: int = 2, limit: int = 300) -> str:
    """Return a short snippet of rule text retrieved for question *q*.

    The lower-cased question words plus three fixed keywords form the BM25
    query; the top *k* paragraphs are joined and shortened to *limit*
    characters. An empty string is returned when no index is available or
    nothing is retrieved.
    """
    if BM25:
        query_tokens = [*q.lower().split(), "visibility", "component", "rule"]
        top_paras = BM25.get_top_n(query_tokens, PDF_PARAS, n=k)
        if top_paras:
            joined = " ".join(top_paras)
            return textwrap.shorten(joined, width=limit, placeholder="…")
    return ""

# System message sent with every request made by ask_yesno.
SYSTEM_PROMPT = "You are an FSAE visual judge. Reply with only one token: Yes or No."

def downscale_to_b64(img_path: Path, max_side=MAX_SIDE, quality=JPEG_QUALITY) -> str:
    """Return the image at *img_path* as a base64-encoded RGB JPEG.

    The image is shrunk (never enlarged) so that its longest side is at most
    *max_side* pixels, keeping the aspect ratio.

    Args:
        img_path: Image file to load.
        max_side: Upper bound for the longest side, in pixels.
        quality: JPEG quality passed to the encoder.

    Returns:
        The JPEG bytes, base64-encoded and decoded as a UTF-8 string.
    """
    # Open the source inside its own context so the file handle is released
    # deterministically; convert() returns a separate image object that stays
    # usable after the source is closed.
    with Image.open(img_path) as src:
        im = src.convert("RGB")
    w, h = im.size
    s = max_side / max(w, h)
    if s < 1.0:
        # Clamp to 1 px: plain truncation gives a zero-length edge for very
        # elongated images, which cannot be resized or encoded.
        im = im.resize((max(1, int(w * s)), max(1, int(h * s))))
    buf = io.BytesIO()
    im.save(buf, format="JPEG", quality=quality)
    return base64.b64encode(buf.getvalue()).decode("utf-8")

def ask_yesno(image_b64: str, question: str, ctx: str) -> str:
    """Ask the model a yes/no question about an image.

    Args:
        image_b64: Base64-encoded JPEG, embedded as a data URL.
        question: Question text placed in the user message.
        ctx: Optional context; included in the message only when non-empty.

    Returns:
        "Yes" when the lower-cased, stripped reply starts with "y";
        "No" in every other case (including an empty reply).
    """
    prompt_parts = [f"Question: {question}\n"]
    if ctx:
        prompt_parts.append(f"Context: {ctx}\n")
    prompt_parts.append("Answer Yes or No only.")
    user_txt = "".join(prompt_parts)

    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
    }
    text_part = {"type": "text", "text": user_txt}
    msgs = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": [text_part, image_part]},
    ]

    response = client.chat.completions.create(
        model=MODEL,
        messages=msgs,
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
        timeout=60,
    )
    reply = (response.choices[0].message.content or "").strip().lower()
    # Anything that is not an explicit "y…" answer counts as "No".
    return "Yes" if reply.startswith("y") else "No"
# ---- Run the model over every row, checkpointing predictions as we go ----
# Outputs are pre-sized to len(df) so rows can be filled by index even when
# START_IDX > 0. confidence/explanation are constant placeholder columns.
preds = [""] * len(df)
confs = [0.0] * len(df)
expls = [""] * len(df)

if FORCE_RERUN and PRED_CSV.exists():
    try:
        PRED_CSV.unlink()
        print("[i] FORCE_RERUN=True → deleted old predictions CSV")
    except OSError as e:
        print("[warn] could not delete old CSV:", e)


def _save_predictions():
    """Write df plus the current prediction columns to PRED_CSV; return the frame."""
    snapshot = df.copy()
    snapshot["prediction"]  = preds
    snapshot["confidence"]  = confs
    snapshot["explanation"] = expls
    snapshot.to_csv(PRED_CSV, index=False)
    return snapshot


n_failed = 0  # rows whose answer is a default rather than a model reply
for i in tqdm(range(START_IDX, len(df)), total=len(df)-START_IDX):
    row = df.iloc[i]
    # Resolve the image relative to IMG_ROOT first, then as given in the CSV.
    img_path = IMG_ROOT / str(row["image"])
    if not img_path.exists():
        img_path = Path(str(row["image"]))
    if not img_path.exists():
        raise FileNotFoundError(f"Image not found: {img_path}")

    b64 = downscale_to_b64(img_path)
    q   = str(row["question"])
    ctx = get_short_ctx(q)

    # One retry after a short pause. A second failure still records "No" so
    # the run completes, but it is reported: silently defaulting would make
    # e.g. an invalid API key look like a legitimate (bad) accuracy.
    try:
        ans = ask_yesno(b64, q, ctx)
    except Exception as e1:
        print(f"[warn] row {i}: request failed ({e1!r}); retrying once")
        time.sleep(1.2)
        try:
            ans = ask_yesno(b64, q, ctx)
        except Exception as e2:
            n_failed += 1
            print(f"[warn] row {i}: retry failed ({e2!r}); recording 'No'")
            ans = "No"

    preds[i] = ans
    confs[i] = 0.0
    expls[i] = ""

    if ((i+1) % SAVE_EVERY == 0) or (i+1 == len(df)):
        _save_predictions()

    time.sleep(CALL_DELAY)

out = _save_predictions()
if n_failed:
    print(f"[warn] {n_failed} row(s) defaulted to 'No' after API errors; the accuracy below is affected.")

# Lite accuracy: case-insensitive exact match after mapping y/true -> yes and
# n/false -> no on the ground-truth side.
gt = out["ground_truth"].astype(str).str.strip().str.lower().replace(
    {"y":"yes","true":"yes","n":"no","false":"no"})
pr = out["prediction"].astype(str).str.strip().str.lower()
acc = (gt==pr).mean()

with open(SCORE_TXT,"w") as f:
    f.write(f"Presence (ACC): {acc:.3f}\n")
    f.write(f"Rows: {len(out)}\n")
    f.write(f"Pred file: {PRED_CSV}\n")

print(f"[DONE] Presence-Lite ACC={acc:.3f}")
print(f"Wrote: {PRED_CSV}  |  {SCORE_TXT}")


# Official Presence scoring (DesignQA metrics)

import sys

# Output folder for the official-metric artefacts.
RESULTS_DIR = Path("/content/results")
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# The repository provides the metric code; clone it if it is not there yet
# and make it importable.
if not REPO_DIR.exists():
    _clone_cmd = ["git", "clone", "-q", "https://github.com/anniedoris/design_qa.git", str(REPO_DIR)]
    subprocess.run(_clone_cmd, check=True)
sys.path.append(str(REPO_DIR))
subprocess.run([sys.executable, "-m", "pip", "install", "-q", "rouge", "nltk"], check=True)
import nltk
nltk.download('punkt', quiet=True)

GT   = pd.read_csv(CSV_PATH).copy()
PRED = pd.read_csv(PRED_CSV).copy()

# Cast every known text column that is present to str, on both frames.
_gt_text_cols = ["image","answer","label","gt","expected","GroundTruth","question","ground_truth"]
_pred_text_cols = ["image","prediction","model_prediction","reasoning","explanation"]
for _frame, _names in ((GT, _gt_text_cols), (PRED, _pred_text_cols)):
    for col in _names:
        if col in _frame.columns:
            _frame[col] = _frame[col].astype(str)

from pathlib import Path as _P
def _norm_png(x):
    s = "" if pd.isna(x) else str(x).strip()
    s = _P(s).name
    if not s.lower().endswith(".png"):
        s = f"{s}.png"
    return s.lower()

def pick_col(df, candidates):
    """Return the first entry of *candidates* found in ``df.columns``, else None."""
    return next((name for name in candidates if name in df.columns), None)

YES, NO, INS = "Yes", "No", "INSUFFICIENT RULE EVIDENCE"
def _norm_truth(s: str) -> str:
    t = (s or "").strip().lower()
    if t.startswith("y"): return YES
    if t.startswith("n"): return NO
    if "insufficient" in t: return INS
    return NO

def _norm_pred(s: str) -> str:
    t = (s or "").strip().lower()
    if t.startswith("y"): return YES
    if t.startswith("n"): return NO
    if "insufficient" in t: return INS
    return NO

# ---- Align ground-truth rows with prediction rows -------------------------
# Expose the GT label under the canonical name "ground_truth".
if "ground_truth" not in GT.columns:
    alt = pick_col(GT, ["answer","label","gt","expected","GroundTruth"])
    if alt:
        GT = GT.rename(columns={alt:"ground_truth"})
    else:
        raise KeyError(f"No GT label column in GT. Columns={list(GT.columns)}")

if "image" in GT.columns and "image" in PRED.columns:
    GT["image_norm"] = GT["image"].apply(_norm_png)
    PRED["image_norm"] = PRED["image"].apply(_norm_png)
    # An image may be used by several questions. Joining on the file name
    # alone would pair every GT row with every prediction for that image (a
    # cross product), so the k-th occurrence of an image in GT is matched
    # with the k-th occurrence in PRED. This assumes both files list the rows
    # of one image in the same relative order. With unique image names the
    # result equals a plain merge on image_norm.
    _gt_keyed = GT.assign(_occ=GT.groupby("image_norm").cumcount())
    _pred_keyed = PRED.assign(_occ=PRED.groupby("image_norm").cumcount())
    J = _gt_keyed.merge(
        _pred_keyed, on=["image_norm", "_occ"], how="inner", suffixes=("_gt","_pred")
    ).drop(columns="_occ")
else:
    print("[WARN] Aligning by row order (no reliable image key).")
    PRED = PRED.reindex(range(len(GT))).copy()
    # Positional join. Columns present on both sides receive the same
    # suffixes a merge would add, so no column name ends up duplicated.
    J = GT.reset_index(drop=True).join(
        PRED.reset_index(drop=True), lsuffix="_gt", rsuffix="_pred"
    )
    base_img_col = pick_col(J, ["image_gt","image"])
    if base_img_col is None:
        raise KeyError("No image column found after fallback alignment.")
    J["image_norm"] = J[base_img_col].apply(_norm_png)

missing = len(GT) - len(J)
if missing > 0:
    print(f"[WARN] {missing} GT rows had no matching prediction by image name.")
# Locate the label and prediction columns in the joined frame; the candidate
# lists cover both suffixed and unsuffixed names, in priority order.
_gt_candidates = [
    "ground_truth", "ground_truth_gt", "answer_gt", "label_gt", "gt_gt",
    "expected_gt", "GroundTruth_gt",
    "answer", "label", "gt", "expected", "GroundTruth",
]
gt_col = pick_col(J, _gt_candidates)
if gt_col is None:
    raise KeyError(f"Couldn't find ground truth column after merge. Columns={list(J.columns)}")

pred_col = pick_col(J, ["model_prediction", "prediction"])
if pred_col is None:
    raise KeyError(f"Couldn't find prediction column after merge. Columns={list(J.columns)}")

# Normalise both sides to the canonical labels.
J["_gt_norm"]   = J[gt_col].map(_norm_truth)
J["_pred_norm"] = J[pred_col].map(_norm_pred)

# Write the three-column CSV that is handed to the metric functions.
PRESENCE_EVAL = str(RESULTS_DIR / "presence_eval_official.csv")
df_eval = pd.DataFrame({
    "ground_truth": J["_gt_norm"].astype(str),
    "model_prediction": J["_pred_norm"].astype(str),
})
df_eval["explanation"] = "__"
df_eval.to_csv(PRESENCE_EVAL, index=False)
print(f"[OK] Prepared official Presence eval CSV → {PRESENCE_EVAL}")

# Try repo metric. Three tiers, each best-effort: package import, loading
# metrics.py by path, and finally a plain accuracy computed locally.
eval_presence_qa = None
eval_boolean_qa = None
try:
    from eval.metrics.metrics import eval_presence_qa, eval_boolean_qa
except Exception:
    import importlib.util
    metrics_py = REPO_DIR / "eval" / "metrics" / "metrics.py"
    try:
        spec = importlib.util.spec_from_file_location("dq_metrics", str(metrics_py))
        dq = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(dq)
    except Exception as e:
        # A missing or broken metrics.py must not abort the run: the simple
        # accuracy fallback below can still produce a score.
        print("[warn] could not load repo metrics module:", e)
    else:
        eval_presence_qa = getattr(dq, "eval_presence_qa", None)
        eval_boolean_qa  = getattr(dq, "eval_boolean_qa", None)

score = None
if callable(eval_presence_qa):
    try:
        score = eval_presence_qa(PRESENCE_EVAL)
        print(f"[OK] eval_presence_qa: {score}")
    except Exception as e:
        print("[warn] eval_presence_qa failed:", e)

if score is None and callable(eval_boolean_qa):
    try:
        score = eval_boolean_qa(PRESENCE_EVAL)
        print(f"[OK] eval_boolean_qa: {score}")
    except Exception as e:
        print("[warn] eval_boolean_qa failed:", e)

# Final fallback: simple ACC (case-insensitive exact match)
if score is None:
    acc2 = (df_eval["ground_truth"].str.lower() == df_eval["model_prediction"].str.lower()).mean()
    score = acc2
    print(f"[fallback] Simple ACC: {acc2:.3f}")

# Assemble the report first, then write it in one go. The score is printed
# as a float when it converts to one, otherwise verbatim.
_report_lines = [
    "DesignQA Results\n",
    "Subset: Presence\n",
    f"Num Questions: {len(J)}\n",
]
try:
    _score_line = f"ACC: {float(score):.6f}\n"
except Exception:
    _score_line = f"Score: {score}\n"
_report_lines.append(_score_line)

with open("/content/presence.txt", "w") as f:
    f.writelines(_report_lines)

print("Score file → /content/presence.txt")
