
# PPS Testbed (Notebook GUI)

Simulate the **Proverb Pair Search (PPS)** frontend logic against your built bundle.

**Use**
1. Set `BUNDLE_PATH` below.
2. Click **Load Bundle**.
3. Choose X/Y keys, translation, and filters.
4. Click **Search** to render results with highlights.


In [1]:

# === Configuration ===
# Location of the PPS bundle JSON; used as the initial text of the
# "Bundle Path" box in the widget UI.
BUNDLE_PATH = "../../docs/data/v1/tools/pps/shk_pairs.json"  # adjust if needed
# Initial position of the "Limit" slider (maximum number of results).
DEFAULT_RESULT_LIMIT = 100


In [2]:

import json, re
from pathlib import Path
from collections import defaultdict
import ipywidgets as W
from IPython.display import display, HTML

# Module-level state. Every name below is reassigned by load_bundle().
bundle = None  # parsed bundle JSON; None until a bundle has been loaded
x_index = {}  # bundle["index"]["x"]
y_index = {}  # bundle["index"]["y"]
entries = []  # bundle["entries"]
trans_ids = []  # sorted keys of bundle["transMeta"]
default_trans = "kjv"  # bundle["defaultTrans"], else first trans id, else "kjv"

def load_bundle(path: str):
    """Parse the bundle JSON at ``path`` and publish it as module state.

    Rebinds the globals ``bundle``, ``entries``, ``x_index``, ``y_index``,
    ``trans_ids`` and ``default_trans``.

    Returns:
        A dict with the bundle's ``stats``, the sorted X and Y index keys,
        the sorted translation ids and the default translation id.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
    """
    global bundle, x_index, y_index, entries, trans_ids, default_trans

    bundle_file = Path(path)
    if not bundle_file.exists():
        raise FileNotFoundError(f"Bundle not found: {bundle_file}")

    bundle = json.loads(bundle_file.read_text(encoding="utf-8"))
    entries = bundle.get("entries", [])

    index = bundle.get("index", {})
    x_index = index.get("x", {})
    y_index = index.get("y", {})

    trans_ids = sorted(bundle.get("transMeta", {}).keys())
    # Without an explicit default, use the first translation id, or "kjv"
    # when the bundle lists no translations at all.
    fallback_trans = trans_ids[0] if trans_ids else "kjv"
    default_trans = bundle.get("defaultTrans", fallback_trans)

    summary = {}
    summary["stats"] = bundle.get("stats", {})
    summary["x_keys"] = sorted(x_index.keys())
    summary["y_keys"] = sorted(y_index.keys())
    summary["trans_ids"] = trans_ids
    summary["default_trans"] = default_trans
    return summary

def norm_key(s: str) -> str:
    """Return ``s`` trimmed, lowercased, with whitespace runs collapsed.

    A falsy ``s`` (``None`` or ``""``) yields ``""``.
    """
    trimmed = (s or "").strip().lower()
    return re.sub(r"\s+", " ", trimmed)

def pair_matches_filters(pair, x_keys_set, y_keys_set, require_all_x, require_all_y):
    """Tell whether ``pair`` satisfies the selected X and Y key filters.

    Each side's key set is its ``"key"`` plus every item of ``"keys"``,
    normalized with ``norm_key``. An empty filter set accepts any pair on
    that side. Otherwise "all" mode needs every wanted key to be present
    and "any" mode needs at least one.
    """

    def side_keys(side):
        node = pair[side]
        names = [node.get("key", "")]
        names.extend(node.get("keys", []))
        return {norm_key(name) for name in names}

    def side_accepts(wanted, present, need_all):
        if not wanted:
            return True
        if need_all:
            return wanted.issubset(present)
        return not wanted.isdisjoint(present)

    # Both sides are read up front, before any filter is evaluated.
    present_x = side_keys("x")
    present_y = side_keys("y")

    return (side_accepts(x_keys_set, present_x, require_all_x)
            and side_accepts(y_keys_set, present_y, require_all_y))

def highlight_text(text, phrases):
    """Wrap each case-insensitive occurrence of a phrase in ``<mark>`` tags.

    Falsy phrases are ignored. ``text`` is returned unchanged when nothing
    is left to highlight. Phrases are matched literally (regex-escaped).
    """
    if not phrases:
        return text

    wanted = {p for p in phrases if p}
    if not wanted:
        return text

    # Longest alternatives first, so a long phrase is preferred over a
    # shorter phrase that starts at the same position.
    by_length = sorted(wanted, key=len, reverse=True)
    alternation = "|".join(re.escape(p) for p in by_length)
    return re.sub("(" + alternation + ")", r"<mark>\1</mark>", text,
                  flags=re.IGNORECASE)

def gather_roots(pair, side: str):
    """List the root strings recorded on one side (``"x"``/``"y"``) of a pair.

    The single ``"root"`` value comes first, followed by the ``"roots"``
    items. Values are stripped, blanks are dropped, and duplicates are
    removed case-insensitively, keeping the first spelling seen.
    """
    node = pair.get(side, {})

    candidates = [(node.get("root") or "").strip()]
    candidates.extend((item or "").strip() for item in node.get("roots", []) if item)

    # dicts keep insertion order, so the first spelling of each root wins
    # and the original ordering is preserved.
    first_spelling = {}
    for value in candidates:
        if value:
            first_spelling.setdefault(value.lower(), value)
    return list(first_spelling.values())



In [3]:

def _entry_ids_for_keys(side, selected_keys):
    """Return ids of entries owning a pair indexed under any selected key.

    ``side`` is ``"x"`` or ``"y"``. Each key is looked up both as given and
    in its ``norm_key`` form, so an index whose keys are not normalized
    still matches the keys offered in the UI.
    """
    side_index = (bundle.get("index") or {}).get(side) or {}
    pair_ids = set()
    for raw_key in selected_keys:
        for lookup in {raw_key, norm_key(raw_key)}:
            pair_ids.update(side_index.get(lookup, []))
    return {
        eid
        for eid, entry in enumerate(entries)
        if any(p["pairId"] in pair_ids for p in entry.get("pairs", []))
    }


def run_search(selected_x, selected_y, mode_x, mode_y, selected_trans,
               book_filter, ref_contains, limit):
    """Search the loaded bundle for entries whose pairs match the filters.

    Args:
        selected_x, selected_y: iterables of X / Y keys (may be empty/None).
        mode_x, mode_y: ``"All"`` requires every selected key on that side
            of a pair; any other value requires at least one.
        selected_trans: translation id used to pick the entry text; falls
            back to the entry's first available text.
        book_filter: case-insensitive prefix the entry ``ref`` must have.
        ref_contains: case-insensitive substring the ``ref`` must contain.
        limit: maximum number of results; ``None`` means unlimited and a
            value <= 0 yields an empty list.

    Returns:
        A list of dicts with keys ``eid``, ``entry``, ``text`` and ``pairs``
        (the matching pairs only), ordered by entry id.

    Raises:
        RuntimeError: if no bundle has been loaded yet.
    """
    if bundle is None:
        raise RuntimeError("Bundle not loaded. Click 'Load Bundle'.")
    if limit is not None and limit <= 0:
        return []

    selected_x = list(selected_x or [])
    selected_y = list(selected_y or [])
    x_set = {norm_key(k) for k in selected_x}
    y_set = {norm_key(k) for k in selected_y}
    require_all_x = (mode_x == "All")
    require_all_y = (mode_y == "All")

    # Coarse prefilter via the bundle index; the per-pair check below is
    # what actually decides whether an entry is reported.
    candidate_eids = set(range(len(entries)))
    if x_set:
        candidate_eids &= _entry_ids_for_keys("x", selected_x)
    if y_set:
        candidate_eids &= _entry_ids_for_keys("y", selected_y)

    book_prefix = (book_filter or "").lower()
    ref_needle = (ref_contains or "").lower()

    results = []
    for eid in sorted(candidate_eids):
        e = entries[eid]
        if book_prefix or ref_needle:
            ref_lc = str(e["ref"]).lower()
            if book_prefix and not ref_lc.startswith(book_prefix):
                continue
            if ref_needle and ref_needle not in ref_lc:
                continue

        matched_pairs = [
            p for p in e.get("pairs", [])
            if pair_matches_filters(p, x_set, y_set, require_all_x, require_all_y)
        ]
        if not matched_pairs:
            continue

        tmap = e.get("text", {})
        text = tmap.get(selected_trans) or next(iter(tmap.values()), "")

        results.append({
            "eid": eid,
            "entry": e,
            "text": text,
            "pairs": matched_pairs
        })
        if limit is not None and len(results) >= limit:
            break
    return results


In [4]:

from IPython.display import display, HTML

# def render_results(results, selected_trans):
#     if not results:
#         display(HTML("<p><em>No results.</em></p>"))
#         return

#     blocks = []
#     for r in results:
#         e = r["entry"]
#         ref = e["ref"]
#         text = r["text"]
#         pairs = r["pairs"]

#         x_phrases, y_phrases = [], []
#         for p in pairs:
#             x_phrases += gather_roots(p, "x")
#             y_phrases += gather_roots(p, "y")
#         hi_text = highlight_text(text, x_phrases + y_phrases)

#         items = []
#         for p in pairs:
#             xk = p["x"].get("key","")
#             yk = p["y"].get("key","")
#             xr = ", ".join(gather_roots(p,"x")) or "—"
#             yr = ", ".join(gather_roots(p,"y")) or "—"
#             items.append(f"<li><strong>{xk}</strong> → <strong>{yk}</strong>"
#                          f"<br><small>X roots: {xr}<br>Y roots: {yr}</small></li>")

#         blocks.append(f'''
#         <div style="border:1px solid #ccc;padding:12px;margin:10px 0;border-radius:8px;">
#           <div style="font-weight:600;margin-bottom:6px;">{ref} &middot; <code>{selected_trans}</code></div>
#           <div style="line-height:1.6;">{hi_text}</div>
#           <ul style="margin-top:8px;">{''.join(items)}</ul>
#         </div>''')
#     display(HTML("".join(blocks)))

def render_results(results, selected_trans):
    """Display one HTML card per search result.

    Each card shows the entry ref and ``selected_trans``, the entry text
    with every root of the matched pairs highlighted, and a list of the
    matched pairs (keys, plus the first root of each side, falling back to
    the key when a side has no roots).

    Args:
        results: list of result dicts as produced by ``run_search``.
        selected_trans: translation id shown in the card header.

    NOTE: refs, keys, roots and text are inserted into the HTML as-is,
    without escaping.
    """
    if not results:
        display(HTML("<p><em>No results.</em></p>"))
        return

    blocks = []
    for r in results:
        ref = r["entry"]["ref"]
        pairs = r["pairs"]

        # Roots are computed once per pair with the shared helper and reused
        # for both the highlight phrases and the pair list.
        pair_roots = [(gather_roots(p, "x"), gather_roots(p, "y")) for p in pairs]

        # phrases to highlight
        phrases = []
        for xr, yr in pair_roots:
            phrases += xr
            phrases += yr
        hi_text = highlight_text(r["text"], phrases)

        # compact pair list
        items = []
        for p, (xr, yr) in zip(pairs, pair_roots):
            xk = p["x"].get("key","")
            yk = p["y"].get("key","")
            # Prefer a single root on each side if available; else fall back to keys
            x_show = xr[0] if xr else xk
            y_show = yr[0] if yr else yk
            items.append(f"<li><strong>{xk}</strong> → <strong>{yk}</strong>"
                         f"<br><span style='opacity:.8'>{x_show} → {y_show}</span></li>")
        items_html = "".join(items)

        blocks.append(f'''
        <div style="border:1px solid #ccc;padding:12px;margin:10px 0;border-radius:8px;">
          <div style="font-weight:600;margin-bottom:6px;">{ref} &middot; <code>{selected_trans}</code></div>
          <div style="line-height:1.6;">{hi_text}</div>
          <ul style="margin-top:8px;">{items_html}</ul>
        </div>''')
    display(HTML("".join(blocks)))



In [5]:
# --- Summary of unique X→Y connections for current results ---

from IPython.display import HTML, display

def summarize_connections(results):
    """Collect the distinct (X key, Y key) connections found in ``results``.

    Keys are stripped; a pair with a blank key on either side is skipped.
    The result is a list of tuples ordered case-insensitively by X, then Y.
    """

    def key_of(pair, side):
        return (pair[side].get("key", "") or "").strip()

    candidates = (
        (key_of(p, "x"), key_of(p, "y"))
        for r in results
        for p in r["pairs"]
    )
    unique = {conn for conn in candidates if conn[0] and conn[1]}

    ordered = list(unique)
    ordered.sort(key=lambda conn: (conn[0].lower(), conn[1].lower()))
    return ordered

def render_summary(results):
    """Build an ``HTML`` object with one pill per unique X→Y connection.

    Returns a "No connections." paragraph when ``results`` has none.
    """
    connections = summarize_connections(results)
    if not connections:
        return HTML("<p><em>No connections.</em></p>")

    pills = "".join(
        f'''<span style="display:inline-block;margin:4px 6px;padding:6px 10px;
                             border-radius:999px;border:1px solid #ddd;
                             background:#f8f8f8;font-size:90%;">
                    <strong>{xk}</strong> → <strong>{yk}</strong>
                </span>'''
        for xk, yk in connections
    )
    return HTML(f"<div>{pills}</div>")

# Output area for the connection summary; cleared and refilled on each search.
summary_out = W.Output()


In [6]:
# --- Widgets (with filters + Select/Clear) ---
import ipywidgets as W
from IPython.display import display, HTML

# Bundle loading: status line, load button and editable bundle path.
stats_out = W.HTML()
load_btn = W.Button(description="Load Bundle", button_style="primary")
bundle_path_tb = W.Text(value=BUNDLE_PATH, description="Bundle Path:", layout=W.Layout(width="80%"))

# Multi-select lists of X / Y keys; options are filled in once a bundle is loaded.
x_keys_w = W.SelectMultiple(options=[], description="X keys", rows=10, layout=W.Layout(width="45%"))
y_keys_w = W.SelectMultiple(options=[], description="Y keys", rows=10, layout=W.Layout(width="45%"))

# "Any" = at least one selected key must match; "All" = every selected key must match.
mode_x = W.ToggleButtons(options=["Any","All"], value="Any", description="Match X:")
mode_y = W.ToggleButtons(options=["Any","All"], value="Any", description="Match Y:")

# Translation choice, ref filters and result limit passed to run_search().
trans_dd = W.Dropdown(options=[], description="Translation:")
book_tb = W.Text(value="", description="Book starts:", placeholder="e.g., PRO")
ref_tb = W.Text(value="", description="Ref contains:", placeholder="e.g., 22:4")
limit_int = W.IntSlider(value=DEFAULT_RESULT_LIMIT, min=1, max=500, step=1, description="Limit")

# Actions and the output area that receives the rendered results.
search_btn = W.Button(description="Search", button_style="success")
export_btn = W.Button(description="Export Results (JSON)")
out_box = W.Output()

# --- Better selection controls for X/Y (filters + select/clear) ---
# Full, unfiltered key lists; the filter boxes narrow what the lists display.
x_all_keys, y_all_keys = [], []
x_filter_tb = W.Text(value="", description="Filter X:", placeholder="type to filter")
y_filter_tb = W.Text(value="", description="Filter Y:", placeholder="type to filter")
x_sel_all_btn = W.Button(description="Select All X")
x_clear_btn   = W.Button(description="Clear X")
y_sel_all_btn = W.Button(description="Select All Y")
y_clear_btn   = W.Button(description="Clear Y")

def refresh_key_options_from_bundle():
    """Load the bundle named in the path box and repopulate the widgets.

    Updates the full X/Y key lists, the visible key options (respecting any
    filter text already typed), the translation dropdown and the status
    line. Exceptions from ``load_bundle`` propagate to the caller.
    """
    global x_all_keys, y_all_keys

    info = load_bundle(bundle_path_tb.value)
    x_all_keys = info["x_keys"]
    y_all_keys = info["y_keys"]

    # Keep the lists consistent with whatever is already in the filter boxes.
    fx = x_filter_tb.value.strip().lower()
    fy = y_filter_tb.value.strip().lower()
    x_keys_w.options = [k for k in x_all_keys if fx in k.lower()]
    y_keys_w.options = [k for k in y_all_keys if fy in k.lower()]

    trans_options = info["trans_ids"] or [info["default_trans"]]
    trans_dd.options = trans_options
    # A Dropdown rejects a value that is not one of its options, so fall
    # back to the first translation when the bundle's default is not listed.
    default = info["default_trans"]
    trans_dd.value = default if default in trans_options else trans_options[0]

    stats = info["stats"] or {}
    stats_out.value = f"<b>Loaded.</b> Entries: {stats.get('entries')}, Pairs: {stats.get('pairs')}."

def on_load_clicked(_):
    """Button callback: load the bundle and report failures in the status line.

    A missing/unreadable file (``OSError``) or invalid JSON/encoding
    (``ValueError``) is shown in ``stats_out`` instead of being raised from
    the callback, where the user would not see it in the UI.
    """
    import html

    try:
        refresh_key_options_from_bundle()
    except (OSError, ValueError) as exc:
        stats_out.value = f"<b>Load failed:</b> {html.escape(str(exc))}"

def apply_filters(*_):
    """Narrow the X and Y key lists to keys containing the filter text.

    Matching is a case-insensitive substring test. Assigning ``options`` on
    a SelectMultiple resets its selection, so a list whose visible keys did
    not change is left untouched, and for a list that did change the
    selected keys that are still visible are re-selected afterwards.
    """
    sides = (
        (x_filter_tb, x_keys_w, x_all_keys),
        (y_filter_tb, y_keys_w, y_all_keys),
    )
    for filter_tb, select_w, all_keys in sides:
        needle = filter_tb.value.strip().lower()
        visible = [k for k in all_keys if needle in k.lower()]
        if list(select_w.options) == visible:
            continue
        visible_set = set(visible)
        kept = tuple(k for k in select_w.value if k in visible_set)
        select_w.options = visible
        select_w.value = kept

# Re-filter the key lists whenever the text of either filter box changes.
for _filter_box in (x_filter_tb, y_filter_tb):
    _filter_box.observe(apply_filters, names="value")


def sel_all_x(_):
    """Select every X key currently listed."""
    x_keys_w.value = tuple(x_keys_w.options)


def clr_x(_):
    """Deselect all X keys."""
    x_keys_w.value = ()


def sel_all_y(_):
    """Select every Y key currently listed."""
    y_keys_w.value = tuple(y_keys_w.options)


def clr_y(_):
    """Deselect all Y keys."""
    y_keys_w.value = ()


for _button, _handler in (
    (x_sel_all_btn, sel_all_x),
    (x_clear_btn, clr_x),
    (y_sel_all_btn, sel_all_y),
    (y_clear_btn, clr_y),
):
    _button.on_click(_handler)

def on_search_clicked(_):
    """Button callback: run the search and render the summary and results.

    Both output areas are cleared first. The search runs inside ``out_box``
    so a failure is reported there; in that case nothing else is rendered.
    """
    out_box.clear_output()
    summary_out.clear_output()

    res = None
    with out_box:
        res = run_search(
            list(x_keys_w.value), list(y_keys_w.value),
            mode_x.value, mode_y.value,
            trans_dd.value,
            book_tb.value.strip(), ref_tb.value.strip(),
            limit_int.value
        )
    if res is None:
        # run_search raised. Under IPython the Output context manager shows
        # the traceback in out_box and suppresses the exception, so stop
        # here rather than failing again on an unset result.
        return

    # Summary first (in its own output area)
    with summary_out:
        display(render_summary(res))
    with out_box:
        render_results(res, trans_dd.value)

def on_export_clicked(_):
    """Button callback: re-run the current search and save it as JSON.

    Writes ``pps_search_results.json`` to the current working directory.
    The confirmation message (or any error raised along the way) is shown
    in ``out_box``.
    """
    # json is a top-level module, not a member of pathlib.
    import json
    from pathlib import Path

    with out_box:
        res = run_search(
            list(x_keys_w.value), list(y_keys_w.value),
            mode_x.value, mode_y.value,
            trans_dd.value,
            book_tb.value.strip(), ref_tb.value.strip(),
            limit_int.value
        )
        outp = Path("pps_search_results.json")
        outp.write_text(json.dumps(res, ensure_ascii=False, indent=2), encoding="utf-8")
        display(HTML(f"<p>Saved <code>{outp}</code> in this folder.</p>"))

# Connect the action buttons to their callbacks.
load_btn.on_click(on_load_clicked)
search_btn.on_click(on_search_clicked)
export_btn.on_click(on_export_clicked)

# Layout, top to bottom: bundle path + load, status line, X/Y key pickers
# side by side, match modes, translation/ref/limit controls, action
# buttons, connection summary, result cards.
ui = W.VBox([
    W.HBox([bundle_path_tb, load_btn]),
    stats_out,
    W.HBox([
        W.VBox([x_filter_tb, x_keys_w, W.HBox([x_sel_all_btn, x_clear_btn])], layout=W.Layout(width="50%")),
        W.VBox([y_filter_tb, y_keys_w, W.HBox([y_sel_all_btn, y_clear_btn])], layout=W.Layout(width="50%")),
    ]),
    W.HBox([mode_x, mode_y]),
    W.HBox([trans_dd, book_tb, ref_tb, limit_int]),
    W.HBox([search_btn, export_btn]),
    summary_out,
    out_box
])
display(ui)


VBox(children=(HBox(children=(Text(value='../../docs/data/v1/tools/pps/shk_pairs.json', description='Bundle Pa…