In [None]:
# ────────────────────────────────
# Cell 1  Imports & hyper‑params
# ────────────────────────────────
import json, random, hashlib, math, itertools, textwrap
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer   # filler stub
np.set_printoptions(linewidth=120, threshold=20)   # keep printed arrays compact

D        = 4096          # CHV dimensionality
ROLES    = [                     # frozen slot list
    "Subject", "Predicate", "Object",
    "Event",   "Tense",    "Attr",
    "IndirectObject", "Type", "Source", "Date", "Venue"
]
SEED     = 42
# Seed every random source the notebook draws from: the stdlib `random`
# module (used when sampling distractor fillers) and the NumPy generator.
# Without the first line a Restart & Run All gives different negatives each time.
random.seed(SEED)
rng      = np.random.default_rng(SEED)


In [None]:
# ────────────────────────────────
# Cell 2  Load the demo JSON
# ────────────────────────────────
# Skeleton record: paste the full record from your prompt between the quotes.
demo_json_template = """
{  "version": "2.1",
   "sentence": "I saw a white dog chase the brown cat quickly in the backyard.",
   "nodes": [...],   "edges": [...], "layouts": {...}
}
"""
# Blank out the "..." placeholders so the skeleton parses as JSON
# ([...] becomes [] and {...} becomes {}).
demo_json = demo_json_template.replace("...", "")
data = json.loads(demo_json)

node_count, edge_count = len(data["nodes"]), len(data["edges"])
print("Sentence:", data["sentence"])
print("Total nodes:", node_count, "  edges:", edge_count)


In [None]:
# ────────────────────────────────
# Cell 3  Distil JSON → tuple list τ = {(ℓ,s)}
# ────────────────────────────────
def tuples_from_json(d, roles=None):
    """Distil a parsed record into the tuple list τ = [(role, filler), ...].

    Parameters
    ----------
    d : dict
        Parsed record. Its "nodes" entry is iterated; each node is read for a
        "roles" list and a "filler" value.
    roles : collection of str, optional
        Role labels to keep. Defaults to the module-level ``ROLES`` list.

    Returns
    -------
    list of tuple
        One ``(role, filler)`` pair per kept node, in node order. Only the
        first role of a node is considered. Nodes with no roles, with a first
        role outside ``roles``, or without a filler are skipped.
    """
    allowed = ROLES if roles is None else roles
    tup = []
    for node in d.get("nodes") or []:
        node_roles = node.get("roles") or []
        if not node_roles:                     # role-less node: nothing to bind
            continue
        role = node_roles[0]                   # pick the first role
        if role not in allowed:                # ignore meta/out for MVP
            continue
        filler = node.get("filler")
        if filler is None:                     # no filler to attach to the slot
            continue
        tup.append((role, filler))
    return tup

# τ: the (role, filler) pairs distilled from the loaded record
tau = tuples_from_json(data)

print("Extracted tuples:")
for role, filler in tau:
    # role name right-aligned to 12 columns so the arrows line up
    print(" •", f"{role:>12}  ⟶  {filler}")


In [None]:
# ────────────────────────────────
# Cell 4  Role & filler vectors (pseudo)
# ────────────────────────────────
# (i) Random ±1 role vectors, one per slot, drawn in ROLES order.
#     They are sampled independently, so they are only approximately orthogonal.
role_vec = {}
for role_name in ROLES:
    role_vec[role_name] = rng.choice([-1, 1], size=D, replace=True)

# (ii) Filler projection: TF‑IDF → dense → sign
node_fillers = [n["filler"] for n in data["nodes"]]
tfidf = TfidfVectorizer()
tfidf.fit(node_fillers)                                  # tiny fit on the record's own fillers
vocab_size = len(tfidf.vocabulary_)
# Random sign matrix (D × vocabulary size), scaled by 1/sqrt(D)
A = rng.choice([-1, 1], size=(D, vocab_size)) / math.sqrt(D)

def filler_vec(token: str) -> np.ndarray:
    """Map a filler string to a ±1 vector of length D (dtype int8).

    In-vocabulary fillers: TF‑IDF row → random projection with ``A`` → sign.

    Out-of-vocabulary fillers give an all-zero TF‑IDF row, so the projection
    is the zero vector and ``proj >= 0`` would turn *every* such token into
    the same all-(+1) vector. To keep unseen tokens distinct they instead get
    a pseudo-random ±1 vector seeded from a SHA-256 hash of the token, which
    is deterministic across calls and kernel restarts.

    Parameters
    ----------
    token : str
        Filler text.

    Returns
    -------
    np.ndarray
        ±1 vector of shape (D,), dtype int8.
    """
    v = tfidf.transform([token]).toarray()[0]            # 1×V
    if not v.any():
        # Hash the lower-cased text — presumably matching the vectorizer's
        # default lower-casing, so "Dog" and "dog" agree here too.
        digest = hashlib.sha256(str(token).lower().encode("utf-8")).digest()
        token_seed = int.from_bytes(digest[:8], "big")
        fallback = np.random.default_rng(token_seed).choice([-1, 1], size=D)
        return fallback.astype(np.int8)
    proj = A @ v                                         # JL
    return np.where(proj >= 0, 1, -1).astype(np.int8)

# quick unit test
# Different role vectors should have cosine near 0.
role_cos = np.dot(role_vec["Subject"], role_vec["Predicate"]) / D
print("role⊥role cos:", role_cos)

# A filler with itself must have cosine exactly 1.
# filler_vec returns int8; np.dot on two int8 vectors accumulates in int8,
# so the sum of D = 4096 ones wraps around to 0. Widen before the dot.
dog_vec = filler_vec("dog").astype(np.int32)
self_cos = np.dot(dog_vec, dog_vec) / D
print("filler self‑cos:", self_cos)


In [None]:
# ────────────────────────────────
# Cell 5  Bind (⊗) & bundle (⊕) ⇒ composite CHV
# ────────────────────────────────
def encode_sentence(tuples):
    """Bind each (role, filler) pair and bundle the results into one CHV.

    Binding is the element-wise product of the role vector and the filler
    vector; bundling is the element-wise sum followed by a sign threshold.
    Positions where the sum is exactly 0 resolve to +1.

    Parameters
    ----------
    tuples : iterable of (role, filler)
        Every role must be a key of ``role_vec``.

    Returns
    -------
    np.ndarray
        ±1 vector of shape (D,), dtype int8. An empty ``tuples`` yields the
        all-(+1) vector (sign of an all-zero sum) rather than a 0-d array, so
        downstream code always receives a D-vector.
    """
    bound = [role_vec[r] * filler_vec(tok)       # Hadamard = XOR on ±1
             for r, tok in tuples]
    if bound:
        superpos = np.sum(bound, axis=0)
    else:
        # np.sum([], axis=0) would collapse to a scalar; keep the vector shape.
        superpos = np.zeros(D, dtype=np.int64)
    return np.where(superpos >= 0, 1, -1).astype(np.int8)

# Composite CHV for the source sentence
e_src = encode_sentence(tau)

# Number of positions holding +1
plus_count = int(np.count_nonzero(e_src == 1))
print("CHV popcount (+1 bits):", plus_count)


In [None]:
# ────────────────────────────────
# Cell 6  Dummy positive & negative for kernel training
# ────────────────────────────────
# Positive = same sentence (self‑match)
e_pos = e_src.copy()

# Negative = same roles, each filler replaced by a randomly drawn distractor.
# The draw uses a dedicated generator seeded with SEED, so the negative is
# identical on every run of this cell and does not depend on (or disturb)
# any other random state in the notebook.
distractors = ["apple", "table", "run"]
neg_rng = random.Random(SEED)
tau_neg = [(r, neg_rng.choice(distractors)) for r, _ in tau]
e_neg = encode_sentence(tau_neg)

# Batch tensors (3×D)
batch = np.stack([e_src, e_pos, e_neg]).astype(np.float32)
labels = np.array([1, 1, 0], dtype=np.float32)          # crude supervision


In [None]:
# ────────────────────────────────
# Cell 7  Parametrised kernel Kφ  (dense numpy prototype)
# ────────────────────────────────
n_roles = len(ROLES)

# Kernel parameters: the D×D blocks act on the CHVs directly, the small
# role×role block acts on the slot-sum vectors.
W_S = np.identity(D, dtype=np.float32)                   # symmetric block, starts as identity
W_A = np.zeros((D, D), dtype=np.float32)                 # antisymmetric block, starts at zero
M   = np.zeros((n_roles, n_roles), dtype=np.float32)     # slot-sum block, starts at zero

def slot_sum(e):
    """Return the slot-sum vector of CHV ``e``.

    Entry i is the dot product of ``e`` with the role vector of ``ROLES[i]``,
    so the result has one float32 entry per role (length ``len(ROLES)``).
    """
    per_role = []
    for role_name in ROLES:
        per_role.append(np.dot(role_vec[role_name], e))
    return np.array(per_role, dtype=np.float32)

def K_phi(e_src, e_dst):
    """Parametrised kernel score in (0, 1) for a pair of CHVs.

    The raw score is the sum of three bilinear forms:
    ``e_src·W_S·e_dst`` (symmetric block), ``e_src·W_A·e_dst`` (antisymmetric
    block) and ``s_src·M·s_dst`` on the slot-sum vectors; it is then squashed
    with the logistic function.

    Parameters
    ----------
    e_src, e_dst : np.ndarray
        1-D CHVs of length D, so the raw score is a scalar.

    Returns
    -------
    scalar
        Logistic of the raw score.
    """
    s_src, s_dst = slot_sum(e_src), slot_sum(e_dst)
    z = (e_src @ W_S @ e_dst
         + e_src @ W_A @ e_dst
         + s_src @ M @ s_dst)
    # Numerically stable logistic. The raw score is an unnormalised sum over D
    # terms, so the naive 1/(1+exp(-z)) overflows exp() for large negative z.
    # exp(-|z|) always lies in (0, 1]; pick the algebraically equal form per sign.
    ez = np.exp(-np.abs(z))
    if z >= 0:
        return 1 / (1 + ez)
    return ez / (1 + ez)


In [None]:
# ────────────────────────────────
# Cell 8  One contrastive update (SGD) on kernel weights
# ────────────────────────────────
lr = 1e-3
n_items = len(batch)
for it in range(1):                                     # one mini‑step
    grad_S = np.zeros_like(W_S)
    loss = 0.0
    # Every ordered pair (i, j) with i != j, in the same order as a nested loop
    for i, j in itertools.permutations(range(n_items), 2):
        target = 1 if labels[i] == labels[j] else 0     # same label ⇒ should match
        score = K_phi(batch[i], batch[j])
        # For logistic + BCE the derivative w.r.t. the pre-sigmoid score is
        # (score - target); only the symmetric block W_S is trained here.
        residual = score - target
        grad_S += residual * np.outer(batch[i], batch[j])
        loss += -(target*np.log(score+1e-8)+(1-target)*np.log(1-score+1e-8))
    W_S -= lr * grad_S
print(f"mini‑loss = {loss:.4f}")


In [None]:
# ────────────────────────────────
# Cell 9  Minimal deterministic RL pointer πθ
# ────────────────────────────────
# State = (slot_sum(query), current window len)  →  one entry per role, plus one
γ = 0.99                                                # discount factor
state_dim = 1 + len(ROLES)
# Small random initial weights; column 0 ↔ Δwindow = ‑1, column 1 ↔ Δwindow = +1
theta = 0.05 * rng.standard_normal((state_dim, 2))

def policy(state):
    """Greedy policy: pick the window change with the larger logit.

    Returns ``(action, logits)`` where ``logits = state @ theta`` and
    ``action`` is -1 when column 0 wins and +1 when column 1 wins.
    """
    logits = state @ theta
    best_col = np.argmax(logits)
    action = 2 * best_col - 1                           # column {0,1} → action {‑1,+1}
    return action, logits

state = np.concatenate([slot_sum(e_src)/D, [3]])        # start width = 3
act, raw = policy(state)
print("Action Δw =", act, "   logits:", raw)

# one fictitious reward = kernel score against negative
reward = -K_phi(e_src, e_neg)
td_error = reward + γ*0 - 0                             # V(s')=0 baseline
# One-hot over theta's columns for the action actually taken. policy() maps
# column 0 → Δw = ‑1 and column 1 → Δw = +1, so the indicator must be ordered
# (act == -1, act == +1); the reverse order would update the other action's column.
taken = np.array([act == -1, act == 1], dtype=float)
theta += 1e-2 * td_error * np.outer(state, taken)
print("TD_err:", td_error)


In [None]:
# ────────────────────────────────
# Cell 10  Quick inspection helpers
# ────────────────────────────────
def explain(e, token="dog", threshold=0.2):
    """Print every role slot of CHV ``e`` that appears to hold ``token``.

    For each role the CHV is unbound by multiplying with that role's vector,
    and the result is compared with the token's filler vector.

    Parameters
    ----------
    e : np.ndarray
        CHV to inspect, length D.
    token : str, optional
        Filler to look for. Defaults to "dog".
    threshold : float, optional
        A role is reported when its score (dot product / D) exceeds this
        value. Defaults to 0.2.

    Returns
    -------
    None
        Results are printed, one line per matching role.
    """
    # Same for every role, so look it up once; widened from int8 so the dot
    # product below cannot accumulate in a narrow integer type.
    target = filler_vec(token).astype(np.int32)
    for r in ROLES:
        probe = role_vec[r] * e                 # unbind: ±1 role vector is its own inverse
        score = np.dot(probe, target)/D
        if score > threshold:
            print(f"{r:>12}  likely contains '{token}' (cos={score:.2f})")
print("--- Debug unbinding ---")
explain(e_src)
