Reference:
- https://chatgpt.com/share/6865fe5a-fdac-800e-b719-945403c9d33d

In [1]:
import json, re, uuid, hashlib
from collections import defaultdict
from pathlib import Path
import spacy
from allennlp.predictors.predictor import Predictor

# ── deterministic seeds ───────────────────────────────────────
import random, numpy as np, torch
SEED = 42
# Seed each random number generator separately so stochastic steps repeat across runs.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# ── load NLP models once ──────────────────────────────────────
# The lemmatizer is deliberately left enabled: later cells read `token.lemma_`
# to name predicate nodes, and spaCy leaves lemmas empty when that component
# is disabled (every predicate would then collapse to the same "unk" slug).
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: module transformers has no attribute AdamW" — presumably the
# installed transformers release is too new for allennlp; confirm and pin a
# compatible version before re-running.
NLP = spacy.load("en_core_web_sm", disable=["ner", "textcat"])
SRL = Predictor.from_path(
    "https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz",
    cuda_device=-1,  # -1 selects CPU inference
)


  from .autonotebook import tqdm as notebook_tqdm
  return torch.cuda.amp.custom_fwd(orig_func)  # type: ignore
  return torch.cuda.amp.custom_bwd(orig_func)  # type: ignore


AttributeError: module transformers has no attribute AdamW

In [None]:
# The single example sentence that every later cell analyses.
SENTENCE = (
    "I saw a white dog chase the brown cat quickly in the backyard."
)
print(SENTENCE)


In [None]:
# Parse the sentence once and keep two parallel views of it:
#   tokens – the surface form of every token, in order
#   deps   – (token text, dependency label, head text, token index, head index)
doc = NLP(SENTENCE)
tokens, deps = [], []
for token in doc:
    tokens.append(token.text)
    deps.append((token.text, token.dep_, token.head.text, token.i, token.head.i))

print("Tokens:", tokens)
print("Dependency triples (token, dep, head):")
for word, label, head_word, *_ in deps:
    print(f"  {word:>10} ─{label:<8}→ {head_word}")


In [None]:
# Run semantic role labelling; each entry of `frames` is a dict from which the
# cells below read the "verb", "tags" and "description" keys.
srl_out = SRL.predict(sentence=SENTENCE)
frames = srl_out["verbs"]

print("SRL Frames:")
for frame in frames:
    verb, description = frame["verb"], frame["description"]
    print(f"{verb:<6} → {description}")


In [None]:
# Dependency label → graph role. Both "dobj" and "obj" are listed so the table
# works with either direct-object label.
DEP2ROLE = {
    "nsubj": "Subject", "csubj": "Subject",
    "dobj": "Object", "obj": "Object",
    "iobj": "IndirectObject",
    "advmod": "Attr", "amod": "Attr",
}

# SRL argument label (BIO prefix already stripped) → graph role.
# Modifier arguments are listed under both spellings: the AllenNLP SRL-BERT
# model emits the "ARGM-*" form, so the "AM-*" keys alone would never match.
# The "AM-*" keys are kept for taggers that use that convention.
SRL2ROLE = {
    "ARG0": "Subject", "ARG1": "Object", "ARG2": "IndirectObject",
    "AM-TMP": "Time", "AM-LOC": "IndirectObject", "AM-MNR": "Attr",
    "ARGM-TMP": "Time", "ARGM-LOC": "IndirectObject", "ARGM-MNR": "Attr",
}

# Proto-tuples are {"role", "span", "text"} records; "span" is a half-open
# (start, end) pair of token indices.

# 1) one single-token tuple per dependency arc whose label has a role mapping
proto_tuples = [
    {"role": DEP2ROLE[label], "span": (pos, pos + 1), "text": word}
    for word, label, _head_word, pos, _head_pos in deps
    if label in DEP2ROLE
]

# 2) one tuple per mapped SRL argument, decoded from each frame's BIO tags
for frame in frames:
    bio = frame["tags"]
    for start, label in enumerate(bio):
        if not label.startswith("B-"):
            continue
        # extend the span over every directly following "I-" tag
        stop = start + 1
        while stop < len(bio) and bio[stop].startswith("I-"):
            stop += 1
        argument = label[2:]
        if argument not in SRL2ROLE:
            continue
        proto_tuples.append({
            "role": SRL2ROLE[argument],
            "span": (start, stop),
            "text": " ".join(tokens[start:stop]),
        })

print("Proto‑tuples:")
for record in proto_tuples:
    print(record)


In [None]:
# Simple heuristic: every VERB token that is not an infinitive starts a new
# event, numbered e1, e2, … in token order.
# NOTE(review): only VerbForm=Inf is excluded, so participles and gerunds also
# open events — confirm this is intended given the "finite verb" goal.
eids, eid_counter = {}, 0
for tok in doc:
    if tok.pos_ == "VERB" and tok.morph.get("VerbForm") != ["Inf"]:
        eid_counter += 1
        eids[tok.i] = f"e{eid_counter}"


def _governing_eid(token, fallback="e1"):
    """Return the event id of the nearest event verb governing ``token``.

    Starts at the token's syntactic head and climbs the head chain until a
    token registered in ``eids`` is found. Returns ``fallback`` when the root
    is reached without meeting one.
    """
    current = token.head
    # A head chain cannot be longer than the document; the bound is only a
    # guard against a malformed parse.
    for _ in range(len(doc)):
        if current.i in eids:
            return eids[current.i]
        if current.head.i == current.i:  # the root is its own head
            break
        current = current.head
    return fallback


# Attach an event id to every proto-tuple, based on the first token of its span.
for t in proto_tuples:
    t["eid"] = _governing_eid(doc[t["span"][0]])

print("EIDs:", eids)


In [None]:
nodes = []         # node dicts, in creation order
node_index = {}    # node id → node id; membership means "already created"


def slug(text):
    """Return a deterministic id fragment for ``text``.

    The text is lower-cased, every character outside ``a-z0-9`` is dropped and
    the result is cut to 32 characters. If nothing is left, ``"unk"`` is
    returned.
    """
    kept = "".join(re.split(r"[^a-z0-9]+", text.lower()))
    shortened = kept[:32]
    return shortened if shortened else "unk"

# id → node dict, so roles can be merged without rescanning `nodes`
node_by_id = {node["id"]: node for node in nodes}


def _upsert_spo_node(n_id, filler, role, eid):
    """Create the spo node ``n_id``, or merge ``role`` into the existing one.

    New nodes are appended to ``nodes`` and registered in ``node_index``.
    An existing node keeps its filler and only gains ``role`` if it does not
    carry it yet.
    """
    node = node_by_id.get(n_id)
    if node is None:
        node = {
            "id": n_id,
            "filler": filler,
            "roles": [role],
            "eid_set": [eid],
            "ntype": "spo",
        }
        node_index[n_id] = n_id
        node_by_id[n_id] = node
        nodes.append(node)
    elif role not in node["roles"]:
        node["roles"].append(role)
    return node


# argument nodes: one per distinct (filler slug, event) pair
for tup in proto_tuples:
    _upsert_spo_node(
        f"spo:{slug(tup['text'])}@{tup['eid']}", tup["text"], tup["role"], tup["eid"]
    )

# predicate nodes (explicit); if the id already exists, that node still gains
# the "Predicate" role instead of being silently skipped
for tok_i, eid in eids.items():
    pred = doc[tok_i].lemma_
    _upsert_spo_node(f"spo:{slug(pred)}@{eid}", pred, "Predicate", eid)

# event stubs — iterate in creation order; a plain set of strings has an
# iteration order that can change between interpreter runs
for eid in dict.fromkeys(eids.values()):
    nodes.append({"id": f"evt:{eid}", "filler": eid,
                  "roles": ["Event"], "eid_set": [eid], "ntype": "event"})

# single CHV hub
nodes.append({"id": "chv:main", "filler": "CHV", "roles": ["CHV"],
              "eid_set": [], "ntype": "chv"})

print("Nodes →", len(nodes))


In [None]:
edges = []   # edge records, in insertion order


def add_edge(src, tgt, kind):
    """Append one directed edge record (source, target, kind) to ``edges``."""
    record = dict(source=src, target=tgt, kind=kind)
    edges.append(record)

# Event ids in creation order; a plain set of strings has an iteration order
# that can change between interpreter runs.
event_ids = list(dict.fromkeys(eids.values()))

# Predicate node id of every event, built the same way the predicate nodes are named.
pred_by_eid = {eid: f"spo:{slug(doc[tok_i].lemma_)}@{eid}" for tok_i, eid in eids.items()}

# Edges already present, so the same (source, target, kind) is never added twice
# when the dependency and SRL passes produce the same argument.
seen_edges = {(e["source"], e["target"], e["kind"]) for e in edges}


def _add_unique_edge(src, tgt, kind):
    """Add the edge through ``add_edge`` unless an identical one already exists."""
    key = (src, tgt, kind)
    if key not in seen_edges:
        seen_edges.add(key)
        add_edge(src, tgt, kind)


# S-P & P-O: link each subject/object to the predicate of its own event.
# The predicate is looked up by event id instead of via the first span token's
# head, which for multi-word spans is usually a noun rather than the verb.
for tup in proto_tuples:
    pred_id = pred_by_eid.get(tup["eid"])
    if pred_id is None:   # tuple carries an event id that has no predicate
        continue
    arg_id = f"spo:{slug(tup['text'])}@{tup['eid']}"
    if tup["role"] == "Subject":
        _add_unique_edge(arg_id, pred_id, "S-P")
    elif tup["role"] == "Object":
        _add_unique_edge(pred_id, arg_id, "P-O")

# event-pred
# NOTE(review): the target is the first Subject node of the event, not its
# Predicate node — confirm this matches what "event-pred" is meant to encode.
for eid in event_ids:
    subj_nodes = [n for n in nodes if "Subject" in n["roles"] and eid in n["eid_set"]]
    if subj_nodes:
        _add_unique_edge(f"evt:{eid}", subj_nodes[0]["id"], "event-pred")

# binder edge (outermost object → CHV); skipped when there is no event or the
# outermost event has no object, instead of raising
outer_eid = min(event_ids) if event_ids else None
outer_objects = [n["id"] for n in nodes if "Object" in n["roles"] and outer_eid in n["eid_set"]]
last_obj = outer_objects[-1] if outer_objects else None
if last_obj is not None:
    _add_unique_edge(last_obj, "chv:main", "binder")

print("Edges →", len(edges))


In [None]:
# One hull per event: the ids of all spo nodes that belong to it.
# Events are visited in creation order; a plain set of strings has an
# iteration order that can change between interpreter runs, which would
# reorder the hull list.
hulls = []
for eid in dict.fromkeys(eids.values()):
    members = [n["id"] for n in nodes if eid in n["eid_set"] and n["ntype"] == "spo"]
    if members:
        hulls.append({"eid": eid, "members": members})
print("Hull list:", hulls)


In [None]:
# Assemble the export document and show it as pretty-printed JSON
# (non-ASCII characters are written as-is rather than escaped).
payload = dict(
    version="2.1",
    sentence=SENTENCE,
    nodes=nodes,
    edges=edges,
    layouts=dict(hulls=hulls),
)

payload_json = json.dumps(payload, ensure_ascii=False, indent=2)
print(payload_json)


In [None]:
# >>> place JSON‑Schema v2.1 in ./schema.json
from jsonschema import Draft202012Validator
import yaml, pprint

# Read the schema through a context manager so the file handle is closed, and
# with an explicit encoding so the result does not depend on the platform default.
with open("schema.json", encoding="utf-8") as schema_file:
    SCHEMA = json.load(schema_file)

# First make sure the schema itself is well-formed, then validate the payload;
# either call raises on failure, so the success line is reached only when both pass.
Draft202012Validator.check_schema(SCHEMA)
Draft202012Validator(SCHEMA).validate(payload)
print("✅  schema‑valid")
