# 1. event_tuple_extractor.py

In [None]:
#!/usr/bin/env python3
"""
event_tuple_extractor.py

Deterministically extract events (Predicate + SPO roles + attributes)
from text, including subordinate clauses (xcomp, ccomp).

This version allows editing the input sentence at the top of the script.

Edit below:
"""
# Sentence processed when the script is run directly; edit it to try other input.
USER_INPUT = "I saw a white dog chase the brown cat quickly in the backyard."

import spacy

# Shared spaCy pipeline, loaded once at module level and used by both
# resolve_pronouns() and extract_events().
nlp = spacy.load("en_core_web_sm")

# Lowercased pronoun forms that resolve_pronouns() is allowed to substitute.
PRONOUNS = {
    "he",
    "she",
    "it",
    "they",
    "him",
    "her",
    "them",
}

# Dependency label of an adverbial modifier attached to a predicate.
ADV_DEP = "advmod"

# Dependency labels of noun modifiers that are reported as attributes.
ATTR_DEPS = {
    "amod",
    "compound",
}

# Dependency label of a verb's child -> role name written to the output dicts.
SPO_MAP = {
    "nsubj": "Subject",
    "nsubjpass": "Subject",
    "dobj": "Object",
    "iobj": "IndirectObject",
}

# Fine-grained verb tag -> label stored under the "tense" key of a predicate.
TENSE_MAP = {
    "VBD": "past",
    "VBP": "present",
    "VBZ": "present",
    "VBG": "present-participle",
    "VBN": "past-participle",
    "VB": "base",
}

def resolve_pronouns(text: str) -> str:
    """Replace pronouns with the most recent noun phrase.

    This is a recency heuristic, not real coreference resolution. Every token
    whose lowercased text is in ``PRONOUNS`` is replaced by the text of the
    latest noun phrase seen so far, where a noun phrase is the subtree of a
    NOUN/PROPN token labelled nsubj, dobj, pobj, iobj or appos. Pronouns that
    occur before any such noun phrase are left untouched.

    Args:
        text: Raw input text.

    Returns:
        The text with pronouns substituted. The original spacing between
        tokens is preserved, so punctuation and clitics stay attached to
        their neighbours instead of being split off by inserted spaces.
    """
    np_head_deps = {"nsubj", "dobj", "pobj", "iobj", "appos"}
    np_head_pos = {"NOUN", "PROPN"}

    doc = nlp(text)
    pieces, last_np = [], None
    for tok in doc:
        if last_np and tok.text.lower() in PRONOUNS:
            # A possessive pronoun ("her" in "her dog", tag PRP$ in spaCy's
            # English tag set) must stay possessive, otherwise the result
            # reads as a noun compound ("Alice dog").
            piece = last_np + "'s" if tok.tag_ == "PRP$" else last_np
        else:
            piece = tok.text
        # Re-attach the token's own trailing whitespace rather than joining
        # everything with single spaces.
        pieces.append(piece + tok.whitespace_)

        # Remember the latest noun phrase, using its original surface spacing.
        if tok.dep_ in np_head_deps and tok.pos_ in np_head_pos:
            last_np = "".join(w.text_with_ws for w in tok.subtree).strip()
    return "".join(pieces)

def extract_events(text: str):
    """Collect predicate and argument records for every verb in ``text``.

    The text is parsed with the module-level ``nlp`` pipeline. Each token
    whose part of speech is VERB contributes one "Predicate" dict (lowercased
    lemma, lowercased adverbial modifiers, plus a "tense" key when the tag is
    listed in ``TENSE_MAP``), followed by one dict for every direct child
    whose dependency label appears in ``SPO_MAP``.

    Returns:
        A flat list of dicts, in the order the tokens are encountered.
    """
    records = []
    for verb in nlp(text):
        if verb.pos_ != "VERB":
            continue

        adverbs = [kid.text.lower() for kid in verb.children if kid.dep_ == ADV_DEP]
        predicate = {
            "role": "Predicate",
            "filler": verb.lemma_.lower(),
            "attributes": adverbs,
        }
        tense = TENSE_MAP.get(verb.tag_)
        if tense is not None:
            predicate["tense"] = tense
        records.append(predicate)

        # Arguments are read from the verb's direct children only.
        for arg in verb.children:
            arg_role = SPO_MAP.get(arg.dep_)
            if arg_role is None:
                continue
            modifiers = [m.text.lower() for m in arg.children if m.dep_ in ATTR_DEPS]
            records.append({
                "role": arg_role,
                "filler": arg.text.lower(),
                "attributes": modifiers,
            })
    return records

if __name__ == "__main__":
    # Pronouns are resolved first; extraction then runs on the rewritten text.
    evts = extract_events(resolve_pronouns(USER_INPUT))
    print("\nResolved & extracted event tuples:\n")
    for record in evts:
        print(record)


# 2. enhanced_event_extractor.py

In [None]:
#!/usr/bin/env python3
"""
enhanced_event_extractor.py

Deterministically extract all event tuples (Predicate + SPO + attributes)
including subordinate clauses (xcomp, ccomp) from input text.

To use:
    Simply modify the `USER_INPUT` string below.
"""

# EDIT THIS SENTENCE AS INPUT
# It is the text processed when the script is run directly.
USER_INPUT = "I saw a white dog chase the brown cat quickly in the backyard."

import spacy

# Shared spaCy pipeline, loaded once at module level and used by both
# resolve_pronouns() and extract_events().
nlp = spacy.load("en_core_web_sm")

# Lowercased pronoun forms that resolve_pronouns() is allowed to substitute.
PRONOUNS = {
    "he",
    "she",
    "it",
    "they",
    "him",
    "her",
    "them",
}

# Dependency labels, grouped by the role they play in the extraction.
SUBJ_DEPS = {
    "nsubj",
    "nsubjpass",
}
OBJ_DEPS = {
    "dobj",
    "iobj",
    "pobj",
}
ATTR_DEPS = {
    "amod",
    "compound",
}
ADV_DEP = "advmod"

# Fine-grained verb tag -> label stored under the "tense" key of a predicate.
TENSE_MAP = {
    "VBD": "past",
    "VBP": "present",
    "VBZ": "present",
    "VBG": "present-participle",
    "VBN": "past-participle",
    "VB": "base",
}

def resolve_pronouns(text: str) -> str:
    """Replace pronouns with the nearest preceding noun phrase.

    This is a recency heuristic, not real coreference resolution. Every token
    whose lowercased text is in ``PRONOUNS`` is replaced by the text of the
    latest noun phrase seen so far, where a noun phrase is the subtree of a
    NOUN/PROPN token whose dependency label is in ``SUBJ_DEPS``, ``OBJ_DEPS``
    or is "appos". Pronouns that occur before any such noun phrase are left
    untouched.

    Args:
        text: Raw input text.

    Returns:
        The text with pronouns substituted. The original spacing between
        tokens is preserved, so punctuation and clitics stay attached to
        their neighbours instead of being split off by inserted spaces.
    """
    # Built once; the original rebuilt this union for every token.
    np_head_deps = SUBJ_DEPS | OBJ_DEPS | {"appos"}
    np_head_pos = {"NOUN", "PROPN"}

    doc = nlp(text)
    pieces, last_np = [], None
    for tok in doc:
        if last_np and tok.text.lower() in PRONOUNS:
            # A possessive pronoun ("her" in "her dog", tag PRP$ in spaCy's
            # English tag set) must stay possessive, otherwise the result
            # reads as a noun compound ("Alice dog").
            piece = last_np + "'s" if tok.tag_ == "PRP$" else last_np
        else:
            piece = tok.text
        # Re-attach the token's own trailing whitespace rather than joining
        # everything with single spaces.
        pieces.append(piece + tok.whitespace_)

        # Remember the latest noun phrase, using its original surface spacing.
        if tok.dep_ in np_head_deps and tok.pos_ in np_head_pos:
            last_np = "".join(w.text_with_ws for w in tok.subtree).strip()
    return "".join(pieces)

def _argument_record(role, head):
    """Build the output dict for one subject/object token and its modifiers."""
    return {
        "role": role,
        "filler": head.text.lower(),
        "attributes": [m.text.lower() for m in head.children if m.dep_ in ATTR_DEPS],
    }


def extract_events(text: str):
    """Extract predicate, subject and object records for every verb in ``text``.

    The text is parsed with the module-level ``nlp`` pipeline. For each token
    whose part of speech is VERB the result receives, in this order:

    1. one "Predicate" dict (lowercased lemma, lowercased adverbial
       modifiers, plus a "tense" key when the tag is listed in ``TENSE_MAP``);
    2. one "Subject" dict per direct child labelled with a ``SUBJ_DEPS`` label;
    3. one "Object" dict per direct child labelled with an ``OBJ_DEPS`` label.

    Args:
        text: Text to analyse, normally the output of ``resolve_pronouns``.

    Returns:
        A flat list of dicts in the order described above.
    """
    events = []
    # Each token is visited exactly once, so no "already seen" bookkeeping is
    # needed to avoid emitting a verb twice.
    for tok in nlp(text):
        if tok.pos_ != "VERB":
            continue

        evt = {
            "role": "Predicate",
            "filler": tok.lemma_.lower(),
            "attributes": [c.text.lower() for c in tok.children if c.dep_ == ADV_DEP],
        }
        tense = TENSE_MAP.get(tok.tag_)
        if tense is not None:
            evt["tense"] = tense
        events.append(evt)

        # One pass over the children; subjects are still emitted before
        # objects, as in the two-loop form.
        # NOTE(review): only direct children of the verb are inspected, so a
        # "pobj" that hangs off a preposition child is never reported here —
        # confirm whether prepositional objects were meant to be included.
        subjects, objects = [], []
        for child in tok.children:
            if child.dep_ in SUBJ_DEPS:
                subjects.append(_argument_record("Subject", child))
            if child.dep_ in OBJ_DEPS:
                objects.append(_argument_record("Object", child))
        events.extend(subjects)
        events.extend(objects)
    return events

if __name__ == "__main__":
    # Pronouns are resolved first; extraction then runs on the rewritten text.
    events = extract_events(resolve_pronouns(USER_INPUT))
    print("\nResolved & extracted event tuples:\n")
    for record in events:
        print(record)


# 3. tuple_extractor_rule_coref.py

In [None]:
#!/usr/bin/env python3
"""
tuple_extractor_rule_coref.py

Heuristic pronoun resolution + tuple extraction.

To use:
    Simply modify the `USER_INPUT` string below.
"""

# EDIT THIS INPUT
# It is the text processed when the script is run directly.
USER_INPUT = "Alice saw her dog. She then walked it home."

import spacy

# Shared spaCy pipeline, loaded once at module level and used by both
# resolve_pronouns() and extract_nested_tuples().
nlp = spacy.load("en_core_web_sm")

# Lowercased pronoun forms that resolve_pronouns() is allowed to substitute.
PRONOUNS = {
    "he",
    "she",
    "it",
    "they",
    "him",
    "her",
    "them",
}

# Dependency label of an adverbial modifier attached to the predicate.
ADV_DEP = "advmod"

# Dependency labels of noun modifiers (used for attributes and NP spans).
MOD_DEPS = {
    "amod",
    "compound",
}

# Dependency label of a token -> role name written to the output dicts.
SPO_MAP = {
    "nsubj": "Subject",
    "dobj": "Object",
    "iobj": "IndirectObject",
}

# Fine-grained verb tag -> label stored under the "tense" key of a predicate.
TENSE_MAP = {
    "VBD": "past",
    "VBP": "present",
    "VBZ": "present",
    "VBG": "present-participle",
    "VBN": "past-participle",
    "VB": "base",
}

def resolve_pronouns(text: str) -> str:
    """Replace pronouns with the most recent noun phrase head plus modifiers.

    This is a recency heuristic, not real coreference resolution. Every token
    whose lowercased text is in ``PRONOUNS`` is replaced by the latest noun
    phrase seen so far. A noun phrase is a NOUN/PROPN token labelled nsubj,
    dobj, pobj, iobj, appos or compound, preceded by those of its children
    whose label is in ``MOD_DEPS``. Pronouns that occur before any such noun
    phrase are left untouched.

    Args:
        text: Raw input text.

    Returns:
        The text with pronouns substituted. The original spacing between
        tokens is preserved, so punctuation and clitics stay attached to
        their neighbours instead of being split off by inserted spaces.
    """
    np_head_deps = {"nsubj", "dobj", "pobj", "iobj", "appos", "compound"}
    np_head_pos = {"NOUN", "PROPN"}

    doc = nlp(text)
    pieces = []
    last_np = None
    for token in doc:
        if last_np and token.text.lower() in PRONOUNS:
            # A possessive pronoun ("her" in "her dog", tag PRP$ in spaCy's
            # English tag set) must stay possessive, otherwise the result
            # reads as a noun compound ("Alice dog").
            piece = last_np + "'s" if token.tag_ == "PRP$" else last_np
        else:
            piece = token.text
        # Re-attach the token's own trailing whitespace rather than joining
        # everything with single spaces.
        pieces.append(piece + token.whitespace_)

        if token.dep_ in np_head_deps and token.pos_ in np_head_pos:
            # Keep the modifiers in the order they are yielded; prepending
            # them one by one would reverse them ("brown big dog").
            modifiers = [child.text for child in token.children if child.dep_ in MOD_DEPS]
            last_np = " ".join([*modifiers, token.text])
    return "".join(pieces)

def extract_nested_tuples(text: str):
    """Extract predicate and subject/object tuples with their modifiers.

    The text is parsed with the module-level ``nlp`` pipeline. The result
    lists, in this order:

    1. one "Predicate" dict for every token labelled ROOT whose part of
       speech is VERB (lemma, adverbial modifiers, plus a "tense" key when
       the tag is listed in ``TENSE_MAP``). Multi-sentence input has one
       root per sentence, so each sentence's main verb is reported rather
       than only the first one;
    2. one dict for every token in the document whose dependency label is in
       ``SPO_MAP``, with its ``MOD_DEPS`` children as attributes.

    Fillers and attributes keep the casing found in the text.

    Args:
        text: Text to analyse, normally the output of ``resolve_pronouns``.

    Returns:
        A flat list of dicts in the order described above. For input with a
        single root verb the result is the same as before.
    """
    doc = nlp(text)
    tuples = []

    # 1. Root verbs (Predicate + Tense + Adverbs), one per sentence.
    for root in doc:
        if root.dep_ != "ROOT" or root.pos_ != "VERB":
            continue
        entry = {
            "role": "Predicate",
            "filler": root.lemma_,
            "attributes": [c.text for c in root.children if c.dep_ == ADV_DEP],
        }
        tense = TENSE_MAP.get(root.tag_)
        if tense is not None:
            entry["tense"] = tense
        tuples.append(entry)

    # 2. Subjects / Objects with modifiers, taken from the whole document.
    for token in doc:
        role = SPO_MAP.get(token.dep_)
        if role:
            tuples.append({
                "role": role,
                "filler": token.text,
                "attributes": [c.text for c in token.children if c.dep_ in MOD_DEPS],
            })
    return tuples

if __name__ == "__main__":
    # Pronouns are resolved first; extraction then runs on the rewritten text.
    result = extract_nested_tuples(resolve_pronouns(USER_INPUT))
    print("\nResolved & extracted tuples:\n")
    for record in result:
        print(record)
