In [32]:
import re

import spacy
from spacy.language import Language
# Module-level pipeline that closest_to_root() calls to parse each query.
# NOTE(review): the "Override" cell further down rebinds `nlp` to
# en_core_web_sm, so results depend on which cell ran last — confirm the
# intended model before trusting saved outputs.
nlp = spacy.load("en_core_web_md")

# Nearest root

In [66]:
def closest_to_root(query, candidates, pipeline=None):
    """Pick the candidate phrase that sits nearest the root of the parse tree.

    The query is parsed once; each candidate is then located in the query
    text as a whole-word, case-insensitive match, mapped onto a token span,
    and scored by how many head-links separate the span's root token from
    the sentence root.

    Args:
        query: Text to parse.
        candidates: Iterable of phrases (strings) to look for in ``query``.
            Empty phrases are skipped.
        pipeline: Optional callable that turns text into a parsed doc.
            Defaults to the module-level ``nlp``; pass one explicitly to
            avoid depending on whichever model ``nlp`` currently points to.

    Returns:
        A ``(phrase, distance)`` tuple for the candidate with the smallest
        distance (the earliest candidate wins ties), or ``None`` when no
        candidate is found in the query.
    """
    parse = nlp if pipeline is None else pipeline
    doc = parse(query)

    def distance_from_root(token):
        """Count the head-links from ``token`` up to the root."""
        dist = 0
        while token.head != token:  # the root is its own head
            token = token.head
            dist += 1
        return dist

    results = []
    for phrase in candidates:
        if not phrase:
            continue  # an empty phrase would "match" at every position
        # Search the ORIGINAL text with IGNORECASE instead of lowercasing it:
        # lowercasing can change string length for some characters, which
        # would make the offsets passed to char_span() point at the wrong
        # place. The lookarounds require the match to start and end on a
        # word edge, so "ram" no longer matches inside "program".
        pattern = r"(?<!\w)" + re.escape(phrase) + r"(?!\w)"
        match = re.search(pattern, doc.text, flags=re.IGNORECASE)
        if match is None:
            continue
        span = doc.char_span(match.start(), match.end(), alignment_mode="expand")
        if not span:
            continue
        results.append((phrase, distance_from_root(span.root)))

    # default=None covers the "nothing matched" case
    return min(results, key=lambda item: item[1], default=None)

In [70]:
# Example: which of these phrases sits closest to the root of the query's parse?
query = "show me s24 phone with good ram for best gaming"
candidates = [
    "s24 phone",
    "ram",
    "gaming",
]

print(closest_to_root(query=query, candidates=candidates))

('s24 phone', 1)


# Override

In [62]:
import spacy

# NOTE(review): this rebinds the module-level `nlp` that closest_to_root()
# reads, so cells executed after this one parse with en_core_web_sm plus the
# overrides registered below.
nlp = spacy.load("en_core_web_sm")

# Reuse the pipeline's attribute ruler when it has one. get_pipe() returns the
# component where it already sits; it does not move it. In the stock
# en_core_web_sm pipeline that position is after the parser (check
# nlp.pipe_names), so these overrides relabel token.pos_ but do not change the
# dependency parse: "watch" below prints POS=NOUN while the parser still
# treats it as the ROOT with a dobj child.
# NOTE(review): presumably reordering would not help either, since the trained
# parser does not appear to take POS tags as input — confirm in the spaCy docs.
if "attribute_ruler" in nlp.pipe_names:
    ruler = nlp.get_pipe("attribute_ruler")
else:
    # Append, so the overrides run after the components that predict tags and
    # cannot be overwritten by them.
    ruler = nlp.add_pipe("attribute_ruler")

# POS-only overrides, keyed by lowercase word form
overrides = {
    "watch": "NOUN",
    "top": "ADJ",
    # add more...
}

# Register one single-token rule per word
for word, pos in overrides.items():
    # patterns is a list of alternative token sequences → [[{...}]]
    ruler.add([[{"LOWER": word}]], {"POS": pos})

# Test
doc = nlp("best watch deals and top phones")
for t in doc:
    # POS column is 5 wide so 5-letter tags such as CCONJ stay aligned
    print(f"{t.text:10} POS={t.pos_:5} DEP={t.dep_:10} HEAD={t.head.text}")


best       POS=ADJ  DEP=amod       HEAD=watch
watch      POS=NOUN DEP=ROOT       HEAD=watch
deals      POS=NOUN DEP=dobj       HEAD=watch
and        POS=CCONJ DEP=cc         HEAD=deals
top        POS=ADJ  DEP=amod       HEAD=phones
phones     POS=NOUN DEP=conj       HEAD=deals
