In [1]:
import spacy
from tabulate import tabulate
import time
import spacy
import re
from spacy import displacy


# Load spaCy's Korean large pipeline (ko_core_news_lg)
nlp = spacy.load("ko_core_news_lg")

# Dependency Tree + Regex + Context Window

In [4]:
# Input phrase to analyze (edit this to try other examples)
sentence = "1만 원 이하 저렴한 S24"
doc = nlp(sentence)

# Product keyword whose modifiers we want to collect
target = "S24"

# Accumulators filled in by the analysis further down
related_adjs, related_adps = [], []
is_part_of_root_noun = False

# Root token: the first token that is its own head (None when no token qualifies)
root = next(filter(lambda tok: tok.head == tok, doc), None)

# Helper: case-insensitive substring match of the target against a token
def matches_target(token, target):
    """Return True when ``target`` occurs in the token's lemma or surface text.

    The comparison is a case-insensitive substring test, so a target of
    "S24" also matches a token whose text is "s24" or "S24울트라".

    Args:
        token: Object exposing ``lemma_`` and ``text`` string attributes
            (a spaCy ``Token`` in this notebook).
        target: Keyword to look for.

    Returns:
        bool: True if the lowercased target is contained in the lowercased
        lemma or text; False otherwise, and always False for an empty target.
    """
    needle = target.lower()
    if not needle:
        # "" is a substring of every string; without this guard an empty
        # target would match every token in the document.
        return False
    return needle in token.lemma_.lower() or needle in token.text.lower()

# Number of tokens inspected on EACH side of the target in the surface scan
CONTEXT_WINDOW = 3
# Coarse POS tags accepted as modifiers when the target's own tag is unreliable
SURFACE_MODIFIER_POS = ("ADJ", "NOUN", "PROPN", "X")

# First pass: per-token match on lemma / surface text
target_tokens = [t for t in doc if matches_target(t, target)]

if not target_tokens and target:
    # Fallback: no single token contains the target (e.g. the tokenizer split
    # it into several tokens). Search the raw text instead and collect every
    # token whose character span overlaps a match.
    pattern = re.compile(re.escape(target), re.IGNORECASE)
    for match in pattern.finditer(doc.text):
        target_tokens.extend(
            t for t in doc
            if t.idx < match.end() and t.idx + len(t.text) > match.start()
        )

for target_token in target_tokens:
    # The target is the root itself or hangs directly off the root
    if target_token == root or target_token.head == root:
        is_part_of_root_noun = True

    if target_token.pos_ != "X":
        # POS tag looks usable -> trust the dependency tree and read the
        # target's direct children
        for child in target_token.children:
            if child.pos_ == "ADJ":
                related_adjs.append(child.text)
            elif child.pos_ == "ADP":
                related_adps.append(child.text)
    else:
        # POS is unreliable -> scan a symmetric window of surface neighbors
        idx = target_token.i
        left_ctx = doc[max(0, idx - CONTEXT_WINDOW):idx]
        right_ctx = doc[idx + 1:min(len(doc), idx + 1 + CONTEXT_WINDOW)]
        for window in (left_ctx, right_ctx):
            for neighbor in window:
                if neighbor.pos_ in SURFACE_MODIFIER_POS:
                    related_adjs.append(neighbor.text)

# Deduplicate while keeping first-seen order, so the printed lists are
# identical on every run (set() ordering of strings varies between runs)
related_adjs = list(dict.fromkeys(related_adjs))
related_adps = list(dict.fromkeys(related_adps))

# Results
print(f"형용사/수식어(Modifiers) 관련 '{target}': {related_adjs}")
print(f"조사/전치사(Adpositions) 관련 '{target}': {related_adps}")
print(f"'{target}'가 루트 명사이거나 루트에 연결? {is_part_of_root_noun}")

# Optional: visualize dependency tree (inside a notebook, displacy.render
# draws inline, whereas displacy.serve starts a blocking web server)
# displacy.render(doc, style="dep")


형용사/수식어(Modifiers) 관련 'S24': ['원', '이하']
조사/전치사(Adpositions) 관련 'S24': []
'S24'가 루트 명사이거나 루트에 연결? True
