# Final Korean (KO) POS Tagging

In [6]:
import json

import spacy
from spacy import displacy

# Name of the Korean spaCy pipeline shared by every cell below.
KO_MODEL_NAME = "ko_core_news_lg"
nlp = spacy.load(KO_MODEL_NAME)

In [8]:
def is_hangul(s: str) -> bool:
    """Return True if ``s`` is non-empty and every character is a Hangul syllable.

    Only code points in the range U+AC00..U+D7A3 are accepted, so any other
    character (Latin letters, digits, whitespace, ...) makes the result False.

    Args:
        s: Text to inspect.

    Returns:
        True when ``s`` has at least one character and all of its characters
        fall inside U+AC00..U+D7A3; False otherwise, including for ``""``.
    """
    # all() is vacuously True for an empty string, so require some content first.
    return bool(s) and all('\uac00' <= ch <= '\ud7a3' for ch in s)

def is_target_noun_token(token):
    """Decide whether ``token`` qualifies as a target-noun candidate.

    A token qualifies when its coarse POS tag is NOUN, PROPN or X, or when it
    is alphabetic text that ``is_hangul`` does not accept.
    """
    noun_like_tags = ("NOUN", "PROPN", "X")
    if token.pos_ in noun_like_tags:
        return True
    # Fallback for alphabetic, non-Hangul text that the tagger labelled otherwise
    # (presumably foreign-script product names -- confirm with the author).
    return token.is_alpha and not is_hangul(token.text)

def extract_product_phrases_info(query: str, target_phrases_list):
    """Describe how each target phrase is modified and positioned inside ``query``.

    The query is parsed once with the module-level ``nlp`` pipeline. A token is
    attributed to a target phrase when its text equals one of the phrase's
    whitespace-separated words.

    Args:
        query: Raw query text to parse.
        target_phrases_list: Iterable of phrase strings to look up in the query.

    Returns:
        A dict keyed by target phrase. Each value is a dict with:
            "adjectives": texts of the matching tokens' children whose
                dependency label is amod/nmod/compound/det/acl or whose POS is
                ADJ/VERB; if a matching token has no such child, the token
                immediately to its left is used when its POS is ADJ/VERB.
            "adp": texts of the matching tokens' children whose dependency
                label is "case" or whose POS is ADP.
            "is_main_product": True if any matching token is the ROOT, is
                directly headed by the ROOT, or has dependency nsubj/dep.
        Both lists are de-duplicated and keep first-seen order, so the output
        is identical from run to run.
    """
    doc = nlp(query)
    results = {}

    modifier_labels = {"amod", "nmod", "compound", "det", "acl"}
    modifier_pos = ("ADJ", "VERB")

    for target_phrase in target_phrases_list:
        target_tokens = set(target_phrase.split())
        adjectives_all = []
        adps_all = []
        is_main_product = False

        for token in doc:
            if token.text not in target_tokens:
                continue

            # Read the children once; they are scanned for modifiers and for ADPs.
            children = list(token.children)

            # Collect adjectives
            adjectives = [
                child.text
                for child in children
                if child.dep_ in modifier_labels or child.pos_ in modifier_pos
            ]
            if not adjectives and token.i > 0:
                # No modifier attached in the parse: fall back to the left neighbour.
                left = doc[token.i - 1]
                if left.pos_ in modifier_pos:
                    adjectives.append(left.text)
            adjectives_all.extend(adjectives)

            # Collect ADPs / case markers
            adps_all.extend(
                child.text
                for child in children
                if child.dep_ == "case" or child.pos_ == "ADP"
            )

            # Main product flag
            if (
                token.dep_ == "ROOT"
                or token.head.dep_ == "ROOT"
                or token.dep_ in ("nsubj", "dep")
            ):
                is_main_product = True

        results[target_phrase] = {
            # dict.fromkeys de-duplicates while keeping insertion order, unlike
            # set(), whose iteration order for strings can change between runs.
            "adjectives": list(dict.fromkeys(adjectives_all)),
            "adp": list(dict.fromkeys(adps_all)),
            "is_main_product": is_main_product,
        }

    return results

In [10]:
# Sample query plus the product phrases to inspect in it.
query = "저렴한 s24 폰 보여주세요"
target_phrases = ["s24","폰"]

results = extract_product_phrases_info(query, target_phrases)

# Emit the whole report with one print call, one item per line.
print(
    f"Query: {query}",
    f"Target tokens: {target_phrases}",
    "Results:",
    json.dumps(results, ensure_ascii=False, indent=4),
    sep="\n",
)

Query: 저렴한 s24 폰 보여주세요
Target tokens: ['s24', '폰']
Results:
{
    "s24": {
        "adjectives": [
            "저렴한"
        ],
        "adp": [],
        "is_main_product": false
    },
    "폰": {
        "adjectives": [
            "저렴한",
            "s24"
        ],
        "adp": [],
        "is_main_product": true
    }
}


# POS Tagger

In [13]:
sentence = "저렴한 s24 폰 보여주세요"
doc = nlp(sentence)

# Debug view: one row per token with its POS tag, dependency label and head.
token_rows = [
    f"{token.text:6} | POS={token.pos_:5} | DEP={token.dep_:10} | HEAD={token.head.text}"
    for token in doc
]
for row in token_rows:
    print(row)

저렴한    | POS=VERB  | DEP=amod       | HEAD=폰
s24    | POS=X     | DEP=compound   | HEAD=폰
폰      | POS=NOUN  | DEP=dep        | HEAD=보여주세요
보여주세요  | POS=AUX   | DEP=ROOT       | HEAD=보여주세요


# Dependency Tree

In [16]:
# Parse the sample query and draw its dependency tree inline in the notebook.
dep_render_options = {"compact": True}
doc = nlp("저렴한 s24 폰 보여주세요")
displacy.render(doc, style="dep", jupyter=True, options=dep_render_options)