# In [3]:
import pandas as pd
import re
from typing import TypedDict, Any

# In [5]:
# --- State definition ---
class KWState(TypedDict):
    """Shared state dictionary passed between the pipeline node functions.

    Each node in this file receives a ``KWState``, updates some of its
    entries in place, and returns it.
    """

    # Path of the input CSV; read by ingest_csv.
    path_in: str
    # Working pandas DataFrame; set by ingest_csv, extended by normalize_metrics.
    df: Any
    # Detected column names plus the "alpha" weight; set by ingest_csv.
    params: dict

# --- Keyword classification rules ---

# Regular expressions (word-boundary anchored). classify_keyword() labels a
# keyword "Direct" as soon as any one of these matches the lowercased text.
DIRECT_PATTERNS = [
    r"\bchicken shredder\b",
    r"\bmeat shredder\b",
    r"\bshredder\b",
    r"\bshred\b",
    r"\bshredding\b",
    r"\b(claw|claws)\b",
    r"\bpulled (chicken|pork)\b",
]

# Plain substrings. classify_keyword() labels a keyword "Intermediate" when it
# contains one of these and matched neither a direct pattern nor an indirect
# term.
INTERMEDIATE_TERMS = [
    "kitchen",
    "cooking",
    "cook",
    "gadget",
    "gadgets",
    "tool",
    "tools",
    "utensil",
    "utensils",
    "food",
    "prep",
    "meat",
    "accessories",
    "countertop",
    "appliance",
    "grinder",
    "slicer",
    "chopper",
]

# Plain substrings. classify_keyword() labels a keyword "Indirect" when it
# contains one of these and matched no direct pattern.
INDIRECT_TERMS = [
    "gift",
    "gifts",
    "christmas",
    "holiday",
    "bbq",
    "barbecue",
    "father",
    "mother",
    "dad",
    "mom",
    "party",
    "summer",
    "winter",
    "stocking",
    "anniversary",
    "birthday",
]

def classify_keyword(kw: str) -> str:
    """Assign a linkage class label to a single keyword.

    The keyword is lowercased and stripped, then checked in priority order:

    1. ``"Direct"`` if any regex in ``DIRECT_PATTERNS`` matches.
    2. ``"Indirect"`` if it contains any entry of ``INDIRECT_TERMS``.
    3. ``"Intermediate"`` if it contains any entry of ``INTERMEDIATE_TERMS``.
    4. ``"Indirect"`` if it has three or more ASCII alphanumeric tokens
       (treated as a long-tail keyword).
    5. ``"None"`` otherwise.

    Args:
        kw: The keyword text. Non-string values (e.g. NaN from a CSV) are
            accepted and yield ``"None"``.

    Returns:
        One of the strings ``"Direct"``, ``"Indirect"``, ``"Intermediate"``
        or ``"None"`` (the literal string, not the ``None`` object).
    """
    if not isinstance(kw, str):
        return "None"

    text = kw.lower().strip()

    if any(re.search(pattern, text) for pattern in DIRECT_PATTERNS):
        return "Direct"

    # NOTE(review): the two term lists are matched as raw substrings, so
    # e.g. "mom" also hits "moment" and "tool" hits "stool" -- confirm this
    # is intended before tightening to whole-word matching.
    if any(term in text for term in INDIRECT_TERMS):
        return "Indirect"
    if any(term in text for term in INTERMEDIATE_TERMS):
        return "Intermediate"

    # Long-tail fallback: only runs of [a-z0-9] count as tokens, so
    # non-ASCII text contributes no tokens here.
    token_count = len(re.findall(r"[a-z0-9]+", text))
    return "Indirect" if token_count >= 3 else "None"


# --- ingest_csv node ---
def ingest_csv(state: KWState) -> KWState:
    """Load the keyword CSV, locate its key columns, and classify keywords.

    Reads ``state["path_in"]`` with pandas (``utf-8-sig`` encoding), detects
    the keyword / search-volume / competing-products columns by name, and
    adds three columns to the frame:

    * ``_SV`` -- search volume parsed to float (NaN when unparseable)
    * ``_CP`` -- competing products parsed to float (NaN when unparseable)
    * ``Linkage_Class`` -- result of ``classify_keyword`` on the keyword

    Args:
        state: Pipeline state; only ``path_in`` is read.

    Returns:
        The same ``state`` object, mutated in place: ``df`` holds the
        augmented DataFrame and ``params`` is replaced by a dict with the
        detected column names (``kw_col``, ``sv_col``, ``cp_col``) and the
        Value Score weight ``alpha`` (0.65).

    Raises:
        ValueError: If any of the three required columns cannot be found.
    """
    path = state["path_in"]
    df = pd.read_csv(path, encoding="utf-8-sig")

    # Automatic column discovery
    def find_col(df, candidates):
        """Return the first column whose normalized name contains a candidate.

        Columns are scanned in file order; names are lowercased and have
        spaces removed before the substring test.
        """
        for c in df.columns:
            cl = c.lower().replace(" ", "")
            if any(cand in cl for cand in candidates):
                return c
        return None

    kw_col = find_col(df, ["keyword", "keywords", "키워드"])
    sv_col = find_col(df, ["searchvolume", "search_volume", "sv", "검색"])
    cp_col = find_col(df, ["competingproducts", "competition", "경쟁"])

    if kw_col is None or sv_col is None or cp_col is None:
        raise ValueError("필수 컬럼(키워드, Search Volume, Competing Products)을 찾을 수 없습니다.")

    # Numeric clean-up
    def to_num(x):
        """Parse a cell such as ``"1,234"`` to float; NaN if it is not numeric."""
        try:
            return float(str(x).replace(",", "").strip())
        except ValueError:
            # Only a failed parse is expected here; a bare ``except`` would
            # also swallow KeyboardInterrupt/SystemExit.
            return float("nan")

    # Cast explicitly so the columns are float64 even when every cell is
    # unparseable or the frame is empty; otherwise pandas may keep an object
    # column, which breaks later arithmetic.
    df["_SV"] = df[sv_col].apply(to_num).astype("float64")
    df["_CP"] = df[cp_col].apply(to_num).astype("float64")

    # Classification
    df["Linkage_Class"] = df[kw_col].apply(classify_keyword)

    state["df"] = df
    state["params"] = {
        "kw_col": kw_col,
        "sv_col": sv_col,
        "cp_col": cp_col,
        "alpha": 0.65,  # Value Score weight
    }
    return state


# --- normalize_metrics node ---
def normalize_metrics(state: KWState) -> KWState:
    """Min-max normalize the numeric metrics and compute the Value Score.

    Adds three columns to ``state["df"]``:

    * ``SV_norm`` -- ``_SV`` scaled to [0, 1]
    * ``CP_norm`` -- ``_CP`` scaled to [0, 1]
    * ``Value_Score`` -- ``alpha * SV_norm + (1 - alpha) * (1 - CP_norm)``,
      so higher search volume and lower competition both raise the score.

    Args:
        state: Pipeline state; reads ``df`` (must contain ``_SV`` and
            ``_CP``) and ``params["alpha"]``.

    Returns:
        The same ``state`` object with ``df`` updated in place.

    Raises:
        KeyError: If ``params`` has no ``"alpha"`` entry or the frame lacks
            the ``_SV`` / ``_CP`` columns.
    """
    df = state["df"]
    alpha = state["params"]["alpha"]

    def minmax(series):
        """Scale ``series`` to [0, 1]; missing values take the median.

        Degenerate columns (constant, or with no usable value at all) map
        to the neutral 0.5 for every row.
        """
        # Coerce first so an object column holding None/strings behaves like
        # a float column with NaN instead of raising during subtraction.
        values = pd.to_numeric(series, errors="coerce")

        # With nothing but NaN the median is NaN too, and NaN == NaN is
        # False, so the constant-column guard below would not catch it.
        if values.isna().all():
            return pd.Series(0.5, index=values.index, dtype="float64")

        filled = values.fillna(values.median())
        lo, hi = filled.min(), filled.max()
        if lo == hi:
            # Constant column: avoid dividing by zero.
            return pd.Series(0.5, index=filled.index, dtype="float64")
        return (filled - lo) / (hi - lo)

    df["SV_norm"] = minmax(df["_SV"])
    df["CP_norm"] = minmax(df["_CP"])
    df["Value_Score"] = alpha * df["SV_norm"] + (1 - alpha) * (1 - df["CP_norm"])

    state["df"] = df
    return state
