In [1]:
# save as: make_food_tags_test.py
# usage (그냥 실행하면 test.csv를 읽어서 test_with_tags.csv로 저장):
#   python make_food_tags_test.py
# 필요 패키지: pandas (없으면: pip install pandas)

import re
import os
import json
from typing import Tuple, List, Set, Dict, Any

import pandas as pd


# ========== 유틸 ==========
def coerce_num(x):
    """Extract the first number found in *x* and return it as a float.

    Handles values such as ``'15 g'``, ``'120mg'`` and ``'1,234'`` (commas
    are removed before parsing), as well as leading-decimal forms such as
    ``'.5'``.

    Args:
        x: A number, a string containing a number, or a missing value.

    Returns:
        The parsed float, or ``None`` when *x* is ``None``, a float NaN, or
        contains no digits.
    """
    if x is None or (isinstance(x, float) and pd.isna(x)):
        return None
    if isinstance(x, (int, float)):
        return float(x)
    s = str(x).strip().replace(",", "")
    # The second alternative accepts a bare leading decimal point; without it
    # ".5" would be read as the integer 5.
    m = re.search(r"-?(?:\d+(?:\.\d+)?|\.\d+)", s)
    return float(m.group(0)) if m else None

def add_exclusive(tag_set: Set[str], new_tag: str, group_map: Dict[str, Dict[str, str]]):
    """Add *new_tag* to *tag_set*, evicting conflicting tags of the same group.

    The first group in *group_map* whose values contain *new_tag* decides
    which tags conflict: every other value of that group is removed from
    *tag_set*. A tag that belongs to no group is simply added.

    *tag_set* is modified in place; nothing is returned.
    """
    for members in group_map.values():
        siblings = set(members.values())
        if new_tag not in siblings:
            continue
        # Drop every other member of the matching group, then stop searching.
        tag_set.difference_update(siblings - {new_tag})
        break
    tag_set.add(new_tag)


# ========== 규칙/매핑 ==========
# Groups of mutually exclusive tags, keyed by group name. build_food_tags
# passes this table to add_exclusive, which keeps at most one tag per group.
EXCLUSIVE_GROUPS = {
    "macro:protein": {
        "high": "macro:high-protein",
        "low": "macro:low-protein",
    },
    "macro:fat": {
        "high": "macro:high-fat",
        "low": "macro:low-fat",
    },
    "macro:carb": {
        "high": "macro:high-carb",
        "low": "macro:low-carb",
    },
    "macro:sugar": {
        "high": "macro:high-sugar",
        "low": "macro:low-sugar",
    },
    "diet:sodium": {
        "high": "diet:high-sodium",
        "low": "diet:low-sodium",
    },
    "course": {
        "breakfast": "course:breakfast",
        "lunch": "course:lunch",
        "dinner": "course:dinner",
    },
    "difficulty": {
        "easy": "difficulty:easy",
        "normal": "difficulty:normal",
        "full": "difficulty:full",
    },
    "cuisine": {
        "korean": "cuisine:korean",
        "western": "cuisine:western",
        "japanese": "cuisine:japanese",
    },
    "spice": {
        "mild": "spice:mild",
        "spicy": "spice:spicy",
    },
}

# Korean display label for each normalized tag. build_food_tags looks tags up
# here to fill the display-tag list; tags without an entry get no label.
DISPLAY_LABELS = {
    # Nutrient levels
    "macro:high-protein": "고단백",
    "macro:low-protein": "저단백",
    "macro:high-fat": "고지방",
    "macro:low-fat": "저지방",
    "macro:high-carb": "고탄수",
    "macro:low-carb": "저탄수",
    "macro:high-sugar": "고당",
    "macro:low-sugar": "저당",
    "diet:high-sodium": "고나트륨",
    "diet:low-sodium": "저나트륨",
    # Health goals
    "goal:weight-loss": "다이어트형",
    "goal:glycemic-control": "혈당조절형",
    "goal:muscle-gain": "근육증진형",
    "goal:balanced": "균형식형",
    # Cuisine
    "cuisine:korean": "한식",
    "cuisine:western": "양식",
    "cuisine:japanese": "일식",
    # Meal course
    "course:breakfast": "아침추천",
    "course:lunch": "점심추천",
    "course:dinner": "저녁추천",
    # Cooking difficulty
    "difficulty:easy": "간편조리",
    "difficulty:normal": "일반조리",
    "difficulty:full": "정식조리",
    # Spiciness
    "spice:mild": "순한맛",
    "spice:spicy": "매운맛",
}

# Candidate source column names (Korean and English) for each standard column.
# map_columns uses the first candidate, in list order, that exists in the input.
CAND_COLS = {
    "food_name": [
        "food_name",
        "식품명",
        "식품명(국문)",
        "메뉴명",
        "품목명",
        "식품",
    ],
    "protein_g": [
        "protein_g",
        "단백질(g)",
        "단백질 (g)",
        "단백질",
    ],
    "fat_g": [
        "fat_g",
        "지방(g)",
        "지방 (g)",
        "지방",
    ],
    "carb_g": [
        "carb_g",
        "탄수화물(g)",
        "탄수화물 (g)",
        "탄수화물",
    ],
    "sugar_g": [
        "sugar_g",
        "당류(g)",
        "총당류(g)",
        "총당류 (g)",
        "당류 (g)",
        "당류",
    ],
    "sodium_mg": [
        "sodium_mg",
        "나트륨(mg)",
        "나트륨 (mg)",
        "나트륨",
    ],
    "kcal_per100": [
        "kcal_per100",
        "에너지(kcal)",
        "에너지 (kcal)",
        "열량(kcal)",
        "열량 (kcal)",
        "칼로리(kcal)",
    ],
}


# ========== 태그 생성 ==========
def build_food_tags(row: Dict[str, Any]) -> Tuple[List[str], List[str]]:
    """Derive normalized tags and Korean display tags for one food row.

    Tags come from three sources: nutrient thresholds, health-goal rules
    built on those nutrient tags, and keywords found in the food name.

    Args:
        row: Mapping with ``food_name`` and the numeric fields ``protein_g``,
            ``fat_g``, ``carb_g``, ``sugar_g`` and ``sodium_mg``.

    Returns:
        ``(norm_tags, display_tags)``, both sorted lists of strings.
    """
    name = str(row.get("food_name", "") or "")
    # NOTE(review): None / missing / 0 all become 0.0 and therefore earn the
    # "low" tag, whereas a float NaN is truthy, stays NaN, fails both
    # comparisons below and leaves that nutrient untagged. Confirm which
    # treatment of missing values is intended.
    protein = float(row.get("protein_g", 0) or 0)
    fat     = float(row.get("fat_g", 0) or 0)
    carb    = float(row.get("carb_g", 0) or 0)
    sugar   = float(row.get("sugar_g", 0) or 0)
    sodium  = float(row.get("sodium_mg", 0) or 0)

    norm: Set[str] = set()
    disp: Set[str] = set()

    # Nutrient thresholds: (value, high-if->=, low-if-<=, high tag, low tag).
    nutrient_rules = (
        (protein, 15, 5, "macro:high-protein", "macro:low-protein"),
        (fat, 15, 5, "macro:high-fat", "macro:low-fat"),
        (carb, 25, 10, "macro:high-carb", "macro:low-carb"),
        (sugar, 10, 3, "macro:high-sugar", "macro:low-sugar"),
        (sodium, 700, 200, "diet:high-sodium", "diet:low-sodium"),
    )
    for value, high_min, low_max, high_tag, low_tag in nutrient_rules:
        if value >= high_min:
            add_exclusive(norm, high_tag, EXCLUSIVE_GROUPS)
        elif value <= low_max:
            add_exclusive(norm, low_tag, EXCLUSIVE_GROUPS)

    # Health goals derived from the nutrient tags.
    if ("macro:high-protein" in norm) and ("macro:low-carb" in norm):
        norm.add("goal:weight-loss")
    if ("macro:low-sugar" in norm) and ("macro:low-carb" in norm):
        norm.add("goal:glycemic-control")
    if ("macro:high-protein" in norm) and ("macro:high-fat" not in norm):
        norm.add("goal:muscle-gain")
    if not any(g in norm for g in ["goal:weight-loss", "goal:glycemic-control", "goal:muscle-gain"]):
        norm.add("goal:balanced")
    if "diet:low-sodium" in norm:
        disp.add("저염식형")  # display-only tag with no normalized counterpart

    # Name keywords. Within each axis the first matching rule wins, so the
    # order of the rules is significant.
    keyword_axes = (
        (  # cuisine
            (("김치", "비빔밥", "불고기", "된장", "두부", "나물", "탕", "찌개"), "cuisine:korean"),
            (("파스타", "샐러드", "스테이크", "수프"), "cuisine:western"),
            (("초밥", "우동", "돈까스", "덮밥"), "cuisine:japanese"),
        ),
        (  # meal course
            (("죽", "토스트", "오트밀", "요거트"), "course:breakfast"),
            (("샐러드", "비빔밥", "덮밥", "볶음밥"), "course:lunch"),
            (("찜", "스테이크", "탕", "찌개", "국"), "course:dinner"),
        ),
        (  # cooking difficulty
            (("샐러드", "토스트", "오트밀"), "difficulty:easy"),
            (("볶음", "덮밥", "구이"), "difficulty:normal"),
            (("탕", "찌개", "찜"), "difficulty:full"),
        ),
    )
    for axis_rules in keyword_axes:
        for keywords, tag in axis_rules:
            if any(k in name for k in keywords):
                add_exclusive(norm, tag, EXCLUSIVE_GROUPS)
                break

    # The lookahead keeps a bare "불" from matching "불고기", which the
    # cuisine rule above already treats as an ordinary Korean dish name.
    if re.search(r"매운|마라|청양|고추장|불(?!고기)", name):
        add_exclusive(norm, "spice:spicy", EXCLUSIVE_GROUPS)
    elif re.search(r"담백|순한|깔끔", name):
        add_exclusive(norm, "spice:mild", EXCLUSIVE_GROUPS)

    # Display tags: the Korean label of every normalized tag that has one.
    disp.update(DISPLAY_LABELS[t] for t in norm if t in DISPLAY_LABELS)

    return sorted(norm), sorted(disp)


def to_korean_phrase(tag: str) -> str:
    """Return the Korean phrase for a normalized tag.

    A tag with no entry in the table is returned unchanged.
    """
    phrases = {
        # Nutrient levels
        "macro:high-protein": "고단백",
        "macro:low-protein": "저단백",
        "macro:high-fat": "고지방",
        "macro:low-fat": "저지방",
        "macro:high-carb": "고탄수",
        "macro:low-carb": "저탄수",
        "macro:high-sugar": "고당",
        "macro:low-sugar": "저당",
        "diet:high-sodium": "고나트륨",
        "diet:low-sodium": "저나트륨",
        # Health goals
        "goal:weight-loss": "다이어트 적합",
        "goal:glycemic-control": "혈당 조절 적합",
        "goal:muscle-gain": "근육 증진 적합",
        "goal:balanced": "균형식",
        # Cuisine
        "cuisine:korean": "한식",
        "cuisine:western": "양식",
        "cuisine:japanese": "일식",
        # Meal course
        "course:breakfast": "아침용",
        "course:lunch": "점심용",
        "course:dinner": "저녁용",
        # Cooking difficulty
        "difficulty:easy": "간편 조리",
        "difficulty:normal": "일반 조리",
        "difficulty:full": "정식 조리",
        # Spiciness
        "spice:mild": "순한 맛",
        "spice:spicy": "매운 맛",
    }
    if tag in phrases:
        return phrases[tag]
    return tag

def build_content(row: Dict[str, Any], norm_tags: List[str]) -> str:
    """Build a short description plus tag summary for embedding.

    The text is made of up to four parts, in order: the food name (or "메뉴"
    when no name is given), calorie and protein highlights, Korean phrases
    for the first six tags, and the full list of normalized tags.

    Args:
        row: Mapping with ``food_name`` and optionally ``protein_g`` and one
            of ``kcal_per100`` / ``kcal`` / ``energy_kcal``.
        norm_tags: Normalized tags for the row.

    Returns:
        The assembled description with surrounding whitespace stripped.
    """
    def is_missing(value: Any) -> bool:
        # Missing cells read through pandas arrive as float NaN, which would
        # otherwise pass an ``is not None`` check and be rendered as "nan".
        return (
            value is None
            or (isinstance(value, str) and value == "")
            or (isinstance(value, float) and pd.isna(value))
        )

    raw_name = row.get("food_name", "")
    name = "" if is_missing(raw_name) else str(raw_name or "")

    # Take the first calorie field that holds a value. An explicit missing
    # check (instead of ``or``) keeps a legitimate 0 kcal from being dropped.
    kcal = next(
        (row.get(key) for key in ("kcal_per100", "kcal", "energy_kcal")
         if not is_missing(row.get(key))),
        None,
    )
    protein = row.get("protein_g")

    parts = []
    lead = name.strip() if name else "메뉴"
    parts.append(f"{lead}. ")

    hilites = []
    if kcal is not None:
        hilites.append(f"100g 당 {float(kcal):.0f}kcal")
    if not is_missing(protein):
        hilites.append(f"단백질 {float(protein):.1f}g")
    if hilites:
        parts.append(", ".join(hilites) + ". ")

    phrases = [to_korean_phrase(t) for t in norm_tags[:6]]
    if phrases:
        parts.append("특징: " + " · ".join(phrases) + ". ")
    if norm_tags:
        parts.append("태그: " + ", ".join(norm_tags))
    return "".join(parts).strip()


# ========== 컬럼 매핑/로딩 ==========
def map_columns(df_raw: pd.DataFrame) -> pd.DataFrame:
    """Return a frame holding only the standard columns found in *df_raw*.

    Each standard column in ``CAND_COLS`` is resolved to the first candidate
    name present in *df_raw*. Numeric columns are then cleaned with
    ``coerce_num``; ``kcal_per100`` is optional and cleaned only if found.

    Raises:
        ValueError: If the food name or any of the five nutrient columns
            cannot be resolved.
    """
    source_cols = df_raw.columns
    resolved = {}
    for std_name, aliases in CAND_COLS.items():
        match = next((alias for alias in aliases if alias in source_cols), None)
        if match is not None:
            resolved[std_name] = match

    required = ("food_name", "protein_g", "fat_g", "carb_g", "sugar_g", "sodium_mg")
    missing = [key for key in required if key not in resolved]
    if missing:
        raise ValueError(f"필수 컬럼 매핑 실패: {missing}\nCSV 컬럼: {list(df_raw.columns)}")

    df = pd.DataFrame()
    for std_name, src_name in resolved.items():
        df[std_name] = df_raw[src_name]

    # Clean whichever numeric columns were resolved.
    numeric_cols = ("protein_g", "fat_g", "carb_g", "sugar_g", "sodium_mg", "kcal_per100")
    for col in numeric_cols:
        if col not in df.columns:
            continue
        df[col] = df[col].apply(coerce_num)
    return df

def load_test_csv(path: str) -> pd.DataFrame:
    """Load the input table from an Excel or CSV file.

    ``.xlsx`` / ``.xls`` files are read with ``pd.read_excel``. Any other
    file is read as CSV, trying several encodings in order until one decodes.

    Args:
        path: Path of the file to read.

    Returns:
        The loaded DataFrame.

    Raises:
        Whatever ``pd.read_excel`` / ``pd.read_csv`` raises. Only decoding
        failures trigger a retry with the next encoding; a missing file or a
        parse error surfaces immediately instead of being retried.
    """
    ext = os.path.splitext(path)[1].lower()
    if ext in (".xlsx", ".xls"):
        return pd.read_excel(path)
    # Probe the CSV encoding.
    for enc in ("utf-8-sig", "cp949", "euc-kr", "utf-8"):
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeError:
            # Wrong encoding guess; try the next candidate.
            continue
    # Every candidate failed to decode: let pandas raise its own error.
    return pd.read_csv(path)


# ========== 메인 ==========
def main(input_path: str = "test.csv", output_path: str = "test_with_tags.csv"):
    """Tag every row of the input table and save the result as CSV.

    Reads *input_path*, maps it to the standard columns, builds tags and a
    content string per row, and writes the original columns plus
    ``norm_tags``, ``display_tags`` and ``content`` to *output_path*
    (encoded as ``utf-8-sig``). Prints the saved path and a preview of the
    first five rows.

    Args:
        input_path: CSV or Excel file to read. Defaults to ``test.csv``.
        output_path: CSV file to write. Defaults to ``test_with_tags.csv``.
    """
    df_raw = load_test_csv(input_path)
    df_std = map_columns(df_raw)

    norm_tags_col, display_tags_col, content_col = [], [], []
    for _, r in df_std.iterrows():
        record = r.to_dict()  # built once and shared by both builders
        norm, disp = build_food_tags(record)
        norm_tags_col.append(norm)
        display_tags_col.append(disp)
        content_col.append(build_content(record, norm))

    out = df_raw.copy()
    # Tag lists are stored as JSON strings so they fit in a single CSV cell.
    out["norm_tags"] = [json.dumps(t, ensure_ascii=False) for t in norm_tags_col]
    out["display_tags"] = [json.dumps(t, ensure_ascii=False) for t in display_tags_col]
    out["content"] = content_col

    out.to_csv(output_path, index=False, encoding="utf-8-sig")
    print(f"[OK] 저장 완료 → {os.path.abspath(output_path)}")
    print(out.head(5)[["norm_tags", "display_tags", "content"]])


if __name__ == "__main__":
    main()


[OK] 저장 완료 → c:\githome\today\test_with_tags.csv
                                           norm_tags  \
0  ["goal:balanced", "macro:high-carb", "macro:lo...   
1  ["goal:balanced", "macro:high-carb", "macro:lo...   
2  ["goal:balanced", "macro:high-carb", "macro:lo...   
3  ["diet:low-sodium", "goal:balanced", "macro:hi...   
4  ["goal:balanced", "macro:high-carb", "macro:lo...   

                                   display_tags  \
0           ["고탄수", "균형식형", "저단백", "저당", "저지방"]   
1           ["고탄수", "균형식형", "저단백", "저당", "저지방"]   
2           ["고탄수", "균형식형", "저단백", "저당", "저지방"]   
3  ["고탄수", "균형식형", "저나트륨", "저당", "저염식형", "저지방"]   
4           ["고탄수", "균형식형", "저단백", "저당", "저지방"]   

                                             content  
0  가래떡. 단백질 3.9g. 특징: 균형식 · 고탄수 · 저지방 · 저단백 · 저당....  
1  깨송편. 단백질 4.7g. 특징: 균형식 · 고탄수 · 저지방 · 저단백 · 저당....  
2  꿀떡. 단백질 3.7g. 특징: 균형식 · 고탄수 · 저지방 · 저단백 · 저당. ...  
3  모듬찰떡. 단백질 6.8g. 특징: 저나트륨 · 균형식 · 고탄수 · 저지방 · 저...  
4  백설기. 단백질 3.7g. 특징: 균형식 · 고탄수 