In [19]:
# -*- coding: utf-8 -*-
"""
🎯 (2024+2025) 축제명 + 월 → (데이터/규칙/LLM) 축제유형 판정 + 기간(월) 필터 → '축제명  시작일 ~ 종료일' 목록
- 로드: ./csv/{2024,2025}_축제_핵심필드.csv (+ /mnt/data 경로도 시도)
- 아직 시작 안 한 축제(시작일 > 오늘) 기본 제외(옵션으로 해제 가능)
- 입력 축제(또는 매칭명) 결과에서 제외
- 월 입력 포맷: 3  |  "3,4"  |  "3-5"  |  "봄/여름/가을/겨울"
- 출력:
    1) 탐지된 축제유형(+출처) / 타깃 월
    2) 동일 유형 & 같은 시기 축제 수
    3) 축제명  시작일 ~ 종료일
"""

import os, re, json
from pathlib import Path
from typing import Optional, Tuple, List, Dict, Set, Union

import pandas as pd
from difflib import get_close_matches
from dotenv import load_dotenv
from pydantic import BaseModel, Field
try:
    from pydantic import ConfigDict  # pydantic v2
except ImportError:
    ConfigDict = None
from langgraph.graph import StateGraph, START, END

# ---------------- Env / OpenAI ----------------
# Load variables from a local .env file (if present) into the environment
# before the configuration values below are read.
load_dotenv()
# Chat model name; can be overridden through the OPENAI_MODEL variable.
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# Which OpenAI SDK flavour was initialised: "new", "legacy", or None when no
# client could be set up (the LLM helpers in this file then return empty
# results instead of calling the API).
_client_mode = None
try:
    from openai import OpenAI  # >=1.x
    _client = OpenAI()
    _client_mode = "new"
except Exception:
    # Any failure above (the import or the client construction) falls back to
    # the module-level legacy interface.
    # NOTE(review): if the 1.x SDK is installed but construction failed, this
    # branch still reports "legacy" -- confirm that is intended.
    try:
        import openai               # <=0.x
        openai.api_key = os.getenv("OPENAI_API_KEY", "")
        _client = openai
        _client_mode = "legacy"
    except Exception:
        _client = None
        _client_mode = None

# ---------------- Settings --------------------
# Closed set of festival-type labels; LLM output is coerced onto one of these.
ALLOWED_TYPES = ["문화예술", "지역특산물", "주민화합", "자연생태", "전통역사"]

# CSV locations probed by the default loader; paths that do not exist are
# skipped.
CANDIDATE_FILES = [
    "./csv/2024_축제_핵심필드.csv",
    "./csv/2025_축제_핵심필드.csv",
    "/mnt/data/2024_축제_핵심필드.csv",
    "/mnt/data/2025_축제_핵심필드.csv",
]

# Keywords per festival type for the rule-based classifier. They are matched
# as case-insensitive substrings of the festival name, so short entries can
# also hit inside longer words.
RULE_KEYWORDS: Dict[str, List[str]] = {
    "문화예술": [
        "dj","edm","힙합","랩","kpop","케이팝","뮤직","음악","콘서트","페스티벌",
        "댄스","무용","버스킹","합창","연주","오케스트라","재즈","클래식",
        "연극","뮤지컬","영화","전시","미술","아트","사진","서커스","퍼포먼스","공연",
        "국악","산타","크리스마스"
    ],
    "지역특산물": [
        "특산","로컬","먹거리","미식","맛","장터","시장",
        "사과","포도","딸기","감자","고구마","수박","밤","고추","김치",
        "굴","장어","수산","해산물","한우","한돈","우유","와인","맥주","막걸리","커피","빵"
    ],
    "주민화합": [
        "군민","시민","도민","면민","주민","자치","화합","어울림","한마음",
        "한마당","체육","민속놀이","함께","가족","마을","공동체","축전"
    ],
    "자연생태": [
        "꽃","벚꽃","장미","튤립","코스모스","유채","단풍","억새",
        "나비","반딧불","곤충","철새","생태","습지","숲","수목원",
        "산","계곡","호수","바다","해변","섬","천문","별","야경","자연"
    ],
    "전통역사": [
        "전통","전통축제","역사","민속","향토","무형문화재","도자기","도예","옹기",
        "한지","한복","서예","한옥","고택","서원","향교","읍성","성곽","고분","왕릉","판소리"
    ],
}

# ---------------- Utils (공통) -----------------------
def normalize_text(s: str) -> str:
    """Return *s* as a string with every whitespace character removed.

    ``None`` maps to the empty string. Non-breaking spaces are converted to
    ordinary spaces first so they are removed together with the rest.
    """
    if s is None:
        return ""
    text = str(s).strip().replace("\xa0", " ")
    pieces = re.split(r"\s+", text)
    return "".join(pieces)

def parse_date_str(s: str) -> Optional[pd.Timestamp]:
    """Parse a ``YYYY-MM-DD`` string into a pandas timestamp.

    Values that do not match the format are coerced by pandas to ``NaT``;
    ``None`` is returned only if the conversion itself raises.
    """
    try:
        text = str(s)
        parsed = pd.to_datetime(text, format="%Y-%m-%d", errors="coerce")
    except Exception:
        return None
    return parsed

def today_floor_ts() -> pd.Timestamp:
    """Return the current local date as a timestamp at midnight."""
    now = pd.Timestamp.today()
    return now.normalize()

def standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* with the festival-type column renamed to its canonical label.

    Any column whose stripped label equals ``축제유형`` becomes ``축제 유형``;
    all other columns keep their labels. The input frame is not modified.
    """
    renames = {
        col: "축제 유형"
        for col in df.columns
        if str(col).strip() == "축제유형"
    }
    return df.rename(columns=renames)

def ensure_columns(df: pd.DataFrame, need_dates: bool = True) -> pd.DataFrame:
    """Standardise column labels and verify the required columns exist.

    Args:
        df: Raw festival table.
        need_dates: When true, the start/end date columns are required too.

    Returns:
        The frame with standardised column labels.

    Raises:
        ValueError: If a required column (reported one at a time, in a fixed
            order) or the date columns are missing.
    """
    df = standardize_columns(df)
    required = ("연번", "광역자치단체명", "기초자치단체명", "축제명", "축제 유형")
    first_missing = next((col for col in required if col not in df.columns), None)
    if first_missing is not None:
        raise ValueError(f"필수 컬럼 누락: {first_missing}")
    if need_dates:
        has_dates = "시작일" in df.columns and "종료일" in df.columns
        if not has_dates:
            raise ValueError("CSV에 '시작일','종료일' 컬럼이 필요합니다.")
    return df

def read_many_csv(paths: List[str]) -> pd.DataFrame:
    """Load every existing CSV in *paths* and return one de-duplicated table.

    Each file is read with all columns as strings and blanks in place of
    missing values, validated with ``ensure_columns``, and tagged with its
    file name in the ``_출처파일`` column.

    Decoding is attempted as UTF-8 (with or without BOM) first and then as
    CP949, the legacy Korean encoding. The previous fallback retried with
    ``utf-8-sig`` after a plain UTF-8 failure, which can never succeed because
    both codecs reject the same byte sequences.

    Args:
        paths: Candidate file paths; non-existent ones are skipped.

    Returns:
        Concatenation of all loaded files with rows that are identical in
        region, name, type and dates collapsed to the first occurrence.

    Raises:
        FileNotFoundError: If none of the paths exists.
        UnicodeDecodeError: If a file cannot be decoded with any encoding.
        ValueError: If a file lacks required columns.
    """
    encodings = ("utf-8-sig", "cp949")
    frames = []
    for p in paths:
        if not Path(p).exists():
            continue
        df = None
        first_error = None
        for enc in encodings:
            try:
                df = pd.read_csv(p, dtype=str, encoding=enc).fillna("")
                break
            except UnicodeDecodeError as exc:
                # Remember the UTF-8 failure; it is the most informative one.
                if first_error is None:
                    first_error = exc
        if df is None:
            raise first_error
        df = ensure_columns(df, need_dates=True)
        df["_출처파일"] = Path(p).name
        frames.append(df)
    if not frames:
        raise FileNotFoundError("입력 CSV를 하나도 찾지 못했습니다.")
    out = pd.concat(frames, ignore_index=True)
    out = out.drop_duplicates(
        subset=["광역자치단체명","기초자치단체명","축제명","축제 유형","시작일","종료일"],
        keep="first"
    ).reset_index(drop=True)
    return out

# 월/계절 도우미
def months_in_range(start: Optional[pd.Timestamp], end: Optional[pd.Timestamp]) -> Set[int]:
    """Return the calendar months (1-12) touched by the span start..end.

    The two bounds may be given in either order. A missing bound (``None`` or
    ``NaT``) yields an empty set.
    """
    if pd.isna(start) or pd.isna(end):
        return set()
    lo, hi = (start, end) if start <= end else (end, start)
    # Work with absolute month indexes so year boundaries need no special case.
    first_idx = lo.year * 12 + (lo.month - 1)
    last_idx = hi.year * 12 + (hi.month - 1)
    return {idx % 12 + 1 for idx in range(first_idx, last_idx + 1)}

def season_to_months(season: str) -> List[int]:
    """Map a Korean season name to its months; unknown names give ``[]``.

    Surrounding whitespace is ignored and ``None`` is treated as empty.
    A fresh list is returned on every call.
    """
    key = (season or "").strip()
    if key == "봄":
        return [3, 4, 5]
    if key == "여름":
        return [6, 7, 8]
    if key == "가을":
        return [9, 10, 11]
    if key == "겨울":
        return [12, 1, 2]
    return []

def parse_months_input(inp: Optional[Union[str,int,List[int]]]) -> List[int]:
    """Normalise a user-supplied month specification into month numbers.

    Accepted forms:
      * ``None``                      -> ``[]``
      * an int such as ``3``          -> ``[3]`` (out of range -> ``[]``)
      * a list of int-like values     -> valid months, unique and sorted
      * a season word (봄/여름/가을/겨울) -> months from ``season_to_months``
      * a single range ``"3-5"``      -> ``[3, 4, 5]``; a range that wraps the
        year keeps its order, e.g. ``"11-2"`` -> ``[11, 12, 1, 2]``
      * a comma list ``"3,4"`` whose items may themselves be ranges
        (``"3-5,7"``)                 -> unique months, sorted

    Pieces that cannot be read as a month in 1..12 are skipped. Booleans are
    rejected instead of being read as the integers 0/1.
    """
    def _as_month(value) -> Optional[int]:
        # Convert one value to a month number, or None when it is not one.
        if isinstance(value, bool):
            return None
        try:
            month = int(value)
        except (TypeError, ValueError, OverflowError):
            return None
        return month if 1 <= month <= 12 else None

    def _expand(token: str) -> List[int]:
        # Expand a single "m" or "a-b" token; invalid tokens expand to [].
        if "-" not in token:
            month = _as_month(token)
            return [] if month is None else [month]
        lo_txt, hi_txt = token.split("-", 1)
        lo, hi = _as_month(lo_txt), _as_month(hi_txt)
        if lo is None or hi is None:
            return []
        if lo <= hi:
            return list(range(lo, hi + 1))
        # Wrap-around range such as 11-2.
        return list(range(lo, 13)) + list(range(1, hi + 1))

    if inp is None:
        return []
    if isinstance(inp, list):
        months = [m for m in map(_as_month, inp) if m is not None]
        return sorted(dict.fromkeys(months))
    if isinstance(inp, bool):
        return []
    if isinstance(inp, int):
        return [inp] if 1 <= inp <= 12 else []

    text = str(inp).strip()
    if text in ("봄", "여름", "가을", "겨울"):
        return season_to_months(text)

    tokens = [t for t in text.replace(" ", "").split(",") if t]
    if len(tokens) == 1:
        # A lone token keeps range order so wrap-around input stays readable.
        return _expand(tokens[0])
    months = [m for token in tokens for m in _expand(token)]
    return sorted(dict.fromkeys(months))

# ---------------- 분류 유틸(데이터/규칙/LLM) ----------------
def detect_type_from_dataset(df: pd.DataFrame, name: str) -> Tuple[str, int, str, Optional[dict]]:
    """Look up the festival type of *name* in the dataset.

    Matching is attempted in three stages, stopping at the first hit:
      1. exact match on the stripped festival name,
      2. substring match in either direction on whitespace-free names,
      3. closest fuzzy match (``difflib`` ratio >= 0.75).

    An empty query never matches, and rows whose name is blank are ignored in
    the substring stage; an empty string is a substring of everything, which
    would otherwise return an unrelated row.

    Args:
        df: Festival table with at least ``축제명`` and ``축제 유형`` columns.
        name: Festival name entered by the user.

    Returns:
        ``(type, match_count, matched_name, evidence)``. When nothing matches
        the result is ``("", 0, "", None)``. ``evidence`` holds the source tag,
        file name and serial number of the row that was used.
    """
    query = (name or "").strip()
    if not query:
        return "", 0, "", None

    def _evidence(row) -> dict:
        # Provenance of the row the label was taken from.
        return {"source":"dataset","file": str(row.get("_출처파일","")), "연번": str(row.get("연번",""))}

    names = df["축제명"].astype(str)

    exact = df[names.str.strip() == query]
    if not exact.empty:
        row = exact.iloc[0]
        return str(row["축제 유형"]).strip(), len(exact), str(row["축제명"]), _evidence(row)

    nquery = normalize_text(query)
    if nquery:
        # Normalise each dataset name once rather than twice per row.
        norm_names = names.map(normalize_text)
        mask = norm_names.map(
            lambda n: bool(n) and (n in nquery or nquery in n)
        ).astype(bool)
        cand = df[mask]
        if not cand.empty:
            row = cand.iloc[0]
            return str(row["축제 유형"]).strip(), len(cand), str(row["축제명"]), _evidence(row)

    hits = get_close_matches(name, names.tolist(), n=1, cutoff=0.75)
    if hits:
        row = df[names == hits[0]].iloc[0]
        return str(row["축제 유형"]).strip(), 1, str(row["축제명"]), _evidence(row)
    return "", 0, "", None

def _extract_json(text: str) -> Optional[dict]:
    if not text: return None
    m = re.search(r"\{.*\}", text, flags=re.S)
    if not m: return None
    try:
        return json.loads(m.group(0))
    except Exception:
        return None

def coerce_allowed(label: str) -> str:
    """Map a free-form label onto one of ``ALLOWED_TYPES``.

    Resolution order: exact match, then the first allowed type contained in
    the label, then the first English alias contained in the lower-cased
    label. Returns ``""`` when nothing applies.
    """
    text = (label or "").strip()
    if text in ALLOWED_TYPES:
        return text

    contained = next((t for t in ALLOWED_TYPES if t in text), None)
    if contained is not None:
        return contained

    # Alias order matters: the first alias found in the label wins.
    english_aliases = (
        ("art", "문화예술"), ("music", "문화예술"), ("culture", "문화예술"),
        ("food", "지역특산물"), ("local", "지역특산물"), ("specialty", "지역특산물"),
        ("community", "주민화합"), ("harmony", "주민화합"),
        ("nature", "자연생태"), ("eco", "자연생태"),
        ("tradition", "전통역사"), ("history", "전통역사"), ("heritage", "전통역사"),
    )
    lowered = text.lower()
    for alias, target in english_aliases:
        if alias in lowered:
            return target
    return ""

def llm_infer_type_with_evidence(name: str) -> Tuple[str, List[str]]:
    """Ask the chat model for the festival type of *name*.

    Returns:
        ``(label, hint_keywords)`` where ``label`` has been coerced onto the
        allowed types (``""`` if it could not be) and ``hint_keywords`` is the
        list of hint strings reported by the model. When no client is
        configured, or anything fails during the request or parsing, the
        result is ``("", [])``.
    """
    if _client is None or _client_mode is None:
        return "", []
    prompt = f"""
한국의 축제명: "{name}"

아래 JSON 으로만 출력하세요(설명 금지):
{{
  "label": "문화예술|지역특산물|주민화합|자연생태|전통역사",
  "hint_keywords": ["<이름에서 포착한 힌트 단어 최대 3개>"]
}}
- 반드시 위 5개 라벨 중 하나만 사용
- hint_keywords는 축제명에 실제로 보이는 단어/표현 위주로 간결히
"""
    # The request is identical for both SDK flavours; only the entry point
    # and the response access differ.
    request = {
        "model": OPENAI_MODEL,
        "messages": [
            {"role":"system","content":"Output a compact JSON with the label and up to 3 hint keywords. No explanations."},
            {"role":"user","content":prompt},
        ],
        "temperature": 0,
        "max_tokens": 60,
    }
    try:
        if _client_mode == "new":
            resp = _client.chat.completions.create(**request)
            content = resp.choices[0].message.content.strip()
        else:
            resp = _client.ChatCompletion.create(**request)
            content = resp["choices"][0]["message"]["content"].strip()
        payload = _extract_json(content) or {}
        label = coerce_allowed(payload.get("label",""))
        raw_hints = payload.get("hint_keywords", [])
        if not isinstance(raw_hints, list):
            raw_hints = []
        return label, [str(h) for h in raw_hints if isinstance(h, (str,int))]
    except Exception:
        return "", []

def collect_name_keyword_hits(name: str) -> Dict[str, List[str]]:
    """Collect, per festival type, the rule keywords found in *name*.

    Matching is a case-insensitive substring test against the name both as
    given and with whitespace removed. Only types with at least one hit are
    present in the result; keywords keep their order from ``RULE_KEYWORDS``.
    """
    lowered = (name or "").lower()
    compact = re.sub(r"\s+", "", lowered)
    found: Dict[str, List[str]] = {}
    for category, keywords in RULE_KEYWORDS.items():
        # Keyed by lower-cased keyword so a repeated keyword is reported once.
        matched: Dict[str, str] = {}
        for kw in keywords:
            key = kw.lower()
            if key not in matched and (key in lowered or key in compact):
                matched[key] = kw
        if matched:
            found[category] = list(matched.values())
    return found

def rule_based_label(name_hits: Dict[str, List[str]]) -> str:
    """Pick a festival type from keyword hits, or ``""`` when ambiguous.

    Every hit scores 1 point, or 3 when the keyword is in the type's "strong"
    set. The top type wins if it has at least 2 points and leads the runner-up
    by at least 1. Otherwise a type is returned only if it is the sole type
    with a positive score.
    """
    if not name_hits:
        return ""
    strong = {
        "전통역사": {"전통","전통축제","민속","향토","무형문화재","한옥","서원","향교","읍성","성곽","왕릉","판소리"},
        "자연생태": {"벚꽃","장미","튤립","코스모스","유채","단풍","억새","나비","반딧불","생태","숲","수목원"},
        "지역특산물": {"특산","장터","시장","미식","먹거리","사과","포도","딸기","수산","막걸리","맥주","커피"},
        "문화예술": {"뮤직","음악","콘서트","공연","연극","뮤지컬","전시","페스티벌","재즈","클래식","힙합","kpop","케이팝"},
        "주민화합": {"군민","시민","도민","주민","화합","한마음","한마당","축전","공동체"},
    }

    scores: Dict[str, int] = {}
    for category, keywords in name_hits.items():
        strong_set = strong.get(category, set())
        total = 0
        for kw in keywords:
            lowered = kw.strip().lower()
            is_strong = kw in strong_set or lowered in strong_set
            total += 3 if is_strong else 1
        scores[category] = total

    best_category = max(scores, key=scores.get)
    best_score = scores[best_category]
    if best_score == 0:
        return ""

    descending = sorted(scores.values(), reverse=True)
    runner_up = descending[1] if len(descending) > 1 else 0
    if best_score >= 2 and best_score >= runner_up + 1:
        return best_category

    positive = [cat for cat, value in scores.items() if value > 0]
    return positive[0] if len(positive) == 1 else ""

# --------- LLM으로 시기(계절/월) 추정 (월 입력 없을 때 백업) ----------
def llm_infer_period(name: str) -> Tuple[str, List[int]]:
    """Ask the chat model which season/months the festival *name* suggests.

    Returns:
        ``(season, months)``. ``months`` holds unique month numbers in the
        order reported; when the model gives none but names one of the four
        seasons, that season's months are used. When no client is configured,
        or anything fails during the request or parsing, the result is
        ``("", [])``.
    """
    if _client is None or _client_mode is None:
        return "", []
    prompt = f"""
한국의 축제명: "{name}"

다음 포맷의 JSON으로만 출력(설명 금지):
{{
  "season": "봄|여름|가을|겨울|불명",
  "months": [<1..12 정수들의 배열, 중복X>]
}}
- 축제명이 시기를 강하게 암시하면 그에 맞춰 months를 1~2개월로 좁게
- 불확실하면 season만 판단하고 months는 비워도 됨
"""
    # Same request for both SDK flavours; only the call site differs.
    request = {
        "model": OPENAI_MODEL,
        "messages": [
            {"role":"system","content":"Output only valid JSON."},
            {"role":"user","content":prompt},
        ],
        "temperature": 0,
        "max_tokens": 60,
    }
    try:
        if _client_mode == "new":
            r = _client.chat.completions.create(**request)
            content = r.choices[0].message.content.strip()
        else:
            r = _client.ChatCompletion.create(**request)
            content = r["choices"][0]["message"]["content"].strip()
        payload = _extract_json(content) or {}
        season = str(payload.get("season","")).strip()
        raw_months = payload.get("months", [])
        if not isinstance(raw_months, list):
            raw_months = []
        months = [int(m) for m in raw_months if str(m).isdigit() and 1<=int(m)<=12]
        if not months and season in ("봄","여름","가을","겨울"):
            months = season_to_months(season)
        # Order-preserving de-duplication.
        return season, list(dict.fromkeys(months))
    except Exception:
        return "", []

# ---------------- State / Nodes ----------------
class FestState(BaseModel):
    """State object passed between the pipeline's graph nodes.

    Holds the caller's inputs, intermediate classification results, and the
    filtered output tables.
    """
    # Arbitrary (non-pydantic) field types are allowed for the DataFrame
    # fields below; both the pydantic v2 and v1 spellings are provided.
    if ConfigDict is not None:
        model_config = ConfigDict(arbitrary_types_allowed=True)
    else:
        class Config:
            arbitrary_types_allowed = True

    # --- Inputs ---
    query_name: str = Field("", description="사용자가 입력한 축제명")
    # Raw month specification as given by the caller (int, string, or list).
    target_months_input: Optional[Union[str,int,List[int]]] = None
    # Optional single CSV to load instead of the default candidate files.
    csv_path_override: Optional[str] = None
    exclude_upcoming: bool = True   # whether to drop festivals that have not started yet

    # --- Internal ---
    df: Optional[pd.DataFrame] = None
    detected_type: str = ""
    classification_source: str = ""    # dataset | rule | llm
    matched_name: str = ""
    evidence: Dict[str, str] = {}      # string values only
    name_keyword_hits: Dict[str, List[str]] = {}
    target_months: List[int] = []
    detected_season: str = ""

    # --- Outputs ---
    same_type_df: Optional[pd.DataFrame] = None
    period_filtered_df: Optional[pd.DataFrame] = None

def node_load_data(state: FestState) -> FestState:
    """Graph node: load the festival table into ``state.df``.

    When ``csv_path_override`` points to an existing file, only that file is
    loaded; otherwise the default candidate files are used. Both routes go
    through ``read_many_csv`` so decoding, column validation, source tagging
    and de-duplication behave the same regardless of where the data comes
    from (the override route previously re-implemented a subset of this).

    Note: an override path that does not exist is not an error; the default
    candidates are loaded instead.
    """
    override = state.csv_path_override
    if override and Path(override).exists():
        paths = [override]
    else:
        paths = CANDIDATE_FILES
    state.df = read_many_csv(paths)
    return state

def node_detect_type(state: FestState) -> FestState:
    """Graph node: determine the festival type of the query name.

    Sources are tried in order -- dataset lookup, keyword rules, LLM -- and
    the first one that yields a label wins. The chosen label, its source, the
    matched dataset name (dataset source only) and string-valued evidence are
    stored on the state. The LLM result is stored even when its label is
    empty.
    """
    assert state.df is not None
    query = state.query_name
    state.name_keyword_hits = collect_name_keyword_hits(query)

    # 1) Dataset lookup.
    ds_label, _count, ds_name, ds_evidence = detect_type_from_dataset(state.df, query)
    if ds_label:
        evidence = {"dataset_label": ds_label}
        if ds_evidence:
            for key, value in ds_evidence.items():
                evidence[key] = str(value)
        state.detected_type = ds_label
        state.classification_source = "dataset"
        state.matched_name = ds_name
        state.evidence = evidence
        return state

    # 2) Keyword rules.
    rule_label = rule_based_label(state.name_keyword_hits)
    if rule_label:
        matched_keywords = state.name_keyword_hits.get(rule_label, [])
        state.detected_type = rule_label
        state.classification_source = "rule"
        state.matched_name = ""
        state.evidence = {"rule_label": rule_label, "rule_hits": ", ".join(matched_keywords)}
        return state

    # 3) LLM fallback.
    llm_label, llm_hints = llm_infer_type_with_evidence(query)
    state.detected_type = llm_label
    state.classification_source = "llm"
    state.matched_name = ""
    state.evidence = {"llm_label": llm_label, "llm_hint_keywords": ", ".join(llm_hints)}
    return state

def node_detect_period(state: FestState) -> FestState:
    """Graph node: decide which months to filter on.

    Priority: explicit month input, then the date span of the dataset row
    whose name equals the matched (or queried) festival name, then an LLM
    estimate. The dataset route also sets a season hint when none is set yet.
    """
    assert state.df is not None

    # 0) Explicit input wins.
    explicit = parse_months_input(state.target_months_input)
    if explicit:
        state.target_months = explicit
        return state

    # 1) Months covered by the matching dataset row.
    lookup_name = (state.matched_name or state.query_name).strip()
    stripped_names = state.df["축제명"].astype(str).str.strip()
    matches = state.df[stripped_names == lookup_name]
    if not matches.empty:
        first = matches.iloc[0]
        covered = months_in_range(
            parse_date_str(first["시작일"]),
            parse_date_str(first["종료일"]),
        )
        if covered:
            state.target_months = sorted(covered)
            if not state.detected_season:
                # First season (in calendar order from spring) that overlaps.
                season_table = (
                    ("봄", {3,4,5}),
                    ("여름", {6,7,8}),
                    ("가을", {9,10,11}),
                    ("겨울", {12,1,2}),
                )
                for season_name, season_months in season_table:
                    if covered & season_months:
                        state.detected_season = season_name
                        break
            return state

    # 2) LLM estimate.
    season, inferred = llm_infer_period(state.query_name)
    state.detected_season = season or ""
    state.target_months = inferred or []
    return state

def node_filter_same_type(state: FestState) -> FestState:
    """Graph node: keep dataset rows of the detected festival type.

    Rows are optionally restricted to festivals whose start date is valid and
    not after today, the queried/matched festival itself is removed, and the
    result is sorted by region and name. An empty frame is stored when no
    type was detected.
    """
    assert state.df is not None
    if not state.detected_type:
        state.same_type_df = pd.DataFrame()
        return state

    type_values = state.df["축제 유형"].astype(str).str.strip()
    same = state.df[type_values == state.detected_type].copy()
    same["시작일_dt"] = same["시작일"].apply(parse_date_str)

    # Optionally drop festivals that have not started yet.
    if state.exclude_upcoming:
        today = today_floor_ts()
        already_started = same["시작일_dt"].notna() & (same["시작일_dt"] <= today)
        same = same[already_started]

    # Drop the festival being queried (by input name or matched dataset name).
    self_keys = {normalize_text(state.query_name)}
    matched_key = normalize_text(state.matched_name) if state.matched_name else ""
    if matched_key:
        self_keys.add(matched_key)
    is_self = same["축제명"].astype(str).apply(lambda n: normalize_text(n) in self_keys)
    same = same[~is_self]

    same = same.sort_values(by=["광역자치단체명","기초자치단체명","축제명"])
    output_columns = ["연번","광역자치단체명","기초자치단체명","축제명","축제 유형","시작일","종료일","_출처파일","시작일_dt"]
    state.same_type_df = same[output_columns]
    return state

def node_filter_by_period(state: FestState) -> FestState:
    """Graph node: keep same-type festivals that overlap the target months.

    Without target months the same-type table is passed through (minus the
    helper date column). Otherwise only rows with valid start and end dates
    whose span touches at least one target month are kept.
    """
    base = state.same_type_df
    if base is None or base.empty:
        state.period_filtered_df = pd.DataFrame()
        return state

    wanted = {int(m) for m in state.target_months if 1<=int(m)<=12}
    if not wanted:
        # No target months: return the same-type rows unchanged.
        state.period_filtered_df = base.drop(columns=["시작일_dt"], errors="ignore")
        return state

    work = base.copy()
    work["종료일_dt"] = work["종료일"].apply(parse_date_str)

    overlaps = work.apply(
        lambda row: bool(months_in_range(row["시작일_dt"], row["종료일_dt"]) & wanted),
        axis=1,
    )
    has_both_dates = work["시작일_dt"].notna() & work["종료일_dt"].notna()
    kept = work[has_both_dates & overlaps]
    state.period_filtered_df = kept.drop(columns=["시작일_dt","종료일_dt"]).reset_index(drop=True)
    return state

def build_app():
    """Assemble and compile the linear processing graph.

    The nodes run in a fixed chain: load data, detect type, detect period,
    filter by type, filter by period.
    """
    pipeline = (
        ("load_data", node_load_data),
        ("detect_type", node_detect_type),
        ("detect_period", node_detect_period),
        ("filter_same_type", node_filter_same_type),
        ("filter_by_period", node_filter_by_period),
    )
    graph = StateGraph(FestState)
    for node_name, handler in pipeline:
        graph.add_node(node_name, handler)

    # Chain START -> each node in order -> END.
    order = [START] + [node_name for node_name, _ in pipeline] + [END]
    for src, dst in zip(order, order[1:]):
        graph.add_edge(src, dst)
    return graph.compile()

# Compiled once at import time and reused by the runner.
APP = build_app()

def _to_state(obj) -> FestState:
    """Coerce a graph result into a ``FestState``.

    An existing ``FestState`` is returned as is. Otherwise the pydantic v2
    constructor is tried, then the v1 one, and finally plain keyword
    construction (whose errors propagate).
    """
    if isinstance(obj, FestState):
        return obj
    for factory_name in ("model_validate", "parse_obj"):
        try:
            return getattr(FestState, factory_name)(obj)
        except Exception:
            continue
    return FestState(**obj)

# ---------------- Runner ----------------
def run_pipeline_with_months(
    festival_name: str,
    months: Optional[Union[str,int,List[int]]] = None,
    csv_path_override: Optional[str] = None,
    print_limit: int = 300,
    dedupe: bool = True,
    exclude_upcoming: bool = True,
) -> FestState:
    """Run the pipeline for one festival and print a summary report.

    Args:
        festival_name: Festival name to classify and compare against.
        months: Target months (int, "3,4", "3-5", season word, or list);
            ``None`` lets the pipeline infer them.
        csv_path_override: Optional CSV to use instead of the default files.
        print_limit: Maximum number of result lines printed.
        dedupe: Collapse result rows that share a festival name before
            printing.
        exclude_upcoming: Drop festivals whose start date is in the future.

    Returns:
        The final pipeline state. Its result tables are left untouched by the
        reporting step: sorting uses a temporary copy, so no helper column is
        added to ``period_filtered_df`` (previously a ``_sd`` column leaked
        into it when ``dedupe`` was false).
    """
    init = FestState(
        query_name=festival_name,
        target_months_input=months,
        csv_path_override=csv_path_override,
        exclude_upcoming=exclude_upcoming,
    )
    raw = APP.invoke(init)
    s = _to_state(raw)

    # --- Header ---
    print(f"입력 축제명: {festival_name}")
    print(f"탐지된 축제유형: {s.detected_type or '(탐지 실패)'}  | 출처: {s.classification_source or '-'}")

    # Evidence line for whichever source produced the label.
    if s.classification_source == "dataset":
        file = s.evidence.get("file",""); lab = s.evidence.get("dataset_label",""); no = s.evidence.get("연번","")
        print(f"근거(dataset): CSV의 '축제 유형'='{lab}' (파일: {file}, 연번: {no})")
    elif s.classification_source == "rule":
        lab = s.evidence.get("rule_label",""); hits = s.evidence.get("rule_hits","")
        print(f"근거(rule): 규칙 라벨='{lab}', 히트 키워드=[{hits}]")
    elif s.classification_source == "llm":
        lab = s.evidence.get("llm_label",""); hints = s.evidence.get("llm_hint_keywords","")
        print(f"근거(llm): 모델 라벨='{lab}', 힌트 키워드=[{hints}]")

    # Month summary.
    months_txt = ",".join(str(m) for m in s.target_months) if s.target_months else "-"
    season_txt = s.detected_season or "-"
    print(f"타깃 월: {months_txt} (시즌 추정: {season_txt})")

    # Result lines, ordered by start date then name.
    result = s.period_filtered_df if s.period_filtered_df is not None else pd.DataFrame()
    rows: List[str] = []
    if not result.empty:
        if dedupe:
            result = result.drop_duplicates(subset=["축제명"], keep="first")
        # assign() returns a new frame, so the state's table is not modified.
        ordered = result.assign(
            _sd=pd.to_datetime(result["시작일"], errors="coerce")
        ).sort_values(by=["_sd","축제명"])
        rows = [
            f"{fest}  {start} ~ {end}"
            for fest, start, end in zip(ordered["축제명"], ordered["시작일"], ordered["종료일"])
        ]

    print(f"\n동일 유형 & 같은 시기 축제 수: {len(rows)}")
    if rows:
        print("\n[동일 유형 · 유사 시기]")
        for i, line in enumerate(rows[:print_limit], start=1):
            print(f"{i:>3}. {line}")
        if len(rows) > print_limit:
            print(f"... ({len(rows)-print_limit}개 더 있음)")
    return s

# ------------- Example -------------
if __name__ == "__main__":
    # Example 1) season keyword: "겨울" (winter -> months 12, 1, 2)
    state = run_pipeline_with_months("임실 산타축제", months="겨울")
    # Example 2) month range: "11-12"
    # state = run_pipeline_with_months("담양산타축제", months="11-12")
    # Example 3) season keyword: "겨울"
    # state = run_pipeline_with_months("담양산타축제", months="겨울")
    # Example 4) no months given -> inferred from the dataset or the LLM
    # state = run_pipeline_with_months("담양산타축제", months=None)


입력 축제명: 임실 산타축제
탐지된 축제유형: 주민화합  | 출처: dataset
근거(dataset): CSV의 '축제 유형'='주민화합' (파일: 2025_축제_핵심필드.csv, 연번: 965)
타깃 월: 12,1,2 (시즌 추정: -)

동일 유형 & 같은 시기 축제 수: 8

[동일 유형 · 유사 시기]
  1. 제13회 금산천 봄꽃축제  2024-01-04 ~ 2024-01-04
  2. 제13회 남일면 홍도화축제  2024-01-04 ~ 2024-01-04
  3. (가제) 상플 1·8부두 야시장  2024-01-06 ~ 2024-01-06
  4. 제11회 야맥축제  2024-01-06 ~ 2024-01-06
  5. 기장갯마을축제  2024-01-07 ~ 2024-01-08
  6. 동인천 낭만축제  2024-01-10 ~ 2024-01-10
  7. 태백제   2024-01-10 ~ 2024-01-10
  8. (가칭)오산 산타 마켓(제2회 오산 크리스마스 마켓)  2024-11-22 ~ 2024-12-24
