In [18]:
# Cell 1: Install minimal deps (safe re-run)
import sys, subprocess

def pip_install(pkg):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])
    except Exception as e:
        print(f"Warning: couldn't install {pkg}: {e}")

# Colab th∆∞·ªùng ƒë√£ c√≥ pandas & python-dateutil; ta ch·ªâ ƒë·∫£m b·∫£o pydantic v2+
pip_install("pydantic>=2.0.0")
# N·∫øu m√¥i tr∆∞·ªùng thi·∫øu dateutil (hi·∫øm), b·ªè comment d√≤ng d∆∞·ªõi:
# pip_install("python-dateutil>=2.8.2")

print("‚úÖ Cell 1 done: Dependencies checked/installed.")


‚úÖ Cell 1 done: Dependencies checked/installed.


thi·∫øt l·∫≠p m√∫i gi·ªù, import

In [19]:
# Cell 2: Env & imports
import os
os.environ["TZ"] = "Asia/Ho_Chi_Minh"

# Core imports
import json
from datetime import datetime, date, timedelta
import pandas as pd
from dateutil.relativedelta import relativedelta
from dateutil import tz
from pydantic import BaseModel, Field, ValidationError

# Confirm timezone
local_tz = tz.gettz("Asia/Ho_Chi_Minh")
now_local = datetime.now(local_tz)
print(f"‚úÖ Cell 2 done: Timezone set to Asia/Ho_Chi_Minh. Now: {now_local:%Y-%m-%d %H:%M:%S %Z}")


‚úÖ Cell 2 done: Timezone set to Asia/Ho_Chi_Minh. Now: 2025-09-11 02:35:23 +07


c·∫•u h√¨nh k·∫øt n·ªëi Postgres, helper db_available().

In [20]:
# DB config (PostgreSQL) for persona notebook
import os
from sqlalchemy import create_engine, text

PG_HOST = os.getenv("PG_HOST", "localhost")
PG_PORT = int(os.getenv("PG_PORT", "5435"))
PG_DB   = os.getenv("PG_DB", "db_fin")
PG_USER = os.getenv("PG_USER", "HiepData")
PG_PASSWORD = os.getenv("PG_PASSWORD", "123456")

PG_URL = f"postgresql+psycopg2://{PG_USER}:{PG_PASSWORD}@{PG_HOST}:{PG_PORT}/{PG_DB}"

engine = None
try:
    engine = create_engine(PG_URL, pool_pre_ping=True, future=True)
    with engine.connect() as conn:
        conn.execute(text("SELECT 1"))
    print("‚úÖ PostgreSQL connected (persona):", PG_URL.rsplit("@",1)[-1])
except Exception as e:
    engine = None
    print("‚ÑπÔ∏è Persona notebook cannot connect PostgreSQL:", e)


def db_available() -> bool:
    return engine is not None



‚úÖ PostgreSQL connected (persona): localhost:5435/db_fin


ƒë·∫∑t DATA_DIR ƒë·ªÉ ƒë·ªçc CSV fallback khi kh√¥ng c√≥ DB.

In [21]:
# Load demo feature_monthly ∆∞u ti√™n t·ª´ PostgreSQL, fallback CSV/JSON
import os, json, pandas as pd
from sqlalchemy import text

# ƒê·∫£m b·∫£o DATA_DIR t·ªìn t·∫°i tr∆∞·ªõc khi d√πng
if 'DATA_DIR' not in globals():
    DATA_DIR = os.getcwd()

csv_path = os.path.join(DATA_DIR, "features_monthly.csv")
json_path = os.path.join(DATA_DIR, "feature_monthly_demo_updated.json")

df = None
if db_available():
    try:
        with engine.connect() as conn:
            sql = text("""
                SELECT * FROM features_monthly
                ORDER BY customer_id ASC
                LIMIT 1
            """)
            df = pd.read_sql(sql, conn)
            print("‚úÖ Loaded demo row from PostgreSQL")
    except Exception as e:
        print("‚ÑπÔ∏è DB read failed, fallback CSV/JSON:", e)

if df is None or df.empty:
    if os.path.exists(csv_path):
        df = pd.read_csv(csv_path)
        print(f"‚úÖ Loaded CSV: {csv_path}")
    elif os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        df = pd.DataFrame(data)
        print(f"‚úÖ Loaded JSON: {json_path}")
    else:
        raise FileNotFoundError("‚ùå Kh√¥ng t√¨m th·∫•y d·ªØ li·ªáu demo (DB/CSV/JSON).")

print("---- DataFrame preview ----")
display(df)



‚úÖ Loaded demo row from PostgreSQL
---- DataFrame preview ----


Unnamed: 0,customer_id,year_month,age,segment,income,spend,balance_avg,loan,digital_logins_30d,incoming_tx_cnt_30d,...,spend_ratio,dti,cashflow_volatility,liquidity_buffer,digital_index,inflow_baseline_90d,max_inflow_z_30d,max_inflow_pct_30d,large_inflow_flag_7d,days_since_large_inflow
0,1,2025-08,48,family,9475780.0,7623236.0,1877863.0,20676745.0,19,2,...,0.804,2.182,0.164,0.868,0.826,9818341.0,-0.016,0.496,0,119


In [22]:
# Cell 3: Local path config (run outside Colab)
import os

DATA_DIR = os.getcwd()
print("‚úÖ Using local DATA_DIR:", DATA_DIR)


‚úÖ Using local DATA_DIR: c:\Users\admin1\Downloads\hackathon


Install & init Gemini

In [23]:
# # Cell 4: Load demo feature_monthly t·ª´ file local trong th∆∞ m·ª•c d·ª± √°n

# import os, json, pandas as pd

# csv_path = os.path.join(DATA_DIR, "features_monthly.csv")
# json_path = os.path.join(DATA_DIR, "feature_monthly_demo_updated.json")  # optional fallback n·∫øu c√≥

# if os.path.exists(csv_path):
#     df = pd.read_csv(csv_path)
#     print(f"‚úÖ Loaded CSV: {csv_path}")
# elif os.path.exists(json_path):
#     with open(json_path, "r", encoding="utf-8") as f:
#         data = json.load(f)
#     df = pd.DataFrame(data)
#     print(f"‚úÖ Loaded JSON: {json_path}")
# else:
#     raise FileNotFoundError(
#         "‚ùå Kh√¥ng t√¨m th·∫•y 'features_monthly.csv' trong th∆∞ m·ª•c d·ª± √°n."
#     )

# print("---- DataFrame preview ----")
# display(df)


In [24]:
# Cell 5: Build planning_context JSON t·ª´ 1 h√†ng feature_monthly

from datetime import datetime, timedelta
from dateutil import tz

def _to_int_safe(x, default=0):
    try:
        return int(x)
    except Exception:
        return default

def _parse_bills(s: str):
    """
    '10:rent=3500000;15:electric=500000;25:net=300000'
    -> [{'dom':10,'title':'rent','amount':3500000}, ...]
    """
    out = []
    if not isinstance(s, str) or not s.strip():
        return out
    parts = [p.strip() for p in s.split(";") if p.strip()]
    for p in parts:
        try:
            dom_str, rest = p.split(":", 1)
            title, amount_str = rest.split("=", 1)
            out.append({
                "dom": int(dom_str),
                "title": title.strip(),
                "amount": _to_int_safe(amount_str)
            })
        except:
            continue
    return out

def _parse_events(s: str):
    """
    '13:birthday=300000;20:trip=1000000'
    -> [{'dom':13,'title':'birthday','budget':300000}, ...]
    """
    out = []
    if not isinstance(s, str) or not s.strip():
        return out
    parts = [p.strip() for p in s.split(";") if p.strip()]
    for p in parts:
        try:
            dom_str, rest = p.split(":", 1)
            title, amount_str = rest.split("=", 1)
            out.append({
                "dom": int(dom_str),
                "title": title.strip(),
                "budget": _to_int_safe(amount_str)
            })
        except:
            continue
    return out

def _date_candidates_in_horizon(today_dt, dom_values, horizon_days):
    """
    Tr·∫£ v·ªÅ danh s√°ch datetime trong c·ª≠a s·ªï [today, today+horizon_days)
    kh·ªõp v·ªõi day-of-month (DOM) cho th√°ng hi·ªán t·∫°i.
    N·∫øu DOM ƒë√£ qua trong th√°ng hi·ªán t·∫°i, b·ªè qua (kh√¥ng l√πi v·ªÅ th√°ng tr∆∞·ªõc).
    """
    last_dt = today_dt + timedelta(days=horizon_days)
    out = []
    cur = today_dt
    while cur < last_dt:
        if cur.day in dom_values and cur.month == today_dt.month:
            out.append(cur)
        cur += timedelta(days=1)
    return out

def build_planning_context_from_row(row, horizon_days=7, persona="mentor"):
    local_tz = tz.gettz("Asia/Ho_Chi_Minh")
    today_dt = datetime.now(local_tz).date()  # d√πng gi·ªù ƒë·ªãa ph∆∞∆°ng ƒë√£ set

    # ƒê·ªçc tr∆∞·ªùng b·∫Øt bu·ªôc (√°nh x·∫° t·ª´ schema th·ª±c t·∫ø)
    customer_id = int(row.get("customer_id", 0))

    # income/fixed/variable: √°nh x·∫° m·ªÅm t·ª´ c√°c c·ªôt c√≥ th·∫≠t
    income_net = int(row.get("income_net_month", row.get("income", 0)))
    fixed_bills = int(row.get("fixed_bills_month", row.get("loan", 0)))
    variable_spend = int(row.get("variable_spend_month", row.get("spend", 0)))

    cash_on_hand = int(row.get("cash_on_hand", 0))

    # goal: n·∫øu kh√¥ng c√≥, ƒë·∫∑t m·ª•c ti√™u t·ªëi thi·ªÉu ƒë·ªÉ kh√¥ng g√£y lu·ªìng
    goal_amount = int(row.get("goal_amount", max(0, int(0.1 * income_net) if income_net else 1000000)))
    # deadline m·∫∑c ƒë·ªãnh = cu·ªëi th√°ng hi·ªán t·∫°i
    try:
        goal_deadline = str(row.get("goal_deadline"))
        if goal_deadline in [None, "None", "nan", "NaT"]:
            raise Exception()
    except Exception:
        from datetime import date
        d = date(today_dt.year, today_dt.month, 28)
        goal_deadline = d.isoformat()

    # Budgets baseline theo nh√≥m (monthly) v·ªõi fallback v·ªÅ spend/4
    def _fallback_budget(key):
        return int(row.get(key, 0)) if row.get(key) is not None else 0

    base_guess = int(variable_spend/4) if variable_spend else 0
    budgets_baseline = {
        "food_out": _fallback_budget("food_out_month") or base_guess,
        "snacks": _fallback_budget("snacks_month") or base_guess,
        "transport": _fallback_budget("transport_month") or base_guess,
        "entertainment": _fallback_budget("entertainment_month") or base_guess,
    }

    # Payday DOM (cho ph√©p nhi·ªÅu gi√° tr·ªã ph√¢n t√°ch ; ho·∫∑c ,)
    payday_dom_str = str(row.get("payday_dom", "")).strip()
    payday_dom_list = []
    if payday_dom_str:
        for token in payday_dom_str.replace(",", ";").split(";"):
            token = token.strip()
            if token:
                try:
                    payday_dom_list.append(int(token))
                except:
                    pass

    # Bills & Events
    bills = _parse_bills(str(row.get("bill_dom_list", "")))
    events = _parse_events(str(row.get("events_dom_list", "")))

    # T√¨m c√°c ng√†y ƒë·∫∑c bi·ªát trong horizon
    # Payday
    special_days = []
    if payday_dom_list:
        paydays = _date_candidates_in_horizon(today_dt, payday_dom_list, horizon_days)
        for d in paydays:
            special_days.append({"date": d.isoformat(), "kind": "payday"})

    # Bills
    bill_doms = [b["dom"] for b in bills]
    bill_dates = _date_candidates_in_horizon(today_dt, bill_doms, horizon_days)
    for d in bill_dates:
        # kh·ªõp b·∫£n ghi
        dom = d.day
        for b in bills:
            if b["dom"] == dom:
                special_days.append({
                    "date": d.isoformat(),
                    "kind": "bill",
                    "title": b["title"],
                    "amount_lock": b["amount"]
                })

    # Events
    event_doms = [e["dom"] for e in events]
    event_dates = _date_candidates_in_horizon(today_dt, event_doms, horizon_days)
    for d in event_dates:
        dom = d.day
        for e in events:
            if e["dom"] == dom:
                special_days.append({
                    "date": d.isoformat(),
                    "kind": "event",
                    "title": e["title"],
                    "budget_est": e["budget"]
                })

    planning_context = {
        "customer_id": customer_id,
        "today": today_dt.isoformat(),
        "horizon_days": int(horizon_days),
        "income_net": income_net,
        "fixed_bills": fixed_bills,
        "variable_spend": variable_spend,
        "cash_on_hand": cash_on_hand,
        "goal_amount": goal_amount,
        "goal_deadline": goal_deadline,
        "budgets_baseline": budgets_baseline,
        "special_days": sorted(special_days, key=lambda x: x["date"]),
        "persona": persona  # "mentor" | "angry_mom" | "banter"
    }
    print("‚úÖ planning_context generated.")
    return planning_context

# L·∫•y h√†ng ƒë·∫ßu ti√™n ƒë·ªÉ demo
row0 = df.iloc[0].to_dict()
planning_context_7d = build_planning_context_from_row(row0, horizon_days=14, persona="mentor")

import json
print(json.dumps(planning_context_7d, indent=2, ensure_ascii=False))


‚úÖ planning_context generated.
{
  "customer_id": 1,
  "today": "2025-09-11",
  "horizon_days": 14,
  "income_net": 9475780,
  "fixed_bills": 20676745,
  "variable_spend": 7623236,
  "cash_on_hand": 0,
  "goal_amount": 947578,
  "goal_deadline": "2025-09-28",
  "budgets_baseline": {
    "food_out": 1905809,
    "snacks": 1905809,
    "transport": 1905809,
    "entertainment": 1905809
  },
  "special_days": [],
  "persona": "mentor"
}


In [25]:
# Cell 6: Install & init Gemini (env-based, safe when missing)
import os, sys, subprocess

def pip_install(pkg):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])
    except Exception as e:
        print(f"Warning: couldn't install {pkg}: {e}")

# ƒê·∫£m b·∫£o c√≥ SDK ch√≠nh th·ª©c c·ªßa Gemini
pip_install("google-generativeai>=0.7.2")

# ƒê·ªçc API key t·ª´ bi·∫øn m√¥i tr∆∞·ªùng (ƒë√£ thi·∫øt l·∫≠p ngo√†i notebook)
# N·∫øu kernel kh√¥ng th·∫•y bi·∫øn m√¥i tr∆∞·ªùng do kh·ªüi ƒë·ªông tr∆∞·ªõc khi `setx`, th·ª≠ l·∫•y t·ª´ Windows User env (Registry)
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    try:
        import sys
        if sys.platform.startswith("win"):
            import winreg
            with winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Environment") as k:
                v, _ = winreg.QueryValueEx(k, "GEMINI_API_KEY")
                if v:
                    os.environ["GEMINI_API_KEY"] = v
                    GEMINI_API_KEY = v
                    print("‚ÑπÔ∏è Loaded GEMINI_API_KEY from Windows user env.")
    except Exception:
        pass

GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")

try:
    import google.generativeai as genai
except Exception:
    genai = None

model = None
if genai is not None and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel(GEMINI_MODEL)
        print(f"‚úÖ Cell 6 done: Gemini model '{GEMINI_MODEL}' is ready.")
    except Exception as e:
        print("‚ö†Ô∏è Gemini init failed:", e)
else:
    print("‚ÑπÔ∏è Gemini not configured (missing package or GEMINI_API_KEY). LLM calls will be skipped.")


‚úÖ Cell 6 done: Gemini model 'gemini-2.0-flash' is ready.


In [26]:
# # Cell 7 (REPLACE): Generate strict-JSON 7-day plan from planning_context

# import json, re

# # Re-init model with strict JSON response (only if model is available)
# PERSONA_MAP = {
#     "mentor": "Ng∆∞·ªùi c·ªë v·∫•n th√¥ng th√°i",
#     "angry_mom": "M·∫π gi·∫≠n d·ªØ",
#     "banter": "Th√≠ch c√† kh·ªãa"
# }

# def build_user_prompt(pc: dict) -> str:
#     persona_label = PERSONA_MAP.get(pc.get("persona", "mentor"), "Ng∆∞·ªùi c·ªë v·∫•n th√¥ng th√°i")
#     return json.dumps({
#         "persona": persona_label,
#         "planning_context": pc,
#         "output_contract": {
#             "week_plan_item": {"date": "YYYY-MM-DD", "tasks": ["task1", "task2"]},
#             "week_plan_length": pc.get("horizon_days", 7),
#             "supervision_note_example": "T√¥i s·∫Ω gi√°m s√°t tu·∫ßn n√†y. ƒê·∫°t ‚Üí l·∫∑p l·∫°i; Kh√¥ng ƒë·∫°t ‚Üí ƒëi·ªÅu ch·ªânh."
#         }
#     }, ensure_ascii=False)

# def generate_week_plan_json(pc: dict):
#     if model is None:
#         raise RuntimeError("GEMINI_API_KEY ch∆∞a ƒë∆∞·ª£c thi·∫øt l·∫≠p ho·∫∑c SDK ch∆∞a s·∫µn s√†ng. Kh√¥ng th·ªÉ g·ªçi LLM.")

#     # override system instruction via new GenerativeModel with JSON output
#     mdl = None
#     try:
#         import google.generativeai as genai
#         mdl = genai.GenerativeModel(
#             model_name=GEMINI_MODEL,
#             system_instruction=(
#                 "B·∫°n l√† LLM t∆∞ v·∫•n ti·∫øt ki·ªám c√° nh√¢n.\n"
#                 "Nhi·ªám v·ª•:\n"
#                 "1. KH√îNG gi·∫£i th√≠ch, KH√îNG vi·∫øt th√™m l·ªùi ngo√†i JSON.\n"
#                 '2. Ch·ªâ xu·∫•t JSON h·ª£p l·ªá c√≥ 2 key: "week_plan" (list) v√† "supervision_note".\n'
#                 '3. "week_plan" c√≥ ƒë√∫ng N ng√†y (N=horizon_days).\n'
#                 '   - M·ªói ng√†y: {"date":"YYYY-MM-DD","tasks":["task1","task2",...]}.\n'
#                 "   - M·ªói task ph·∫£i l√† m·ªánh l·ªánh r√µ r√†ng, c√≥ s·ªë c·ª• th·ªÉ ho·∫∑c h√†nh ƒë·ªông ƒëo l∆∞·ªùng ƒë∆∞·ª£c.\n"
#                 '   - C·∫•m d√πng t·ª´ m∆° h·ªì ki·ªÉu "xem x√©t", "theo d√µi", "l√™n k·∫ø ho·∫°ch".\n'
#                 "4. √Åp d·ª•ng special_days:\n"
#                 '   - payday ‚Üí task "chuy·ªÉn ngay 20‚Äì30% l∆∞∆°ng (ho·∫∑c s·ªë ƒë·ªß ƒë·ªÉ ƒë·∫°t weekly_save) v√†o qu·ªπ", KH√îNG v∆∞·ª£t 30%.\n'
#                 '   - bill ‚Üí task "kh√≥a s·ªë ti·ªÅn bill X" + "c·∫•m chi ti√™u bi·∫øn ƒë·ªïi trong ng√†y ƒë√≥".\n'
#                 '   - event ‚Üí task "phong b√¨ X cho s·ª± ki·ªán" + "c·∫Øt gi·∫£i tr√≠ v·ªÅ 0 trong tu·∫ßn n√†y".\n'
#                 "5. Persona ·∫£nh h∆∞·ªüng ƒë·∫øn C√ÅCH VI·∫æT task:\n"
#                 "   - Mentor: nh·∫π nh√†ng nh∆∞ng r√µ con s·ªë.\n"
#                 "   - Angry_mom: m·ªánh l·ªánh c·ª©ng r·∫Øn, nghi√™m kh·∫Øc.\n"
#                 "   - Banter: ch√¢m ch·ªçc, h√≥m h·ªânh, nh∆∞ng v·∫´n n√™u con s·ªë c·ª• th·ªÉ.\n"
#                 '6. supervision_note lu√¥n = "T√¥i s·∫Ω gi√°m s√°t tu·∫ßn n√†y. ƒê·∫°t ‚Üí l·∫∑p l·∫°i; Kh√¥ng ƒë·∫°t ‚Üí ƒëi·ªÅu ch·ªânh."'
#             ),
#             generation_config={
#                 "temperature": 0.4,
#                 "response_mime_type": "application/json"
#             }
#         )
#     except Exception as e:
#         raise RuntimeError(f"Kh√¥ng kh·ªüi t·∫°o ƒë∆∞·ª£c Gemini model: {e}")

#     prompt_payload = build_user_prompt(pc)
#     resp = mdl.generate_content(prompt_payload)
#     text = ""
#     if resp and getattr(resp, "candidates", None):
#         part = resp.candidates[0].content.parts[0]
#         text = getattr(part, "text", "") or getattr(part, "inline_data", {}).get("data", "")
#     if not text:
#         raise ValueError("Model returned empty response.")

#     # Parse JSON
#     try:
#         data = json.loads(text)
#     except json.JSONDecodeError:
#         m = re.search(r"\{[\s\S]*\}\s*$", text.strip())
#         if not m:
#             raise
#         data = json.loads(m.group(0))

#     if "week_plan" not in data or not isinstance(data["week_plan"], list):
#         raise AssertionError("week_plan missing or not a list")
#     if "supervision_note" not in data:
#         raise AssertionError("supervision_note missing")

#     expected_len = int(pc.get("horizon_days", 7))
#     if len(data["week_plan"]) != expected_len:
#         data["week_plan"] = data["week_plan"][:expected_len]

#     print("‚úÖ Generated 7-day plan JSON:")
#     print(json.dumps(data, indent=2, ensure_ascii=False))
#     return data

# week_plan_json = generate_week_plan_json(planning_context_7d)


In [27]:
# # Cell 8: Generate 14-day plan JSON (c√≥ ng√†y event 13/09)

# # T·∫°o planning_context 14 ng√†y t·ª´ row ƒë√£ load
# planning_context_14d = build_planning_context_from_row(
#     df.iloc[0].to_dict(),
#     horizon_days=14,
#     persona="mentor"   # b·∫°n c√≥ th·ªÉ ƒë·ªïi: "angry_mom", "banter"
# )

# week_plan_json_14d = generate_week_plan_json(planning_context_14d)


In [28]:
# # Cell 9 (updated): Persona-based reply v·ªõi s·ªë ti·ªÅn c·ª• th·ªÉ

# import re

# def _extract_amount(text: str) -> int:
#     """
#     Parse s·ªë ti·ªÅn t·ª´ event_text (d·∫°ng '100k', '200000', '1.2m', '1,000,000').
#     Tr·∫£ v·ªÅ s·ªë int (VND) ho·∫∑c None n·∫øu kh√¥ng parse ƒë∆∞·ª£c.
#     """
#     text = text.lower().replace(".", "").replace(",", "")
#     m = re.search(r"(\d+)(k|m|vnƒë|vnd)?", text)
#     if not m:
#         return None
#     num, unit = m.group(1), m.group(2)
#     amount = int(num)
#     if unit == "k":
#         amount *= 1000
#     elif unit == "m":
#         amount *= 1000000
#     return amount

# def persona_reply(event_text: str, persona: str = "mentor") -> str:
#     """
#     Tr·∫£ v·ªÅ ph·∫£n h·ªìi theo persona, lu√¥n k√®m s·ªë ti·ªÅn ƒëi·ªÅu ch·ªânh c·ª• th·ªÉ n·∫øu parse ƒë∆∞·ª£c.
#     """
#     amt = _extract_amount(event_text)
#     amt_str = f"{amt:,} VNƒê".replace(",", ".") if amt else None

#     if persona == "mentor":
#         if amt_str:
#             return f"B·∫°n v·ª´a chi {amt_str} cho {event_text}. H√£y c·∫Øt ƒë√∫ng {amt_str} kh·ªèi chi ti√™u ng√†y mai ƒë·ªÉ gi·ªØ m·ª•c ti√™u."
#         else:
#             return f"B·∫°n v·ª´a ph√°t sinh: {event_text}. H√£y gi·∫£m chi ti√™u ng√†y mai t∆∞∆°ng ·ª©ng ƒë·ªÉ gi·ªØ m·ª•c ti√™u."
#     elif persona == "angry_mom":
#         if amt_str:
#             return f"L·∫°i ti√™u {amt_str} h·∫£? Ngay l·∫≠p t·ª©c d·ª´ng l·∫°i! Ng√†y mai tr·ª´ th·∫≥ng {amt_str} kh·ªèi ng√¢n s√°ch."
#         else:
#             return f"Chi ti√™u v√¥ t·ªôi v·∫° n·ªØa h·∫£? {event_text} l√† qu√° m·ª©c r·ªìi! Ngay l·∫≠p t·ª©c d·ª´ng l·∫°i."
#     elif persona == "banter":
#         if amt_str:
#             return f"·ªêi d√†o, {event_text} l√†m v√≠ teo {amt_str} r·ªìi! Th√¥i, h√¥m nay stop, mai b√π l·∫°i {amt_str} nh√©."
#         else:
#             return f"√îi tr·ªùi, {event_text} l√†m v√≠ b·∫°n m√©o r·ªìi! H√¥m nay stop, mai b√π l·∫°i nha."
#     else:
#         return f"[Unknown persona] {event_text}"

# # Demo
# print("Mentor:", persona_reply("U·ªëng tr√† s·ªØa 100k", "mentor"))
# print("Angry mom:", persona_reply("ƒÇn buffet 250000 VNƒê", "angry_mom"))
# print("Banter:", persona_reply("Mua √°o m·ªõi 1m2", "banter"))


In [29]:
# # Cell 10: Demo h·ªôi tho·∫°i end-to-end (mock user)

# # 1. Ng∆∞·ªùi d√πng ch·ªçn persona
# chosen_persona = "banter"  # "mentor", "angry_mom", ho·∫∑c "banter"
# print(f"üë§ Ng∆∞·ªùi d√πng ch·ªçn persona: {chosen_persona}")

# # 2. Sinh k·∫ø ho·∫°ch (7 ho·∫∑c 14 ng√†y)
# horizon = 14  # ƒë·ªïi th√†nh 7 n·∫øu ch·ªâ mu·ªën 1 tu·∫ßn
# planning_context_demo = build_planning_context_from_row(
#     df.iloc[0].to_dict(),
#     horizon_days=horizon,
#     persona=chosen_persona
# )
# plan_json = generate_week_plan_json(planning_context_demo)

# # In k·∫ø ho·∫°ch g·ªçn g√†ng
# print("\nüìÖ K·∫ø ho·∫°ch sinh ra:")
# for day in plan_json["week_plan"]:
#     date = day["date"]
#     tasks = " | ".join(day["tasks"])
#     print(f"{date}: {tasks}")

# print("\nüîí supervision_note:", plan_json["supervision_note"])

# # 3. Ng∆∞·ªùi d√πng ph√°t sinh chi ti√™u
# event_text = "U·ªëng tr√† s·ªØa 100k"
# print(f"\nüë§ Ng∆∞·ªùi d√πng: {event_text}")

# # 4. Persona ph·∫£n h·ªìi
# reply = persona_reply(event_text, chosen_persona)
# print(f"ü§ñ {chosen_persona} reply:", reply)


In [30]:
# # Cell 11: Chat loop demo (t∆∞∆°ng t√°c nh∆∞ chat, tho√°t khi g√µ "exit")

# import os

# def regenerate_plan(planning_context, feedback: str):
#     """G·ªçi l·∫°i model v·ªõi feedback b·ªï sung ƒë·ªÉ sinh k·∫ø ho·∫°ch m·ªõi"""
#     pc = planning_context.copy()
#     pc["user_feedback"] = feedback
#     return generate_week_plan_json(pc)

# def chat_loop():
#     # B1: ch·ªçn persona
#     persona = input("Ch·ªçn persona (mentor / angry_mom / banter): ").strip().lower()
#     if persona not in ["mentor", "angry_mom", "banter"]:
#         persona = "mentor"
#     print(f"‚úÖ Persona ƒë√£ ch·ªçn: {persona}")

#     # B2: ch·ªçn horizon
#     horizon = input("B·∫°n mu·ªën k·∫ø ho·∫°ch m·∫•y ng√†y? (7/14): ").strip()
#     horizon = 14 if horizon == "14" else 7

#     planning_context = build_planning_context_from_row(
#         df.iloc[0].to_dict(),
#         horizon_days=horizon,
#         persona=persona
#     )
#     plan_json = generate_week_plan_json(planning_context)

#     print("\nüìÖ K·∫ø ho·∫°ch ban ƒë·∫ßu:")
#     for day in plan_json["week_plan"]:
#         print(f"{day['date']}: {' | '.join(day['tasks'])}")
#     print("supervision_note:", plan_json["supervision_note"])
#     print("\n--- B·∫Øt ƒë·∫ßu chat (g√µ 'exit' ƒë·ªÉ tho√°t) ---")

#     # Loop chat
#     while True:
#         user_input = input("\nüë§ B·∫°n: ").strip()
#         if user_input.lower() in ["exit", "quit"]:
#             print("ü§ñ LLM: K·∫øt th√∫c chat. H·∫πn g·∫∑p l·∫°i!")
#             break
#         elif user_input.lower() == "plan":
#             print("\nüìÖ K·∫ø ho·∫°ch hi·ªán t·∫°i:")
#             for day in plan_json["week_plan"]:
#                 print(f"{day['date']}: {' | '.join(day['tasks'])}")
#         elif user_input.lower().startswith("regen:"):
#             feedback = user_input[len("regen:"):].strip()
#             print(f"ü§ñ LLM: ƒêang sinh l·∫°i k·∫ø ho·∫°ch theo g√≥p √Ω: '{feedback}'...")
#             plan_json = regenerate_plan(planning_context, feedback)
#             for day in plan_json["week_plan"]:
#                 print(f"{day['date']}: {' | '.join(day['tasks'])}")
#         else:
#             reply = persona_reply(user_input, persona)
#             print(f"ü§ñ {persona}: {reply}")

# # Ch·∫°y chat loop (b·ªè qua khi NON_INTERACTIVE=1)
# if os.environ.get("NON_INTERACTIVE", "0") != "1":
#     chat_loop()
# else:
#     print("‚ÑπÔ∏è NON_INTERACTIVE=1: b·ªè qua chat loop.")


In [31]:
# Cell A: Session state & helpers

import re
from datetime import datetime
from dateutil import tz

# Kh·ªüi t·∫°o "b·ªô nh·ªõ h·ªôi tho·∫°i" cho phi√™n
session_state = {
    "persona": "mentor",              # "mentor" | "angry_mom" | "banter"
    "goal_reason": None,              # v√≠ d·ª•: "ƒëi du l·ªãch Nha Trang"
    "spend_events_log": [],           # list[{"ts","text","amount","category"}]
    "running_overage_today": 0        # t·ªïng v∆∞·ª£t ng√¢n s√°ch (demo, c√≥ th·ªÉ m·ªü r·ªông)
}

def set_persona(persona: str):
    persona = (persona or "").lower().strip()
    if persona not in ["mentor", "angry_mom", "banter"]:
        persona = "mentor"
    session_state["persona"] = persona
    return persona

def set_goal_reason(text: str | None):
    session_state["goal_reason"] = text.strip() if text else None

def _extract_amount(text: str) -> int | None:
    """
    Nh·∫≠n c√°c d·∫°ng ph·ªï bi·∫øn: 100k, 200000, 1.2m, 1m2, 150.000, 150,000, 150k‚Ä¶
    Tr·∫£ v·ªÅ s·ªë VND (int) ho·∫∑c None n·∫øu kh√¥ng parse ƒë∆∞·ª£c.
    """
    t = text.lower().replace("vnƒë", "").replace("vnd", "")
    # chu·∫©n h√≥a: b·ªè d·∫•u ch·∫•m/ngƒÉn c√°ch
    t_norm = t.replace(".", "").replace(",", "")
    # b·∫Øt '1m2' -> 1.2m
    t_norm = re.sub(r"(\d)tr(\d)", r"\1.\2m", t_norm)  # 1tr2 -> 1.2m
    # t√¨m s·ªë + ƒë∆°n v·ªã
    m = re.search(r"(\d+(?:\.\d+)?)(m|k)?", t_norm)
    if not m:
        return None
    num, unit = m.group(1), m.group(2)
    try:
        if unit == "m":
            return int(float(num) * 1_000_000)
        if unit == "k":
            return int(float(num) * 1_000)
        return int(float(num))
    except:
        return None

def _guess_category(text: str) -> str:
    text = text.lower()
    if any(k in text for k in ["tr√† s·ªØa", "snack", "ƒÉn v·∫∑t", "b√°nh", "k·∫πo"]):
        return "snacks"
    if any(k in text for k in ["ƒÉn ngo√†i", "cafe", "c√† ph√™", "nh√† h√†ng", "m√¨ cay", "b√∫n", "ph·ªü"]):
        return "food_out"
    if any(k in text for k in ["taxi", "grab", "bus", "xe bu√Ωt", "xƒÉng", "tr·∫°m thu ph√≠"]):
        return "transport"
    if any(k in text for k in ["phim", "game", "nh·∫°c", "karaoke", "gi·∫£i tr√≠"]):
        return "entertainment"
    if any(k in text for k in ["√°o", "qu·∫ßn", "gi√†y", "mua s·∫Øm", "ph·ª• ki·ªán"]):
        return "shopping"
    return "other"

def add_spend_event(user_text: str) -> dict:
    """
    Ghi nh·∫≠n m·ªôt l·∫ßn chi ti√™u ph√°t sinh v√†o session_state['spend_events_log'].
    Tr·∫£ v·ªÅ b·∫£n ghi ƒë√£ th√™m.
    """
    amt = _extract_amount(user_text)
    cat = _guess_category(user_text)
    local_tz = tz.gettz("Asia/Ho_Chi_Minh")
    rec = {
        "ts": datetime.now(local_tz).isoformat(timespec="seconds"),
        "text": user_text,
        "amount": amt,
        "category": cat
    }
    session_state["spend_events_log"].append(rec)
    # (demo) c·ªông d·ªìn overage n·∫øu c√≥ s·ªë
    if amt:
        session_state["running_overage_today"] += amt
    return rec

print("‚úÖ Cell A ready: session_state & helpers initialized.")


‚úÖ Cell A ready: session_state & helpers initialized.


In [32]:
# Cell B: LLM-driven persona reply (natural chat + numeric adjustment)

import json

# Map nh√£n persona ƒë·ªÉ ƒë∆∞a v√†o system prompt
PERSONA_MAP = {
    "mentor": "Ng∆∞·ªùi c·ªë v·∫•n th√¥ng th√°i ‚Äî ƒëi·ªÅm tƒ©nh, r√µ r√†ng, ƒë∆∞a s·ªë c·ª• th·ªÉ.",
    "angry_mom": "M·∫π gi·∫≠n d·ªØ ‚Äî nghi√™m kh·∫Øc, th·∫≥ng th·ª´ng, KH√îNG x√∫c ph·∫°m c√° nh√¢n.",
    "banter": "Th√≠ch c√† kh·ªãa ‚Äî h√≥m h·ªânh, ch√¢m ch·ªçc nh·∫π, nh∆∞ng v·∫´n ƒë∆∞a s·ªë c·ª• th·ªÉ."
}

def _short_special_days(pc: dict, max_days: int = 4) -> str:
    sdays = pc.get("special_days", [])[:max_days]
    return ", ".join([f"{d['date']}:{d['kind']}" for d in sdays]) or "kh√¥ng"

def _recent_spends(state: dict, k: int = 3) -> list:
    return state.get("spend_events_log", [])[-k:]

def llm_persona_reply(user_text: str, state: dict, planning_context: dict, plan_json: dict):
    """
    G·ªçi Gemini ƒë·ªÉ sinh l·ªùi ƒë√°p h·ªôi tho·∫°i t·ª± nhi√™n theo persona + ƒëi·ªÅu ch·ªânh s·ªë c·ª• th·ªÉ.
    Output JSON:
    {
      "message": "<1‚Äì2 c√¢u>",
      "adjustment": {"amount": <int or null>, "category": "<cat or null>", "when": "today|tomorrow"}
    }
    """
    # N·∫øu thi·∫øu model (ch∆∞a c·∫•u h√¨nh GEMINI_API_KEY), tr·∫£ fallback an to√†n
    try:
        import google.generativeai as genai
    except Exception:
        genai = None
    if genai is None or os.environ.get("GEMINI_API_KEY", "") == "":
        # Fallback: ƒë∆∞a ra khuy·∫øn ngh·ªã deterministic d·ª±a tr√™n parsing s·ªë ti·ªÅn
        new_rec = add_spend_event(user_text)
        amt = new_rec.get("amount")
        cat = new_rec.get("category") or "other"
        msg = (
            f"Ghi nh·∫≠n chi ti√™u '{user_text}'. "
            + (f"Ng√†y mai tr·ª´ {amt:,} VNƒê kh·ªèi {cat}.".replace(",", ".") if amt else "H√£y n√™u s·ªë ti·ªÅn ƒë·ªÉ t√¥i tr·ª´ ch√≠nh x√°c ng√†y mai.")
        )
        return {"message": msg, "adjustment": {"amount": amt, "category": cat, "when": "tomorrow"}}

    persona = state.get("persona", "mentor")
    persona_desc = PERSONA_MAP.get(persona, PERSONA_MAP["mentor"])
    goal_reason = state.get("goal_reason") or "m·ª•c ti√™u ti·∫øt ki·ªám ƒë√£ thi·∫øt l·∫≠p"
    goal_deadline = planning_context.get("goal_deadline")
    horizon = planning_context.get("horizon_days", 7)
    special_days_short = _short_special_days(planning_context)
    recent = _recent_spends(state, 3)

    # ƒê∆∞a spend m·ªõi v√†o log (n·∫øu ch∆∞a)
    new_rec = add_spend_event(user_text)

    system_instruction = (
        "B·∫°n l√† tr·ª£ l√Ω t√†i ch√≠nh theo persona. H√£y tr·∫£ l·ªùi H·ªòI THO·∫†I t·ª± nhi√™n (1‚Äì2 c√¢u) theo phong c√°ch sau:\n"
        f"- {persona_desc}\n\n"
        "R√†ng bu·ªôc:\n"
        "1) Lu√¥n ch·ªët 1 m·ªánh l·ªánh h√†nh ƒë·ªông v·ªõi CON S·ªê c·ª• th·ªÉ (v√≠ d·ª•: 'mai tr·ª´ 200.000 kh·ªèi snacks; h√¥m nay d·ª´ng ƒÉn v·∫∑t').\n"
        "2) C·∫•m x√∫c ph·∫°m c√° nh√¢n, tr√°nh c√¥ng k√≠ch th√¢n th·ªÉ. C√≥ th·ªÉ nghi√™m kh·∫Øc/h√≥m h·ªânh nh∆∞ng l·ªãch s·ª±.\n"
        "3) N·∫øu kh√¥ng tr√≠ch ƒë∆∞·ª£c s·ªë ti·ªÅn t·ª´ l·ªùi ng∆∞·ªùi d√πng, ƒë·ªÅ ngh·ªã h·ªç n√™u s·ªë ho·∫∑c ƒë∆∞a m·ª©c tr·∫ßn an to√†n (‚â§100.000 VNƒê).\n"
        "4) Tr·∫£ v·ªÅ JSON duy nh·∫•t theo schema sau (kh√¥ng th√™m ch·ªØ n√†o ngo√†i JSON):\n"
        '{ "message":"<1-2 c√¢u>", "adjustment":{"amount": <s·ªë ho·∫∑c null>, "category":"<chu·ªói ho·∫∑c null>", "when":"today|tomorrow"} }'
    )

    user_payload = {
        "user_text": user_text,
        "goal_reason": goal_reason,
        "goal_deadline": goal_deadline,
        "horizon_days": horizon,
        "special_days": special_days_short,
        "recent_spends": recent,
        "new_spend": new_rec,
        "plan_hint": plan_json.get("week_plan", [])[:2]
    }

    model_json = genai.GenerativeModel(
        model_name=os.environ.get("GEMINI_MODEL", "gemini-2.0-flash"),
        system_instruction=system_instruction,
        generation_config={
            "temperature": 0.6,
            "response_mime_type": "application/json"
        }
    )

    resp = model_json.generate_content(json.dumps(user_payload, ensure_ascii=False))
    text = resp.candidates[0].content.parts[0].text if resp and resp.candidates else ""
    try:
        out = json.loads(text)
    except json.JSONDecodeError as e:
        import re
        m = re.search(r"\{[\s\S]*\}\s*$", text.strip())
        if not m:
            raise e
        out = json.loads(m.group(0))

    if "adjustment" not in out:
        out["adjustment"] = {"amount": None, "category": None, "when": "today"}
    if out["adjustment"].get("category") in [None, "", "unknown"]:
        out["adjustment"]["category"] = new_rec.get("category") or "other"

    return out

print("‚úÖ Cell B ready: llm_persona_reply() is available.")


‚úÖ Cell B ready: llm_persona_reply() is available.


In [33]:
# # V√≠ d·ª• th·ª≠ nhanh (ngo√†i chat loop)
# set_persona("angry_mom")
# set_goal_reason("ƒëi du l·ªãch Nha Trang")
# reply = llm_persona_reply("nay th√®m ƒë·ªì ng·ªçt qu√°, l·ª° mua 200k b√°nh r·ªìi", session_state, planning_context_14d, week_plan_json_14d)
# reply


In [None]:
# Cell UI: Chat demo b·∫±ng ipywidgets (3 n√∫t persona + chatbox)
import os, json
import ipywidgets as w
from IPython.display import display, clear_output

# Tr·∫°ng th√°i UI
ui_state = {
    "persona": session_state.get("persona", "mentor"),
    "horizon": 7,
    "pc": None,
    "plan": None,
}

# N√∫t ch·ªçn persona
btn_mentor = w.Button(description="Mentor", button_style="info")
btn_angry  = w.Button(description="Angry mom", button_style="warning")
btn_banter = w.Button(description="Banter", button_style="success")

# Ch·ªçn horizon
horizon_dd = w.Dropdown(options=[7,14], value=7, description="Horizon")

# N√∫t kh·ªüi t·∫°o k·∫ø ho·∫°ch
btn_init = w.Button(description="Kh·ªüi t·∫°o k·∫ø ho·∫°ch", button_style="primary")

# Khu v·ª±c log v√† chat
out_plan = w.Output(layout={"border":"1px solid #ccc"})
chat_input = w.Text(placeholder="Nh·∫≠p tin nh·∫Øn (vd: U·ªëng tr√† s·ªØa 100k)")
btn_send = w.Button(description="G·ª≠i", button_style="primary")
chat_log = w.Output(layout={"border":"1px solid #ccc", "height":"250px", "overflow":"auto"})

# Handlers

def _set_persona(p):
    set_persona(p)
    ui_state["persona"] = p
    with chat_log:
        print(f"[system] persona = {p}")

def _on_click_persona(b):
    if b is btn_mentor:
        _set_persona("mentor")
    elif b is btn_angry:
        _set_persona("angry_mom")
    else:
        _set_persona("banter")

btn_mentor.on_click(_on_click_persona)
btn_angry.on_click(_on_click_persona)
btn_banter.on_click(_on_click_persona)


def _init_plan(_):
    ui_state["horizon"] = int(horizon_dd.value)
    # T·∫°o planning context t·ª´ h√†ng ƒë·∫ßu ti√™n df
    pc = build_planning_context_from_row(df.iloc[0].to_dict(), horizon_days=ui_state["horizon"], persona=ui_state["persona"])
    ui_state["pc"] = pc
    try:
        plan = generate_week_plan_json(pc)
    except Exception as e:
        plan = {"week_plan": [], "supervision_note": f"LLM unavailable: {e}"}
    ui_state["plan"] = plan
    with out_plan:
        clear_output()
        print("üìÖ K·∫ø ho·∫°ch:")
        for day in plan.get("week_plan", [])[:ui_state["horizon"]]:
            print(f"{day['date']}: {' | '.join(day['tasks'])}")
        print("\nüîí supervision_note:", plan.get("supervision_note"))

btn_init.on_click(_init_plan)


def _send(_):
    text = chat_input.value.strip()
    if not text:
        return
    chat_input.value = ""
    with chat_log:
        print(f"üë§ B·∫°n: {text}")
    pc = ui_state.get("pc")
    plan = ui_state.get("plan") or {"week_plan": []}
    if not pc:
        with chat_log:
            print("ü§ñ LLM: H√£y kh·ªüi t·∫°o k·∫ø ho·∫°ch tr∆∞·ªõc (nh·∫•n 'Kh·ªüi t·∫°o k·∫ø ho·∫°ch').")
        return
    try:
        resp = llm_persona_reply(text, session_state, pc, plan)
        msg = resp.get("message") or "(no message)"
        with chat_log:
            print(f"ü§ñ {ui_state['persona']}: {msg}")
    except Exception as e:
        with chat_log:
            print(f"ü§ñ LLM error: {e}")

btn_send.on_click(_send)

# Layout
persona_box = w.HBox([w.HTML("<b>Persona:</b>&nbsp;"), btn_mentor, btn_angry, btn_banter])
init_box = w.HBox([horizon_dd, btn_init])
input_box = w.HBox([chat_input, btn_send])

ui = w.VBox([
    persona_box,
    init_box,
    w.HTML("<hr>"),
    w.HTML("<b>K·∫ø ho·∫°ch</b>"),
    out_plan,
    w.HTML("<b>Chat</b>"),
    chat_log,
    input_box,
])

display(ui)
print("‚úÖ UI chat s·∫µn s√†ng. Ch·ªçn persona, kh·ªüi t·∫°o k·∫ø ho·∫°ch, r·ªìi nh·∫≠p tin nh·∫Øn.")



VBox(children=(HBox(children=(HTML(value='<b>Persona:</b>&nbsp;'), Button(button_style='info', description='Me‚Ä¶

‚úÖ UI chat s·∫µn s√†ng. Ch·ªçn persona, kh·ªüi t·∫°o k·∫ø ho·∫°ch, r·ªìi nh·∫≠p tin nh·∫Øn.


In [35]:
# DDL: t·∫°o b·∫£ng persona_plans, persona_plan_days, persona_chat_logs, persona_spend_events
from sqlalchemy import text

DDL_PERSONA = [
    """
    CREATE TABLE IF NOT EXISTS persona_plans (
      plan_id UUID PRIMARY KEY,
      customer_id INT NOT NULL,
      year_month VARCHAR(7),
      persona TEXT,
      horizon_days INT,
      goal_text TEXT,
      goal_amount NUMERIC(18,2),
      target_date DATE,
      weekly_save NUMERIC(18,2),
      feasibility TEXT,
      reasons JSONB,
      supervision_note TEXT,
      version INT DEFAULT 1,
      committed_by TEXT,
      created_at TIMESTAMPTZ DEFAULT now()
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS persona_plan_days (
      plan_id UUID NOT NULL REFERENCES persona_plans(plan_id) ON DELETE CASCADE,
      date DATE NOT NULL,
      tasks JSONB,
      PRIMARY KEY (plan_id, date)
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS persona_chat_logs (
      chat_id UUID PRIMARY KEY,
      customer_id INT,
      plan_id UUID NULL REFERENCES persona_plans(plan_id) ON DELETE SET NULL,
      ts TIMESTAMPTZ DEFAULT now(),
      role TEXT,
      text TEXT,
      payload JSONB
    )
    """,
    """
    CREATE TABLE IF NOT EXISTS persona_spend_events (
      event_id UUID PRIMARY KEY,
      customer_id INT,
      ts TIMESTAMPTZ DEFAULT now(),
      text TEXT,
      amount NUMERIC(18,2),
      category TEXT,
      source TEXT,
      linked_plan_id UUID NULL REFERENCES persona_plans(plan_id) ON DELETE SET NULL
    )
    """,
    # Indexes
    "CREATE INDEX IF NOT EXISTS idx_persona_plans_cid_created ON persona_plans(customer_id, created_at DESC)",
    "CREATE INDEX IF NOT EXISTS idx_persona_plans_cid_ym ON persona_plans(customer_id, year_month)",
    "CREATE INDEX IF NOT EXISTS idx_persona_plan_days_plan ON persona_plan_days(plan_id)",
    "CREATE INDEX IF NOT EXISTS idx_persona_chat_logs_cid_ts ON persona_chat_logs(customer_id, ts DESC)",
    "CREATE INDEX IF NOT EXISTS idx_persona_chat_logs_plan_ts ON persona_chat_logs(plan_id, ts DESC)",
    "CREATE INDEX IF NOT EXISTS idx_persona_spends_cid_ts ON persona_spend_events(customer_id, ts DESC)",
    "CREATE INDEX IF NOT EXISTS idx_persona_spends_plan ON persona_spend_events(linked_plan_id)"
]

if db_available():
    with engine.begin() as conn:
        for stmt in DDL_PERSONA:
            conn.execute(text(stmt))
    print("‚úÖ Persona tables ensured in PostgreSQL.")
else:
    print("‚ÑπÔ∏è Skip DDL: DB not available.")



‚úÖ Persona tables ensured in PostgreSQL.


In [36]:
# Helpers: CRUD cho persona_plans, persona_plan_days, persona_chat_logs, persona_spend_events
import uuid
from typing import List, Dict, Any, Optional
from sqlalchemy import text

def _u():
    return str(uuid.uuid4())

def db_insert_persona_plan(conn, plan: Dict[str, Any], days: List[Dict[str, Any]]):
    """plan: dict c√°c c·ªôt c·ªßa persona_plans (kh√¥ng g·ªìm created_at)
       days: list {date: YYYY-MM-DD, tasks: list[str]}
    """
    if "plan_id" not in plan:
        plan["plan_id"] = _u()
    plan_id = plan["plan_id"]
    # Insert plan
    cols = [k for k in plan.keys()]
    placeholders = ",".join([f":{k}" for k in cols])
    sql_plan = text(f"""
        INSERT INTO persona_plans({','.join(cols)})
        VALUES ({placeholders})
    """)
    conn.execute(sql_plan, plan)
    # Insert days
    if days:
        recs = [{"plan_id": plan_id, "date": d["date"], "tasks": json.dumps(d.get("tasks", []), ensure_ascii=False)} for d in days]
        sql_day = text("""
            INSERT INTO persona_plan_days(plan_id, date, tasks)
            VALUES (:plan_id, :date, CAST(:tasks AS JSONB))
            ON CONFLICT (plan_id, date) DO UPDATE SET tasks = EXCLUDED.tasks
        """)
        conn.execute(sql_day, recs)
    return plan_id

def db_upsert_chat(conn, customer_id: int, role: str, text_msg: str, payload: Optional[Dict[str, Any]] = None, plan_id: Optional[str] = None):
    rec = {
        "chat_id": _u(),
        "customer_id": customer_id,
        "plan_id": plan_id,
        "role": role,
        "text": text_msg,
        "payload": json.dumps(payload or {}, ensure_ascii=False)
    }
    sql = text("""
        INSERT INTO persona_chat_logs(chat_id, customer_id, plan_id, role, text, payload)
        VALUES (:chat_id, :customer_id, :plan_id, :role, :text, CAST(:payload AS JSONB))
    """)
    conn.execute(sql, rec)

def db_insert_spend_event(conn, customer_id: int, text_msg: str, amount: Optional[float], category: Optional[str], source: str = "user", plan_id: Optional[str] = None):
    rec = {
        "event_id": _u(),
        "customer_id": customer_id,
        "text": text_msg,
        "amount": amount,
        "category": category,
        "source": source,
        "linked_plan_id": plan_id
    }
    sql = text("""
        INSERT INTO persona_spend_events(event_id, customer_id, text, amount, category, source, linked_plan_id)
        VALUES (:event_id, :customer_id, :text, :amount, :category, :source, :linked_plan_id)
    """)
    conn.execute(sql, rec)

print("‚úÖ CRUD helpers ready.")



‚úÖ CRUD helpers ready.


In [37]:
# UI: ch·ªçn customer_id/year_month t·ª´ DB (fallback CSV)
import ipywidgets as w
from IPython.display import display, clear_output
from sqlalchemy import text

cust_dd = w.Dropdown(options=[], description="Customer")
ym_dd = w.Dropdown(options=[], description="Year-Month")
refresh_btn = w.Button(description="T·∫£i danh s√°ch", button_style="info")
load_btn = w.Button(description="N·∫°p h·ªì s∆°", button_style="primary")
preview_out = w.Output(layout={"border":"1px solid #ccc"})

_customer_row = {"customer_id": None, "year_month": None}


def _load_lists(_):
    options_c = []
    options_ym = []
    if db_available():
        with engine.connect() as conn:
            dfc = pd.read_sql(text("SELECT DISTINCT customer_id FROM features_monthly ORDER BY customer_id ASC LIMIT 200"), conn)
            dfm = pd.read_sql(text("SELECT DISTINCT year_month FROM features_monthly ORDER BY year_month DESC LIMIT 12"), conn)
            options_c = list(dfc["customer_id"].astype(int).tolist())
            options_ym = list(dfm["year_month"].astype(str).tolist())
    else:
        dfc = pd.read_csv(os.path.join(DATA_DIR, "features_monthly.csv"))
        options_c = sorted(dfc["customer_id"].astype(int).unique().tolist())[:200]
        options_ym = sorted(dfc["year_month"].astype(str).unique().tolist())[::-1][:12]
    cust_dd.options = options_c
    ym_dd.options = options_ym


def _load_profile(_):
    cid = int(cust_dd.value)
    ym = str(ym_dd.value)
    if db_available():
        with engine.connect() as conn:
            sql = text("""
                SELECT f.*, l.label_interest
                FROM features_monthly f
                LEFT JOIN labels l USING (customer_id, year_month)
                WHERE f.customer_id = :cid AND f.year_month = :ym
                LIMIT 1
            """)
            row = pd.read_sql(sql, conn, params={"cid": cid, "ym": ym})
    else:
        dfc = pd.read_csv(os.path.join(DATA_DIR, "features_monthly.csv"))
        dfl = pd.read_csv(os.path.join(DATA_DIR, "labels.csv"))
        row = dfc.merge(dfl, on=["customer_id","year_month"], how="left")
        row = row[(row["customer_id"]==cid) & (row["year_month"]==ym)].head(1)
    with preview_out:
        clear_output()
        if row is None or row.empty:
            print("‚ùå Kh√¥ng t√¨m th·∫•y h·ªì s∆°.")
        else:
            display(row)
            _customer_row["customer_id"] = int(row.iloc[0]["customer_id"]) 
            _customer_row["year_month"] = str(row.iloc[0]["year_month"]) 
            print("‚úÖ ƒê√£ n·∫°p h·ªì s∆°.")

refresh_btn.on_click(_load_lists)
load_btn.on_click(_load_profile)

display(w.HBox([refresh_btn, load_btn]))
display(w.HBox([cust_dd, ym_dd]))
display(preview_out)

print("‚ÑπÔ∏è B·∫•m 'T·∫£i danh s√°ch' ‚Üí ch·ªçn customer/year_month ‚Üí 'N·∫°p h·ªì s∆°'.")



HBox(children=(Button(button_style='info', description='T·∫£i danh s√°ch', style=ButtonStyle()), Button(button_st‚Ä¶

HBox(children=(Dropdown(description='Customer', options=(), value=None), Dropdown(description='Year-Month', op‚Ä¶

Output(layout=Layout(border_bottom='1px solid #ccc', border_left='1px solid #ccc', border_right='1px solid #cc‚Ä¶

‚ÑπÔ∏è B·∫•m 'T·∫£i danh s√°ch' ‚Üí ch·ªçn customer/year_month ‚Üí 'N·∫°p h·ªì s∆°'.


In [38]:
# Affordability & Plan proposal (deterministic core)
from datetime import date, timedelta

def affordability_from_row(row: dict, horizon_days: int, goal_amount: int | None, target_months: int | None) -> dict:
    """
    T√≠nh weekly_cap_save v√† ƒë·ªÅ xu·∫•t m·ª©c ti·∫øt ki·ªám tu·∫ßn ph√π h·ª£p d·ª±a tr√™n d·ªØ li·ªáu kh√°ch.
    - income_net ‚âà income (fallback)
    - fixed_bills ‚âà loan (fallback)
    - variable_spend ‚âà spend (fallback)
    """
    income_net = int(row.get("income_net_month", row.get("income", 0)) or 0)
    fixed_bills = int(row.get("fixed_bills_month", row.get("loan", 0)) or 0)
    variable_spend = int(row.get("variable_spend_month", row.get("spend", 0)) or 0)

    base_monthly_surplus = max(0, income_net - fixed_bills - int(0.8 * variable_spend))
    # tr·∫ßn tu·∫ßn ~ 25% surplus th√°ng (ch·ª´a d∆∞ ƒë·ªãa ph√°t sinh)
    weekly_cap_save = max(0, int(base_monthly_surplus * 0.25))

    # n·∫øu ng∆∞·ªùi d√πng ƒë∆∞a ra m·ª•c ti√™u d√†i h·∫°n
    recommended_weekly = weekly_cap_save
    feasibility = "unknown"
    rationale = []

    if goal_amount and target_months:
        needed_per_week = int(goal_amount / (target_months * 4.0))
        if needed_per_week <= weekly_cap_save:
            feasibility = "ok"
            recommended_weekly = needed_per_week
            rationale.append(f"C·∫ßn {needed_per_week:,}ƒë/tu·∫ßn ƒë·ªÉ ƒë·∫°t m·ª•c ti√™u trong {target_months} th√°ng, trong tr·∫ßn {weekly_cap_save:,}ƒë/tu·∫ßn.")
        else:
            feasibility = "adjust"
            recommended_weekly = weekly_cap_save
            rationale.append(f"M·ª•c ti√™u y√™u c·∫ßu kho·∫£ng {needed_per_week:,}ƒë/tu·∫ßn nh∆∞ng tr·∫ßn kh·∫£ d·ª•ng ch·ªâ {weekly_cap_save:,}ƒë/tu·∫ßn. ƒê·ªÅ xu·∫•t k√©o d√†i th·ªùi gian ho·∫∑c gi·∫£m s·ªë ti·ªÅn.")
    else:
        feasibility = "ok" if weekly_cap_save > 0 else "adjust"
        rationale.append("T·∫°m t√≠nh theo tr·∫ßn tu·∫ßn t·ª´ d∆∞ th·∫∑ng th√°ng.")

    return {
        "weekly_cap_save": weekly_cap_save,
        "recommended_weekly_save": recommended_weekly,
        "feasibility": feasibility,
        "reasons": rationale
    }


def propose_week_plan(row: dict, start_date: date, horizon_days: int, weekly_save: int) -> list[dict]:
    """
    Sinh k·∫ø ho·∫°ch 7/14 ng√†y theo s·ªë ti·ªÅn weekly_save. 2-3 task/ng√†y, √°p d·ª•ng special_days ƒë∆°n gi·∫£n.
    """
    tasks_per_day = max(2, min(3, weekly_save // 200000 if weekly_save else 2))
    per_day_save = max(0, int(weekly_save / (horizon_days if horizon_days >= 7 else 7)))

    # special_days ƒë∆°n gi·∫£n t·ª´ row: payday_dom_list, bill_dom_list, events_dom_list
    payday_dom_str = str(row.get("payday_dom", "") or "").replace(",",";")
    bills = str(row.get("bill_dom_list", "") or "")
    events = str(row.get("events_dom_list", "") or "")

    def parse_dom_list(s):
        out = []
        for token in [t.strip() for t in s.split(";") if t.strip()]:
            try:
                if ":" in token:
                    dom, rest = token.split(":", 1)
                    out.append(int(dom))
                else:
                    out.append(int(token))
            except: 
                pass
        return out

    payday_doms = parse_dom_list(payday_dom_str)

    plan = []
    cur = start_date
    for _ in range(horizon_days):
        day_tasks = []
        # payday ∆∞u ti√™n chuy·ªÉn ti·ªÅn
        if cur.day in payday_doms:
            day_tasks.append(f"Nh·∫≠n l∆∞∆°ng ‚Üí chuy·ªÉn ngay {min(per_day_save*2, max(per_day_save, 100000)):,}ƒë v√†o qu·ªπ")
        # ti·∫øt ki·ªám theo ng√†y
        if per_day_save:
            day_tasks.append(f"Gi·ªØ ti·∫øt ki·ªám ng√†y: {per_day_save:,}ƒë (c·∫Øt b·ªõt ƒÉn v·∫∑t/gi·∫£i tr√≠)")
        # m·ªôt h√†nh ƒë·ªông h√†nh vi nh·ªè
        day_tasks.append("Kh√¥ng mua tr√† s·ªØa/cafe h√¥m nay; ƒëi b·ªô 15 ph√∫t")
        plan.append({"date": cur.isoformat(), "tasks": [t.replace(",",".") for t in day_tasks]})
        cur += timedelta(days=1)
    return plan

print("‚úÖ affordability() & propose_week_plan() ready.")



‚úÖ affordability() & propose_week_plan() ready.


In [None]:
# UI: Nh·∫≠p mong mu·ªën, ƒë·ªÅ xu·∫•t k·∫ø ho·∫°ch 7/14 ng√†y, x√°c nh·∫≠n & l∆∞u
import ipywidgets as w
from IPython.display import display, clear_output
from datetime import date
from sqlalchemy import text

wish_text = w.Text(placeholder="V√≠ d·ª•: mu·ªën d√†nh 15 tri·ªáu trong 6 th√°ng ƒë·ªÉ mua ƒëi·ªán tho·∫°i")
amount_in = w.IntText(description="S·ªë ti·ªÅn (ƒë)", value=15000000)
months_in = w.IntSlider(description="Th·ªùi gian (th√°ng)", min=1, max=36, value=6)
horizon_dd2 = w.RadioButtons(options=[7,14], description="Horizon", value=7)
persona_rb = w.ToggleButtons(options=[("Mentor","mentor"),("Angry mom","angry_mom"),("Banter","banter")], description="Persona")
propose_btn = w.Button(description="ƒê·ªÅ xu·∫•t k·∫ø ho·∫°ch", button_style="primary")
confirm_btn = w.Button(description="ƒê·ªìng √Ω & l∆∞u", button_style="success")
change_btn = w.Button(description="Mu·ªën ch·ªânh (regen)", button_style="warning")
feedback_in = w.Text(placeholder="V√≠ d·ª•: gi·∫£m ti·∫øt ki·ªám tu·∫ßn xu·ªëng 400k v√† th√™m 1 ng√†y ngh·ªâ chi ti√™u")

proposal_out = w.Output(layout={"border":"1px solid #ccc"})

_current_plan = {"plan": None, "days": None, "plan_id": None}


def _propose(_):
    proposal_out.clear_output()
    if not _customer_row.get("customer_id"):
        with proposal_out:
            print("‚ùå Ch∆∞a n·∫°p h·ªì s∆° kh√°ch h√†ng.")
        return
    cid = int(_customer_row["customer_id"])
    ym = str(_customer_row["year_month"]) or None
    persona = persona_rb.value
    horizon = int(horizon_dd2.value)

    # L·∫•y row l·∫°i ƒë·ªÉ t√≠nh ch√≠nh x√°c
    if db_available():
        with engine.connect() as conn:
            sql = text("""
                SELECT f.*, l.label_interest
                FROM features_monthly f
                LEFT JOIN labels l USING (customer_id, year_month)
                WHERE f.customer_id = :cid AND f.year_month = :ym
                LIMIT 1
            """)
            row_df = pd.read_sql(sql, conn, params={"cid": cid, "ym": ym})
    else:
        dfc = pd.read_csv(os.path.join(DATA_DIR, "features_monthly.csv"))
        dfl = pd.read_csv(os.path.join(DATA_DIR, "labels.csv"))
        row_df = dfc.merge(dfl, on=["customer_id","year_month"], how="left")
        row_df = row_df[(row_df["customer_id"]==cid) & (row_df["year_month"]==ym)].head(1)

    if row_df is None or row_df.empty:
        with proposal_out:
            print("‚ùå Kh√¥ng t√¨m th·∫•y h·ªì s∆° ƒë·ªÉ ƒë·ªÅ xu·∫•t.")
        return

    row = row_df.iloc[0].to_dict()

    af = affordability_from_row(row, horizon_days=horizon, goal_amount=int(amount_in.value or 0), target_months=int(months_in.value or 0))
    weekly = int(af["recommended_weekly_save"])

    # ∆Øu ti√™n g·ªçi LLM + validate schema; fallback deterministic
    try:
        proposal = llm_generate_plan_proposal_cached(persona, wish_text.value or "", row, horizon, af["weekly_cap_save"], weekly)
        days = [d.model_dump() for d in proposal.week_plan]
        weekly = int(proposal.recommended_weekly_save)
        supervision_note = proposal.supervision_note
        reasons = proposal.reasons
    except Exception:
        start_d = date.today()
        days = propose_week_plan(row, start_d, horizon, weekly)
        supervision_note = "T√¥i s·∫Ω gi√°m s√°t tu·∫ßn n√†y. ƒê·∫°t ‚Üí l·∫∑p l·∫°i; Kh√¥ng ƒë·∫°t ‚Üí ƒëi·ªÅu ch·ªânh."
        reasons = af["reasons"]

    plan = {
        "customer_id": cid,
        "year_month": ym,
        "persona": persona,
        "horizon_days": horizon,
        "goal_text": wish_text.value or "",
        "goal_amount": int(amount_in.value or 0),
        "target_date": date.today().replace(day=28) + pd.DateOffset(months=int(months_in.value or 0)),
        "weekly_save": weekly,
        "feasibility": af["feasibility"],
        "reasons": json.dumps(reasons, ensure_ascii=False),
        "supervision_note": supervision_note,
        "committed_by": persona,
    }

    _current_plan["plan"] = plan
    _current_plan["days"] = days

    with proposal_out:
        print(f"ƒê·ªÅ xu·∫•t cho customer {cid} ({ym}), persona={persona}")
        print(f"Feasibility: {af['feasibility']} | weekly_save ƒë·ªÅ xu·∫•t: {weekly:,}ƒë/tu·∫ßn")
        print("L√Ω do:")
        for r in reasons:
            print(" -", r)
        print("\nK·∫ø ho·∫°ch:")
        for d in days:
            print(d["date"], ": ", " | ".join(d["tasks"]))
        print("\nB·∫°n c√≥ mu·ªën l∆∞u k·∫ø ho·∫°ch n√†y kh√¥ng? (ƒê·ªìng √Ω & l∆∞u / Mu·ªën ch·ªânh)")


def _confirm(_):
    if not _current_plan.get("plan"):
        with proposal_out:
            print("‚ùå Ch∆∞a c√≥ k·∫ø ho·∫°ch ƒë·ªÉ l∆∞u.")
        return
    if not db_available():
        with proposal_out:
            print("‚ÑπÔ∏è DB kh√¥ng s·∫µn s√†ng, b·ªè qua b∆∞·ªõc l∆∞u.")
        return
    with engine.begin() as conn:
        plan = _current_plan["plan"].copy()
        # Chuy·ªÉn target_date v·ªÅ string YYYY-MM-DD
        td = plan.get("target_date")
        if hasattr(td, "strftime"):
            plan["target_date"] = td.strftime("%Y-%m-%d")
        plan_id = db_insert_persona_plan(conn, plan, _current_plan["days"]) 
        _current_plan["plan_id"] = plan_id
        db_upsert_chat(conn, customer_id=plan["customer_id"], role="assistant", text_msg="Plan committed", payload={"plan_id": plan_id}, plan_id=plan_id)
    with proposal_out:
        print("‚úÖ ƒê√£ l∆∞u k·∫ø ho·∫°ch. plan_id=", _current_plan["plan_id"]) 


def _diff_plans(old_days: list, new_days: list) -> list:
    by_date = {d["date"]: d for d in old_days}
    diffs = []
    for d in new_days:
        od = by_date.get(d["date"]) or {"tasks": []}
        if od["tasks"] != d["tasks"]:
            diffs.append({
                "date": d["date"],
                "old": od.get("tasks", []),
                "new": d["tasks"]
            })
    return diffs


def _regen(_):
    if not _current_plan.get("plan"):
        with proposal_out:
            print("‚ùå Ch∆∞a c√≥ k·∫ø ho·∫°ch ƒë·ªÉ ch·ªânh.")
        return
    persona = persona_rb.value
    horizon = int(horizon_dd2.value)

    # L·∫•y l·∫°i h·ªì s∆°
    cid = int(_customer_row["customer_id"]) if _customer_row.get("customer_id") else None
    ym = str(_customer_row["year_month"]) if _customer_row.get("year_month") else None
    if cid is None:
        with proposal_out:
            print("‚ùå Ch∆∞a n·∫°p h·ªì s∆°.")
        return
    if db_available():
        with engine.connect() as conn:
            sql = text("""
                SELECT f.*, l.label_interest
                FROM features_monthly f
                LEFT JOIN labels l USING (customer_id, year_month)
                WHERE f.customer_id = :cid AND f.year_month = :ym
                LIMIT 1
            """)
            row_df = pd.read_sql(sql, conn, params={"cid": cid, "ym": ym})
    else:
        dfc = pd.read_csv(os.path.join(DATA_DIR, "features_monthly.csv"))
        dfl = pd.read_csv(os.path.join(DATA_DIR, "labels.csv"))
        row_df = dfc.merge(dfl, on=["customer_id","year_month"], how="left")
        row_df = row_df[(row_df["customer_id"]==cid) & (row_df["year_month"]==ym)].head(1)
    if row_df is None or row_df.empty:
        with proposal_out:
            print("‚ùå Kh√¥ng t√¨m th·∫•y h·ªì s∆°.")
        return
    row = row_df.iloc[0].to_dict()

    # Gi·ªØ tr·∫ßn tu·∫ßn c≈© l√†m tham chi·∫øu, nh∆∞ng cho ph√©p user feedback ·∫£nh h∆∞·ªüng n·ªôi dung
    af = affordability_from_row(row, horizon_days=horizon, goal_amount=int(amount_in.value or 0), target_months=int(months_in.value or 0))
    weekly = int(af["recommended_weekly_save"])

    # Gh√©p feedback v√†o user_goal_text
    goal_text = (wish_text.value or "")
    fb = feedback_in.value.strip()
    user_goal_text = f"{goal_text} | feedback: {fb}" if fb else goal_text

    try:
        proposal = llm_generate_plan_proposal(persona, user_goal_text, row, horizon, af["weekly_cap_save"], weekly)
        new_days = [d.model_dump() for d in proposal.week_plan]
        weekly = int(proposal.recommended_weekly_save)
        supervision_note = proposal.supervision_note
        reasons = proposal.reasons
    except Exception:
        from datetime import date
        new_days = propose_week_plan(row, date.today(), horizon, weekly)
        supervision_note = "T√¥i s·∫Ω gi√°m s√°t tu·∫ßn n√†y. ƒê·∫°t ‚Üí l·∫∑p l·∫°i; Kh√¥ng ƒë·∫°t ‚Üí ƒëi·ªÅu ch·ªânh."
        reasons = af["reasons"]

    old_days = _current_plan.get("days") or []
    diffs = _diff_plans(old_days, new_days)
    _current_plan["days"] = new_days
    _current_plan["plan"]["weekly_save"] = weekly
    _current_plan["plan"]["supervision_note"] = supervision_note
    _current_plan["plan"]["reasons"] = json.dumps(reasons, ensure_ascii=False)

    with proposal_out:
        print("üîÅ K·∫ø ho·∫°ch ƒë√£ ƒëi·ªÅu ch·ªânh.")
        if not diffs:
            print("(Kh√¥ng c√≥ thay ƒë·ªïi so v·ªõi b·∫£n tr∆∞·ªõc)")
        else:
            print("C√°c thay ƒë·ªïi:")
            for d in diffs:
                print(f" - {d['date']}")
                print("   c≈©:", " | ".join(d["old"]))
                print("   m·ªõi:", " | ".join(d["new"]))

propose_btn.on_click(_propose)
confirm_btn.on_click(_confirm)
change_btn.on_click(_regen)

display(w.VBox([
    w.HTML("<b>Mong mu·ªën</b>"),
    wish_text,
    feedback_in,
    w.HBox([amount_in, months_in, horizon_dd2]),
    persona_rb,
    w.HBox([propose_btn, confirm_btn, change_btn]),
    proposal_out
]))



VBox(children=(HTML(value='<b>Mong mu·ªën</b>'), Text(value='', placeholder='V√≠ d·ª•: mu·ªën d√†nh 15 tri·ªáu trong 6 t‚Ä¶

In [40]:
# Prompt + JSON schema (pydantic) cho plan proposal
from pydantic import BaseModel, Field, ValidationError, field_validator
from typing import List, Optional

class Adjustment(BaseModel):
    amount: Optional[int] = Field(None, description="S·ªë ti·ªÅn ƒëi·ªÅu ch·ªânh (VND)")
    category: Optional[str] = Field(None, description="Danh m·ª•c ƒëi·ªÅu ch·ªânh")
    when: str = Field("today", pattern="^(today|tomorrow)$")

class PlanDay(BaseModel):
    date: str
    tasks: List[str]

class PlanProposal(BaseModel):
    feasibility: str = Field(pattern="^(ok|adjust)$")
    weekly_cap_save: int
    recommended_weekly_save: int
    reasons: List[str]
    proposal: dict
    week_plan: List[PlanDay]
    supervision_note: str
    confirm_question: str

    @field_validator("week_plan")
    @classmethod
    def limit_length(cls, v: List[PlanDay]):
        if not (7 <= len(v) <= 14):
            raise ValueError("week_plan ph·∫£i c√≥ 7 ho·∫∑c 14 ng√†y")
        return v

STYLEBOOK = {
    "mentor": {
        "tone": "T√¥n tr·ªçng, ƒë·ªông vi√™n, lu√¥n c√≥ con s·ªë v√† l√Ω do ng·∫Øn.",
        "forbid": ["mi·ªát th·ªã", "x√∫c ph·∫°m", "ra l·ªánh v√¥ cƒÉn c·ª©"],
        "closing": "B·∫°n th·∫•y ·ªïn ch·ª©?"
    },
    "angry_mom": {
        "tone": "Nghi√™m kh·∫Øc nh∆∞ng l·ªãch s·ª±, kh√¥ng x√∫c ph·∫°m; lu√¥n ƒë∆∞a l·ª±a ch·ªçn thay th·∫ø.",
        "forbid": ["coi th∆∞·ªùng", "ƒëe do·∫°"],
        "closing": "Ch·ªët v·∫≠y ƒë∆∞·ª£c kh√¥ng?"
    },
    "banter": {
        "tone": "H√≥m h·ªânh nh·∫π, kh√¥ng ch√¢m ch·ªçc c√° nh√¢n; v·∫´n ph·∫£i c√≥ s·ªë li·ªáu.",
        "forbid": ["m·ªâa mai c√° nh√¢n", "ch√™ bai c∆° th·ªÉ"],
        "closing": "Ok m√¨nh ch∆°i k√®o n√†y nh√©?"
    }
}

SYSTEM_PROMPT = (
    "B·∫°n l√† tr·ª£ l√Ω t√†i ch√≠nh theo persona, nhi·ªám v·ª•: \n"
    "1) Ki·ªÉm tra kh·∫£ thi d·ª±a v√†o d·ªØ li·ªáu t√†i ch√≠nh c·ªßa ng∆∞·ªùi d√πng (thu nh·∫≠p r√≤ng, chi ph√≠ c·ªë ƒë·ªãnh, chi ti√™u bi·∫øn ƒë·ªïi, s·ª± ki·ªán).\n"
    "2) N·∫øu kh·∫£ thi: ƒë·ªÅ xu·∫•t k·∫ø ho·∫°ch 7 ho·∫∑c 14 ng√†y, m·ªói ng√†y 2‚Äì4 task ƒëo l∆∞·ªùng ƒë∆∞·ª£c, t·ªïng weekly_save ‚â§ tr·∫ßn kh·∫£ thi.\n"
    "3) N·∫øu ch∆∞a kh·∫£ thi: ƒë·ªÅ xu·∫•t ƒëi·ªÅu ch·ªânh (gi·∫£m s·ªë ti·ªÅn/gi√£n th·ªùi gian) k√®m l√Ω do ng·∫Øn.\n"
    "4) Persona ch·ªâ l√† gi·ªçng ƒëi·ªáu; TUY·ªÜT ƒê·ªêI kh√¥ng x√∫c ph·∫°m/c√¥ng k√≠ch.\n"
    "5) Ch·ªâ tr·∫£ v·ªÅ JSON ƒë√∫ng schema PlanProposal (kh√¥ng th√™m ch·ªØ n√†o ngo√†i JSON).\n"
)

print("‚úÖ Prompt & schema s·∫µn s√†ng.")



‚úÖ Prompt & schema s·∫µn s√†ng.


In [41]:
# H√†m g·ªçi LLM sinh PlanProposal theo SYSTEM_PROMPT + STYLEBOOK, validate b·∫±ng Pydantic

def llm_generate_plan_proposal(persona: str, user_goal_text: str, row: dict, horizon_days: int, weekly_cap_save: int, recommended_weekly: int):
    persona = persona if persona in STYLEBOOK else "mentor"
    tone = STYLEBOOK[persona]["tone"]
    closing = STYLEBOOK[persona]["closing"]

    payload = {
        "persona": persona,
        "tone": tone,
        "user_goal": user_goal_text,
        "horizon_days": horizon_days,
        "weekly_cap_save": weekly_cap_save,
        "recommended_weekly_save": recommended_weekly,
        "row_summary": {
            "income": int(row.get("income_net_month", row.get("income", 0)) or 0),
            "fixed_bills": int(row.get("fixed_bills_month", row.get("loan", 0)) or 0),
            "variable_spend": int(row.get("variable_spend_month", row.get("spend", 0)) or 0),
        },
        "style_closing": closing
    }

    if 'gemini_model' in globals() and gemini_model is not None:
        try:
            import google.generativeai as genai
            mdl = genai.GenerativeModel(
                model_name=os.environ.get("GEMINI_MODEL", "gemini-2.0-flash"),
                system_instruction=SYSTEM_PROMPT,
                generation_config={
                    "temperature": 0.5,
                    "response_mime_type": "application/json"
                }
            )
            resp = mdl.generate_content(json.dumps(payload, ensure_ascii=False))
            text = resp.candidates[0].content.parts[0].text if resp and resp.candidates else ""
            obj = json.loads(text)
            validated = PlanProposal(**obj)
            return validated
        except Exception as e:
            # fallback deterministic
            pass

    # fallback deterministic: x√¢y d·ª±ng t·ª´ propose_week_plan
    from datetime import date
    days = propose_week_plan(row, date.today(), horizon_days, recommended_weekly)
    deterministic = {
        "feasibility": "ok" if recommended_weekly>0 else "adjust",
        "weekly_cap_save": weekly_cap_save,
        "recommended_weekly_save": recommended_weekly,
        "reasons": [
            "Fallback deterministic do LLM kh√¥ng s·∫µn s√†ng."
        ],
        "proposal": {
            "target_amount": None,
            "target_date": None
        },
        "week_plan": days,
        "supervision_note": "T√¥i s·∫Ω gi√°m s√°t tu·∫ßn n√†y. ƒê·∫°t ‚Üí l·∫∑p l·∫°i; Kh√¥ng ƒë·∫°t ‚Üí ƒëi·ªÅu ch·ªânh.",
        "confirm_question": "B·∫°n ƒë·ªìng √Ω k·∫ø ho·∫°ch n√†y kh√¥ng?"
    }
    return PlanProposal(**deterministic)

print("‚úÖ LLM proposal generator ready.")



‚úÖ LLM proposal generator ready.


In [42]:
# UI: Ghi nh·∫≠t k√Ω chat & chi ti√™u, ƒëi·ªÅu ch·ªânh k·∫ø ho·∫°ch theo chi ti√™u ph√°t sinh
import ipywidgets as w
from IPython.display import display, clear_output
from datetime import date

chat_user = w.Text(placeholder="Nh·∫≠p tin nh·∫Øn ƒë·ªÉ log v√†o DB (tu·ª≥ ch·ªçn)")
chat_btn = w.Button(description="Ghi nh·∫≠t k√Ω chat", button_style="info")

spend_text = w.Text(placeholder="V√≠ d·ª•: u·ªëng tr√† s·ªØa 60k")
spend_btn = w.Button(description="Ghi chi ti√™u & ƒëi·ªÅu ch·ªânh", button_style="warning")
log_out = w.Output(layout={"border":"1px solid #ccc"})


def _log_chat(_):
    msg = chat_user.value.strip()
    if not msg:
        return
    with log_out:
        print(f"üë§ user: {msg}")
    if db_available() and _customer_row.get("customer_id"):
        with engine.begin() as conn:
            db_upsert_chat(conn, customer_id=int(_customer_row["customer_id"]), role="user", text_msg=msg, payload={"ym": _customer_row.get("year_month")}, plan_id=_current_plan.get("plan_id"))
    chat_user.value = ""


def _adjust_plan_for_spend(amount: int):
    """ƒê∆°n gi·∫£n: th√™m task b√π v√†o ng√†y h√¥m sau, ho·∫∑c h√¥m nay n·∫øu ch∆∞a c√≥ plan.
    """
    if not _current_plan.get("days"):
        return [], []
    old_days = [dict(d) for d in _current_plan["days"]]
    days = [dict(date=d["date"], tasks=list(d["tasks"])) for d in _current_plan["days"]]
    # ch·ªçn ng√†y b√π: h√¥m nay (n·∫øu c√≥ trong plan) ho·∫∑c ng√†y ƒë·∫ßu ti√™n k·∫ø ti·∫øp
    today_str = date.today().isoformat()
    idx = next((i for i,d in enumerate(days) if d["date"] >= today_str), 0)
    bump = f"B√π l·∫°i {amount:,}ƒë b·∫±ng c√°ch c·∫Øt ƒÉn v·∫∑t/gi·∫£i tr√≠".replace(",",".")
    days[idx]["tasks"].insert(0, bump)
    _current_plan["days"] = days
    return old_days, days


def _spend(_):
    text_msg = spend_text.value.strip()
    if not text_msg:
        return
    # Parse amount & category t·ª´ cell B (ƒë√£ c√≥ _extract_amount, _guess_category, add_spend_event)
    amt = _extract_amount(text_msg)
    cat = _guess_category(text_msg)
    rec = add_spend_event(text_msg)

    # Log DB
    if db_available() and _customer_row.get("customer_id"):
        with engine.begin() as conn:
            db_insert_spend_event(conn, customer_id=int(_customer_row["customer_id"]), text_msg=text_msg, amount=amt, category=cat, source="user", plan_id=_current_plan.get("plan_id"))
            db_upsert_chat(conn, customer_id=int(_customer_row["customer_id"]), role="user", text_msg=f"spend: {text_msg}", payload={"amount": amt, "category": cat}, plan_id=_current_plan.get("plan_id"))

    advice = None
    if _current_plan.get("plan"):
        old_days, new_days = adjust_plan_with_spend_optimal(int(amt or 0))
        diffs = _diff_plans(old_days, new_days) if old_days else []
        advice = f"ƒê√£ th√™m task b√π {amt:,}ƒë v√†o k·∫ø ho·∫°ch.".replace(",",".") if amt else "ƒê√£ ghi nh·∫≠n chi ti√™u."
    else:
        advice = "ƒê√£ ghi nh·∫≠n chi ti√™u. H√£y t·∫°o k·∫ø ho·∫°ch tr∆∞·ªõc ƒë·ªÉ m√¨nh gi√∫p ƒëi·ªÅu ch·ªânh."

    with log_out:
        print(f"üßæ spend: {text_msg} (amt={amt}, cat={cat})")
        print("ü§ñ advice:", advice)
        if _current_plan.get("plan"):
            if not diffs:
                print("(Kh√¥ng c√≥ thay ƒë·ªïi k·∫ø ho·∫°ch ho·∫∑c ch∆∞a c√≥ plan)")
            else:
                print("C√°c thay ƒë·ªïi trong plan:")
                for d in diffs:
                    print(f" - {d['date']}")
                    print("   c≈©:", " | ".join(d["old"]))
                    print("   m·ªõi:", " | ".join(d["new"]))

    # Log assistant advice
    if db_available() and _customer_row.get("customer_id"):
        with engine.begin() as conn:
            db_upsert_chat(conn, customer_id=int(_customer_row["customer_id"]), role="assistant", text_msg=advice, payload={"type":"adjust_on_spend","amount": amt, "category": cat}, plan_id=_current_plan.get("plan_id"))

    spend_text.value = ""

chat_btn.on_click(_log_chat)
spend_btn.on_click(_spend)

display(w.VBox([
    w.HTML("<b>Nh·∫≠t k√Ω & ƒêi·ªÅu ch·ªânh theo chi ti√™u</b>"),
    w.HBox([chat_user, chat_btn]),
    w.HBox([spend_text, spend_btn]),
    log_out
]))



VBox(children=(HTML(value='<b>Nh·∫≠t k√Ω & ƒêi·ªÅu ch·ªânh theo chi ti√™u</b>'), HBox(children=(Text(value='', placehol‚Ä¶

In [43]:
# Cache LLM proposal + ƒëi·ªÅu ch·ªânh k·∫ø ho·∫°ch t·ªëi ∆∞u theo chi ti√™u
import hashlib

PROPOSAL_CACHE: dict[str, dict] = {}

def _cache_key(persona: str, user_goal_text: str, horizon_days: int, weekly_cap_save: int, recommended_weekly: int, row: dict) -> str:
    row_summary = {
        "income": int(row.get("income_net_month", row.get("income", 0)) or 0),
        "fixed_bills": int(row.get("fixed_bills_month", row.get("loan", 0)) or 0),
        "variable_spend": int(row.get("variable_spend_month", row.get("spend", 0)) or 0),
    }
    payload = json.dumps({
        "persona": persona,
        "goal": user_goal_text,
        "horizon": horizon_days,
        "cap": weekly_cap_save,
        "rec": recommended_weekly,
        "row": row_summary,
    }, ensure_ascii=False, sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


def llm_generate_plan_proposal_cached(persona: str, user_goal_text: str, row: dict, horizon_days: int, weekly_cap_save: int, recommended_weekly: int):
    key = _cache_key(persona, user_goal_text, horizon_days, weekly_cap_save, recommended_weekly, row)
    if key in PROPOSAL_CACHE:
        obj = PROPOSAL_CACHE[key]
        try:
            return PlanProposal(**obj)
        except Exception:
            pass
    proposal = llm_generate_plan_proposal(persona, user_goal_text, row, horizon_days, weekly_cap_save, recommended_weekly)
    try:
        PROPOSAL_CACHE[key] = proposal.model_dump()
    except Exception:
        pass
    return proposal


def adjust_plan_with_spend_optimal(amount: int):
    """Ph√¢n b·ªï b√π v√†o nhi·ªÅu ng√†y c√≤n l·∫°i, t·ªïng b√π = amount, ∆∞u ti√™n kh√¥ng v∆∞·ª£t qu√° ~per_day_save.
    """
    if amount is None or amount <= 0 or not _current_plan.get("days"):
        return [], []
    days = [dict(date=d["date"], tasks=list(d["tasks"])) for d in _current_plan["days"]]
    old_days = [dict(date=d["date"], tasks=list(d["tasks"])) for d in _current_plan["days"]]

    horizon = int(_current_plan["plan"].get("horizon_days", len(days))) if _current_plan.get("plan") else len(days)
    weekly = int(_current_plan["plan"].get("weekly_save", 0)) if _current_plan.get("plan") else 0
    per_day_target = max(1, int(max(1, weekly) / max(7, horizon))) if weekly else max(1, int(amount / max(1, len(days))))

    # Ch·ªçn t·∫≠p ng√†y c√≤n l·∫°i k·ªÉ t·ª´ h√¥m nay
    today_str = date.today().isoformat()
    rem_idx = [i for i,d in enumerate(days) if d["date"] >= today_str]
    if not rem_idx:
        rem_idx = list(range(len(days)))

    remaining = int(amount)
    # ph√¢n b·ªï v√≤ng tr√≤n qua c√°c ng√†y c√≤n l·∫°i ƒë·∫øn khi h·∫øt
    ptr = 0
    while remaining > 0 and rem_idx:
        i = rem_idx[ptr % len(rem_idx)]
        chunk = min(per_day_target, remaining)
        bump = f"B√π l·∫°i {chunk:,}ƒë do chi ti√™u ph√°t sinh".replace(",",".")
        days[i]["tasks"].insert(0, bump)
        remaining -= chunk
        ptr += 1

    _current_plan["days"] = days
    return old_days, days

print("‚úÖ Cache & optimal adjust ready.")



‚úÖ Cache & optimal adjust ready.


In [44]:
# .env config loader (kh√¥ng ghi file, ch·ªâ n·∫°p n·∫øu c√≥)
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("‚úÖ Loaded .env if present.")
except Exception as e:
    print("‚ÑπÔ∏è python-dotenv not installed; b·ªè qua.")

# Ki·ªÉm th·ª≠ nhanh parsing s·ªë v√† validate JSON output

def test_parsers_and_schema():
    tests = [
        ("tr√† s·ªØa 60k", 60000),
        ("ƒÉn buffet 230k", 230000),
        ("mua √°o 1.2m", 1200000),
        ("coffee 15,000", 15000),
    ]
    ok = True
    for s, expect in tests:
        got = _extract_amount(s)
        if got != expect:
            print("‚ùå parse:", s, "=>", got, "!=", expect)
            ok = False
    if ok:
        print("‚úÖ amount parser passed.")

    # Validate m·∫´u PlanProposal t·ªëi thi·ªÉu (7 ng√†y)
    sample = {
        "feasibility": "ok",
        "weekly_cap_save": 700000,
        "recommended_weekly_save": 600000,
        "reasons": ["demo"],
        "proposal": {"target_amount": 15000000, "target_date": "2026-02-01"},
        "week_plan": [{"date":"2025-09-10","tasks":["Gi·ªØ ti·∫øt ki·ªám ng√†y 100.000ƒë"]} for _ in range(7)],
        "supervision_note": "T√¥i s·∫Ω gi√°m s√°t tu·∫ßn n√†y. ƒê·∫°t ‚Üí l·∫∑p l·∫°i; Kh√¥ng ƒë·∫°t ‚Üí ƒëi·ªÅu ch·ªânh.",
        "confirm_question": "B·∫°n ƒë·ªìng √Ω kh√¥ng?"
    }
    try:
        PlanProposal(**sample)
        print("‚úÖ schema validation passed.")
    except ValidationError as e:
        print("‚ùå schema validation:", e)

# Ch·∫°y test nhanh (c√≥ th·ªÉ t·∫Øt n·∫øu kh√¥ng c·∫ßn)
test_parsers_and_schema()



‚ÑπÔ∏è python-dotenv not installed; b·ªè qua.
‚ùå parse: mua √°o 1.2m => 12000000 != 1200000
‚úÖ schema validation passed.
