# Setup

In [1]:
import torch
import spacy
from spacy import displacy
import IPython
from IPython.display import display, HTML

import pandas as pd
import numpy as np

from datasets import Dataset
from transformers import BertTokenizer, BertTokenizerFast, BertForSequenceClassification, TrainingArguments, Trainer,  DataCollatorWithPadding

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from prompt_gen import generate_prompt, generate_response

#from huggingface_hub import login
#login()

# Show which IPython version this notebook session is running on.
print(IPython.__version__)


---------- Prompt Template -------------
You are a highly experienced travel expert and advisor for Singapore. Your task is to provide a well-structured and practical travel itinerary based on the user's needs.

Here is the user's original request:
 Hi, I'm planning a 7-day backpacking trip to Singapore in July 2025. I'm traveling alone and really want to explore the local culture, especially museums and art. I'm vegetarian and on a budget of about SGD 1000. Can you suggest cultural places to visit and affordable vegetarian food options?

Specifically, as a Family Traveler and Foodie , the user wants to do explore culture in Singapore. The travel dates are July 2025. The trip duration is 7 days. The user prefers vegetarian cuisine. Budget is [1000]. The travel style is backpacking. Traveling group includes 1 adult.

Additional factual information about Singapore. Please integrate this context seamlessly into your advice:
1. The National Gallery hosts rotating exhibits on Southeast Asi

In [2]:
# Report the PyTorch build, then pick the compute device: Apple MPS when the
# backend reports it as available, CPU otherwise.
print("PyTorch version:", torch.__version__)
print("MPS available:", torch.backends.mps.is_available())

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
if device.type == "mps":
    print("MPS device is available and selected.")
else:
    print("MPS device not available. Using CPU.")

# Smoke test: allocate a small tensor on the chosen device and show where it lives.
x = torch.tensor([1, 2, 3], device=device)
print(x.device)

PyTorch version: 2.7.0+cu126
MPS available: False
MPS device not available. Using CPU.
cpu


In [3]:
# Load spaCy model
# `nlp` is the shared pipeline object used by the entity-extraction helpers below.
nlp = spacy.load("en_core_web_trf")

In [4]:
# All spaCy labels
# Prints the label set of the pipeline's "ner" component.
print(nlp.get_pipe("ner").labels)

('CARDINAL', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART')


| **Label**  | **Purpose in Your Agent**                                                      |
| ---------- | ------------------------------------------------------------------------------ |
| `GPE`      | Detect cities, countries, districts (e.g., *Singapore*, *Kampong Glam*)        |
| `LOC`      | Identify general locations (e.g., *Marina Bay*, *Orchard Road*)                |
| `FAC`      | Capture facilities like *MRT*, *airport*, *hotel*                              |
| `DATE`     | Recognize travel dates (e.g., *3 June*, *next Monday*)                         |
| `TIME`     | Times of day for activities or bookings (e.g., *10 AM*, *evening*)             |
| `DURATION` | Trip length or durations (e.g., *3 days*, *2 nights*); not a built-in spaCy label (see the list above) — spaCy tags these as `DATE` |
| `ORG`      | Travel operators, hotel chains, airlines (e.g., *Expedia*, *Marina Bay Sands*) |
| `MONEY`    | Budget, pricing (e.g., *SGD 100*, *\$200*)                                     |
| `PERSON`   | User or people mentioned in dialogue (for chatbot personalization if needed)   |
| `EVENT`    | Named events or festivals (e.g., *Singapore Night Festival*)                   |
| `CARDINAL` | Generic numbers (e.g., *2 adults*, *4 attractions*)                            |
| `ORDINAL`  | Day number in trip or itinerary step (e.g., *first day*, *3rd night*)          |


# NLP Modules

In [5]:
# Rule-based dictionary of known terms
# All entries are lowercase; extract_with_spacy() compares them against
# lowercased entity text and lowercased token lemmas.
FOOD_TERMS = {'rojak', 'prawn mee', 'bubur cha cha', 'mee siam', 'itek-itek', 'popiah', 
              'fish head curry', 'pongal', 'kueh pie tee', 'char kway teow', 
              'hainanese chicken rice', 'mee goreng', 'wonton mee', 'nasi lemak', 
              'beef rendang', 'bakuteh', 'nasi padang', 'teochew porridge', 
              'yong tau foo', 'char koay teow', 'kueh salat', 'bak chor mee', 'chicken wings', 
              'curry puffs', 'kong bah pau', 'oyster omelette', 'bak kut teh', 'har jeong gai', 
              'kway chap', 'mee rebus ayam', 'laksa', 'mee rebus', 'wan tan mee', 'otah-otah', 
              'carrot cake', 'ayam buah keluak', 'satay', 'lor mee'}

# Transport vocabulary; also used to keep transport names out of the "location" slot.
TRANSPORT_TERMS = {"mrt", "ez-link", "bus pass", "circle line", "east west line"}
# Known place names. NOTE(review): extract_with_spacy() only checks membership here
# and then does nothing with the match — confirm whether that is intended.
LOCATION_TERMS = {"marina bay", "kampong glam", "chinatown", "sentosa"}

# Lowercase phrases that signal a special requirement in a user message.
# Order matters: detect_additional_signals() reports the first entry that matches.
# Each phrase appears exactly once ("no stairs" lives under Accessibility only).
SPECIAL_REQUIREMENT_TERMS = [
    # Accessibility
    "wheelchair", "disabled", "elderly", "mobility", "ramp", "accessible", "no stairs",

    # Dietary
    "halal", "vegetarian", "vegan", "gluten-free", "kosher",

    # Kid/baby-friendly
    "stroller", "baby", "infant", "kid-friendly", "child seat",

    # Pet-related
    "pet-friendly", "pets allowed", "dog", "cat", "no pets",

    # Sensory/environmental
    "quiet", "no smoking", "non-smoking", "low crowd", "avoid crowded", "no noise",
]

In [6]:
# Define essential fields that must be filled
# Read by missing_fields(); a field counts as missing when it is None or an empty list.
ESSENTIAL_FIELDS = ["intent", "location", "date", "duration_days"]

## Load Trained Intent Detection Model

In [7]:
# Load trained model and tokenizer
# Both are read from the same directory; classify_intent() uses the module-level
# `model` and `tokenizer` names defined here.
model_path = "./intent_model"  # Change to your model directory
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizerFast.from_pretrained(model_path)

# Put model in eval mode
# (inference only: turns off training-time behaviour such as dropout)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

## Classify Intent

In [8]:
# Intent name -> class index, and the reverse lookup used to decode predictions.
label2id = {
    'FindPlace': 0,
    'BookFlight': 1,
    'AskOpeningHour': 2,
    'SearchHotel': 3,
    'PlanItinerary': 4,
}
id2label = dict(zip(label2id.values(), label2id.keys()))

In [9]:
def classify_intent(text):
    """Return the intent label predicted for ``text``.

    Tokenizes the text with the module-level ``tokenizer``, runs the module-level
    ``model`` without gradient tracking, and maps the highest-scoring class index
    back to its name through ``id2label``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits
    best_class = logits.argmax(dim=1).item()
    return id2label[best_class]

## Understand the Entities

In [10]:
def understand_entities(text):
    """Visualise the spaCy analysis of ``text`` inline and print per-token details.

    Shows the dependency parse and the named entities with displaCy, then prints
    one line per token (lemma, POS, tag, dependency, shape, alpha/stop flags).

    Returns:
        A short status message. Nothing is written to disk.
    """
    doc = nlp(text)

    # jupyter=False makes displaCy return the markup as a string. Left on
    # auto-detect, it displays the markup itself inside a notebook and returns
    # None, so the display(HTML(...)) calls below would receive None.
    dep_html = displacy.render(doc, style="dep", page=True, jupyter=False)
    display(HTML(dep_html))

    # Render named entities
    ent_html = displacy.render(doc, style="ent", jupyter=False)
    display(HTML(ent_html))

    for token in doc:
        print(f"{token.text:15} | Lemma: {token.lemma_:10} | POS: {token.pos_:8} | Tag: {token.tag_:6} | Dep: {token.dep_:12} | "
        f"Shape: {token.shape_:10} | Alpha: {token.is_alpha} | Stop: {token.is_stop}")

    # The previous message claimed HTML files had been saved; no file is written here.
    return "Dependency tree and entity visualization rendered inline."

## Initialize the Dialogue State

In [11]:
# Define base dialogue state format
def init_dialogue_state():
    """Build an empty dialogue state.

    ``intent`` starts as None; every other slot starts as its own new empty list,
    so states returned by separate calls never share list objects.
    """
    list_slots = (
        "location",
        "date",
        "duration_days",
        "food",
        "budget",
        "transport",
        "event",
        "style",
        "num_kids",
        "num_adults",
        "special",
    )
    state = {"intent": None}
    for slot in list_slots:
        state[slot] = []
    return state

# x Destination: Singapore, Sentosa
# Persona: family with kids, solo traveler
# Activity: shopping, food, studying
# Accommodation: 4-star hotel, budget hotel
# x Transport: flight, car, train, cruise, ferry
# x Duration: 3 days, two weeks
# x Date: July, 10 June 2025
# x Scope (Intent): overall trip planning, accommodation advice, food advice
# Tip: best time to visit, weather in December
# x Budget: "under $1000", luxurious
# Custom: visa, passport validity
# more..


## Extract Entities

In [12]:
# Mapping spaCy labels to our labels
# Keys are spaCy entity labels; values are dialogue-state slot names.
# extract_with_spacy() handles GPE, LOC and DATE itself before consulting this
# table, so in practice it is reached for the remaining labels (FAC, TIME, EVENT, MONEY).
spacy_to_custom_labels = {
    "GPE": "location",
    "LOC": "location",
    "FAC": "location",
    "DATE": "date",
    "TIME": "date",
    # NOTE(review): "DURATION" is not in the label list printed by
    # nlp.get_pipe("ner").labels earlier in this notebook, so this entry looks unused.
    "DURATION": "date",
    "EVENT": "event",
    "MONEY": "budget",
}

### Time Extraction

In [13]:
from symspellpy.symspellpy import SymSpell, Verbosity

# Initialize (load once)
sym_spell = SymSpell(max_dictionary_edit_distance=2)
# load_dictionary() reports a missing file through its return value instead of
# raising, so keep the result and disable correction when nothing was loaded.
_SYM_SPELL_READY = bool(
    sym_spell.load_dictionary("./frequency_dictionary_en_82_765.txt", 0, 1)
)
if not _SYM_SPELL_READY:
    print("⚠️ SymSpell dictionary could not be loaded; spelling correction is disabled.")


def correct_text(text):
    """Return a spelling-corrected version of ``text``.

    Uses SymSpell compound lookup (edit distance up to 2) and returns the top
    suggestion. The input is returned unchanged when the frequency dictionary
    was not loaded, when the text is empty or blank, or when SymSpell offers no
    suggestion.
    """
    if not _SYM_SPELL_READY or not text or not text.strip():
        return text
    suggestions = sym_spell.lookup_compound(text, max_edit_distance=2)
    return suggestions[0].term if suggestions else text

2025-06-27 14:40:34,406: E symspellpy.symspellpy] Dictionary file not found at frequency_dictionary_en_82_765.txt.


In [14]:
from rapidfuzz import fuzz, process

def fuzzy_match_time_unit(word):
    """Map ``word`` to the closest known time unit, tolerating small typos.

    The candidate units are day(s), week(s), month(s) and night(s). The best
    ``fuzz.ratio`` match is returned when its score is at least 80; otherwise
    (or when there is no match at all) the result is None.
    """
    known_units = ["day", "days", "week", "weeks", "month", "months", "night", "nights"]
    best = process.extractOne(word, known_units, scorer=fuzz.ratio)
    if not best:
        return None
    unit, score = best[0], best[1]
    if score < 80:
        return None
    return unit

In [15]:
def classify_time_entity_fuzzy(ent):
    """Decide whether a temporal entity is a calendar date or a trip duration.

    Args:
        ent: A spaCy entity span; only ``ent.text`` and ``ent.label_`` are read.

    Returns:
        ``"date"`` for ranges, month/weekday mentions and numeric date formats,
        ``"duration_days"`` for a number followed by a (possibly misspelled)
        time unit, otherwise the entity's own label in lowercase.
    """
    import re

    text = ent.text.lower().strip()
    corrected = correct_text(text)

    # Patterns
    month_keywords = [
        "january", "february", "march", "april", "may", "june",
        "july", "august", "september", "october", "november", "december",
        "jan", "feb", "mar", "apr", "jun", "jul", "aug", "sep", "sept", "oct", "nov", "dec"
    ]

    weekday_keywords = [
        "monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday",
        "mondays", "tuesdays", "wednesdays", "thursdays", "fridays", "saturdays", "sundays"
    ]

    def mentions_any(keywords):
        # Match keywords as standalone words. Plain substring tests fire on
        # unrelated text ("dec" in "decade", "mar" in "marathon"). Only letters
        # are excluded around the keyword, so "7july" or "3jan" still match.
        alternatives = "|".join(re.escape(keyword) for keyword in keywords)
        return re.search(rf"(?<![a-z])(?:{alternatives})(?![a-z])", corrected) is not None

    # If it looks like a range, treat it as date
    if re.search(r"\b(from|to|until|between)\b", corrected):
        return "date"

    # If contains explicit duration unit like "days", "weeks".
    # Guard against empty text: indexing the last word of an empty split raises.
    words = corrected.split()
    unit = fuzzy_match_time_unit(words[-1]) if words else None
    if unit and any(char.isdigit() for char in corrected):
        return "duration_days"

    if mentions_any(month_keywords):
        return "date"

    if mentions_any(weekday_keywords):
        return "date"

    # Detect YYYY-MM-DD or DD/MM/YYYY patterns
    if re.search(r"\d{1,2}[/-]\d{1,2}[/-]\d{2,4}", corrected) or re.search(r"\d{4}-\d{2}-\d{2}", corrected):
        return "date"

    # Fallback to original label
    return ent.label_.lower()


In [16]:
from dateutil import parser

def normalize_date(text):
    """Parse a free-form date string and return it as ``YYYY-MM-DD``.

    Parsing is fuzzy (unrelated words are skipped) and day-first for ambiguous
    numeric dates. Components missing from the text are filled in by dateutil's
    defaults.

    Returns:
        The ISO-formatted date string, or None when the input cannot be parsed
        (including non-string input such as None).
    """
    try:
        dt = parser.parse(text, fuzzy=True, dayfirst=True)
    except (ValueError, OverflowError, TypeError):
        # dateutil raises ParserError (a ValueError) for unparseable text,
        # OverflowError for out-of-range values and TypeError for non-string
        # input. Anything else (e.g. KeyboardInterrupt) should propagate.
        return None
    return dt.strftime('%Y-%m-%d')

In [17]:
def is_potential_noise_date(ent):
    """Return True when a DATE entity is most likely just a bare number.

    Args:
        ent: An entity span exposing ``text``, ``doc``, ``start`` and ``end``.

    Returns:
        True for a number from 1 to 30 that has a non-temporal cue (age, kid,
        room, people, ...) within two tokens on either side, and for any number
        written with one or two digits; False otherwise.
    """
    # Imported locally: this function is defined before the notebook's first
    # module-level `import re`, so relying on the global name is fragile.
    import re

    text = ent.text.strip().lower()
    # If it's a plain number, be skeptical
    if text.isdigit():
        num = int(text)
        # Ages, counts, room numbers, etc.
        if 0 < num <= 30:
            # Check surrounding context for non-temporal signals
            left = ent.doc[max(ent.start - 2, 0):ent.start]
            right = ent.doc[ent.end:min(ent.end + 2, len(ent.doc))]
            window = " ".join(t.text.lower() for t in list(left) + list(right))
            if re.search(r"(age|year[- ]?old|kid|child|room|group|people|class|seat)", window):
                return True
    # Also guard against short dates like "8" or "12".
    # This catches every 1-2 digit number, so the context check above only
    # changes the outcome for zero-padded values such as "007".
    if len(text) <= 2 and text.isdigit():
        return True
    return False


from datetime import datetime

def compute_duration_from_dates(date_range):
    """Describe the number of days between two ISO dates.

    Args:
        date_range: Mapping with ``"start"`` and ``"end"`` keys, each a
            ``YYYY-MM-DD`` string.

    Returns:
        ``"<n> days"`` when the end is after the start, otherwise the string
        ``"Invalid date range"`` (this includes start == end).
    """
    iso_format = "%Y-%m-%d"
    first_day = datetime.strptime(date_range["start"], iso_format)
    last_day = datetime.strptime(date_range["end"], iso_format)
    span = (last_day - first_day).days
    if span <= 0:
        return "Invalid date range"
    return f"{span} days"

In [18]:
import re

def is_relative_day(text):
    """
    Tell whether ``text`` names a time relative to now (e.g., today, tomorrow, next week).

    The check is case-insensitive and succeeds when the text contains one of the
    known relative phrases, or an offset such as "in 3 days" / "after 1 week".
    """
    normalized = text.lower().strip()

    relative_phrases = (
        "today", "tomorrow", "tonight", "yesterday",
        "this morning", "this evening", "this afternoon",
        "next week", "next month", "next year",
        "this week", "this month", "this year",
        "coming weekend", "this weekend", "next weekend",
    )
    if any(phrase in normalized for phrase in relative_phrases):
        return True

    # Offsets like "in 3 days", "after 1 week", "within 2 months"
    offset_pattern = r"(in|after|within)\s+\d+\s+(day|days|week|weeks|month|months|year|years)"
    return re.search(offset_pattern, normalized) is not None

### NER Extraction

In [19]:
# Extract using spaCy
def extract_with_spacy(text):
    """Fill a fresh dialogue state from ``text`` using spaCy NER plus keyword lookups.

    Entities are processed in document order:
      * GPE / LOC  -> ``location`` (unless the text is a known transport term)
      * DATE       -> ``date`` (normalized to YYYY-MM-DD) or ``duration_days``;
                      likely-noise numbers and relative dates are dropped
      * other labels are mapped through ``spacy_to_custom_labels``
    Food and transport terms are then collected three ways: exact entity text,
    single-token lemmas, and a whole-phrase scan of the raw text.

    Returns:
        The populated dialogue state dict (``intent`` is left as None).
    """
    import re  # local import so the function does not depend on cell order

    doc = nlp(text)
    state = init_dialogue_state()

    for ent in doc.ents:
        term = ent.text.strip().lower()
        label = ent.label_

        # Handle location
        if label in ["GPE", "LOC"]:
            if term not in TRANSPORT_TERMS and term not in state["location"]:
                state["location"].append(term)
            continue

        # Handle DATE vs. duration vs. relative day
        if label == "DATE":
            if is_potential_noise_date(ent):
                continue  # skip likely misclassified number
            unit = classify_time_entity_fuzzy(ent)
            if unit == "date":
                # Relative expressions ("next week") are not stored; only
                # dates that can be normalized are kept.
                if not is_relative_day(term):
                    norm = normalize_date(term)
                    if norm and norm not in state["date"]:
                        state["date"].append(norm)
            elif unit == "duration_days":
                if term not in state["duration_days"]:
                    state["duration_days"].append(term)
            continue  # skip further processing of this term

        # Mapped labels
        mapped_label = spacy_to_custom_labels.get(label)
        if mapped_label and term not in state[mapped_label]:
            state[mapped_label].append(term)

        # Keyword-based overrides.
        # NOTE(review): the earlier version also tested LOCATION_TERMS here and
        # in the token loop but did nothing on a match; those no-op branches are
        # dropped. Confirm whether such matches should populate "location".
        if term in FOOD_TERMS and term not in state["food"]:
            state["food"].append(term)
        elif term in TRANSPORT_TERMS and term not in state["transport"]:
            state["transport"].append(term)

    # Fallback token-level matches (lemma-based, so "satays" still hits "satay")
    for token in doc:
        word = token.lemma_.lower()

        if word in FOOD_TERMS and word not in state["food"]:
            state["food"].append(word)
        elif word in TRANSPORT_TERMS and word not in state["transport"]:
            state["transport"].append(word)

    # Phrase-level fallback: most dictionary entries are multi-word or hyphenated
    # ("nasi lemak", "ez-link") and can never equal a single token lemma.
    # sorted() keeps the output order deterministic across runs.
    lowered = text.lower()
    for vocabulary, slot in ((FOOD_TERMS, "food"), (TRANSPORT_TERMS, "transport")):
        for phrase in sorted(vocabulary):
            if phrase in state[slot]:
                continue
            if re.search(rf"(?<!\w){re.escape(phrase)}(?!\w)", lowered):
                state[slot].append(phrase)

    return state

In [20]:
# Demo: run the extractor on one sample sentence and list every slot of the result.
test = "We’re interested in Singapore’s local culture and historical sights 7 July, but would prefer simplified names or explanations for easier understanding."
# print(extract_with_spacy(test))
result = extract_with_spacy(test)

# One "slot: value" line per entry of the returned dialogue state.
for key, value in result.items():
    print(f"{key}: {value}")

singapore
intent: None
location: ['singapore']
date: ['2025-07-07']
duration_days: []
food: []
budget: []
transport: []
event: []
style: []
num_kids: []
num_adults: []
special: []


## Check Missing Essential Entities

In [21]:
def missing_fields(parsed_response):
    """List the essential fields that ``parsed_response`` has not filled in.

    A field listed in ``ESSENTIAL_FIELDS`` counts as missing when it is absent,
    None, or an empty list. The result keeps the order of ``ESSENTIAL_FIELDS``.
    """
    def _is_unfilled(value):
        return value is None or (isinstance(value, list) and len(value) == 0)

    return [
        field
        for field in ESSENTIAL_FIELDS
        if _is_unfilled(parsed_response.get(field))
    ]

In [22]:
def extract_or_clarify(user_query):
    """Parse ``user_query`` and return the result only when it is complete.

    When essential fields are missing, a request for them is printed and None is
    returned so the caller can wait for the user's clarification.

    NOTE(review): ``quick_parse`` is not defined in the visible part of this
    notebook — confirm it exists before calling this function.
    """
    # Try quick parse from your own rules or previous step
    parsed = quick_parse(user_query)  # Assume you have a lightweight parser

    missing = missing_fields(parsed)
    if not missing:
        return parsed

    print(f"🤖 I need more info. Could you please provide: {', '.join(missing)}?")
    return None  # Await user's clarification


## Build Prompt

In [23]:
def build_prompt(user_query):
    """Build the slot-extraction prompt sent to the LLM for ``user_query``.

    The prompt asks the model to return only a Python dictionary with the
    dialogue-state slots, using None / [] for anything not explicitly stated.
    The user query is embedded verbatim between double quotes.

    Returns:
        The complete prompt string.
    """
    # The code fence around the example structure is closed explicitly; left
    # open, everything after it (including the user query) reads as code.
    return f"""
You are a travel assistant AI that extracts **only explicitly stated information** from user messages to help plan an itinerary.

🔍 Your task is to extract fields from the user query **without guessing or inferring**.  
- If something is not clearly mentioned, return `None` (for single values) or `[]` (for lists).  
- Normalize any fuzzy or descriptive terms to a known travel-friendly format.

Return a valid Python dictionary using this structure:
```python
dialogue_state = {{
    "location": List[str],             # Places or landmarks mentioned
    "date": List[str],                 # Exact or relative dates (e.g., "June 3", "next week")
    "duration_days": List[str],        # Durations like "3 days", "a week"
    "food": List[str],                 # Local foods, cuisines
    "budget": List[str],               # Budget phrases like "$300", "under $150"
    "transport": List[str],            # Modes of travel: MRT, bus, taxi
    "event": List[str],                # Activities like shopping, sightseeing, museum
    "style": List[str],                # Descriptive preferences: relaxed, luxury, tourist-friendly
    "num_kids": List[str],             # Number of children (if mentioned)
    "num_adults": List[str],           # Number of adults (if mentioned)
    "special": List[str]               # Special needs: halal, wheelchair access, baby stroller, others
}}
```

🗣 User Query:
"{user_query}"

🔚 Respond with only the dictionary. Do not include explanations, prefixes, or formatting like triple quotes.
"""

## Call the LLM as a Fallback

In [24]:
import ast
import json
import requests

def call_ollama_mistral(user_query):
    """Ask the local Ollama ``mistral`` model to extract dialogue slots from ``user_query``.

    The streamed reply is concatenated, the outermost ``{...}`` span is cut out
    and parsed first as a Python literal and then, if that fails, as JSON.

    Returns:
        The parsed dictionary, or ``{}`` when the request fails, the stream
        breaks, or the reply does not contain a parseable dictionary.
    """
    prompt = build_prompt(user_query)

    response_text = ""
    try:
        # The context manager releases the streamed connection even when
        # reading stops early; raise_for_status() surfaces HTTP errors.
        with requests.post(
            "http://localhost:11434/api/generate",
            json={"model": "mistral", "prompt": prompt},
            stream=True,
            timeout=30,
        ) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    part = json.loads(line)
                except json.JSONDecodeError:
                    continue  # ignore malformed stream lines
                if isinstance(part, dict) and "response" in part:
                    response_text += part["response"]
    except requests.RequestException as e:
        # Covers connection errors, timeouts, HTTP errors and mid-stream failures.
        print("❌ Ollama request failed:", e)
        return {}

    # Keep only the outermost {...}: this drops a "dialogue_state =" prefix as
    # well as trailing text such as a closing code fence.
    dict_start = response_text.find("{")
    dict_end = response_text.rfind("}")
    if dict_start == -1 or dict_end < dict_start:
        print("⚠️ Ollama dict parse error:", "no dictionary found in response")
        print("🔁 Raw response:", response_text)
        return {}
    dict_str = response_text[dict_start:dict_end + 1]

    try:
        # ✅ Use ast.literal_eval for Python-style literal (never eval model output)
        parsed = ast.literal_eval(dict_str)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as literal_error:
        try:
            # The model sometimes answers in JSON (null/true/false) instead.
            parsed = json.loads(dict_str)
        except json.JSONDecodeError:
            print("⚠️ Ollama dict parse error:", str(literal_error))
            print("🔁 Raw response:", response_text)
            return {}

    if not isinstance(parsed, dict):
        # Callers use .get() on the result, so anything but a dict is unusable.
        print("⚠️ Ollama dict parse error:", f"expected a dict, got {type(parsed).__name__}")
        print("🔁 Raw response:", response_text)
        return {}
    return parsed

# Test ollama mistral    
# Smoke test: a query with no travel content, to see what the model returns for every slot.
user_query = 'Hello world!'
response = call_ollama_mistral(user_query)
print(response)


{'location': None, 'date': None, 'duration_days': None, 'food': None, 'budget': None, 'transport': None, 'event': None, 'style': None, 'num_kids': None, 'num_adults': None, 'special': None}


## Dialogue State Management

In [25]:
def merge_states(primary, fallback):
    """Fill the empty slots of ``primary`` with values from ``fallback``.

    Works on a shallow copy of ``primary``. A slot is replaced only when its
    current value is None, [] or {} and ``fallback`` holds a non-empty value for
    the same key. Keys that exist only in ``fallback`` are ignored.

    Returns:
        The merged copy; ``primary`` itself is not modified.
    """
    empty_values = [None, [], {}]
    merged = primary.copy()
    for key, current in list(merged.items()):
        candidate = fallback.get(key)
        if current not in empty_values:
            continue
        if candidate in empty_values:
            continue
        merged[key] = fallback[key]
    return merged

In [26]:
def handle_non_itinerary_intent(intent, user_query):
    """Return the canned reply for an intent other than itinerary planning.

    ``user_query`` is accepted but not used. Intents without a canned reply get
    a generic "not sure" message.
    """
    canned_replies = {
        "FindPlace": "📍 I can help you find a place! What type of place are you looking for?",
        "BookFlight": "✈️ Sure! I can help you book a flight. When and where do you want to travel?",
        "AskOpeningHour": "⏰ Please tell me which place you'd like to know the opening hours for.",
        "SearchHotel": "🏨 Looking for a hotel? Let me know your destination and budget.",
    }
    try:
        return canned_replies[intent]
    except KeyError:
        return "🤖 I'm not sure how to help with that yet."

In [27]:
import re

def detect_additional_signals(text):
    """Detect whether ``text`` mentions a special requirement.

    Checks the phrases in ``SPECIAL_REQUIREMENT_TERMS`` first (as whole words,
    allowing a plural "s"/"es"), then a few regex patterns that tolerate
    spacing and hyphen variants.

    Returns:
        ``(True, matched)`` on the first hit, where ``matched`` is the term or
        the matched text, else ``(False, None)``. The result is always a
        2-tuple, so callers must unpack it: a bare truthiness test on the
        return value is always True.
    """
    text_lower = text.lower()

    # Direct phrase match on word boundaries. Plain substring tests misfire on
    # unrelated words ("cat" in "vacation", "ramp" in "trampoline").
    for term in SPECIAL_REQUIREMENT_TERMS:
        if re.search(rf"(?<!\w){re.escape(term)}(?:s|es)?(?!\w)", text_lower):
            return True, term

    # Regex patterns for common needs
    patterns = [
        r"\b(no\s+stairs|no\s+smoking|non[-\s]?smoking)\b",
        r"\b(kid[-\s]?friendly|baby[-\s]?friendly|pet[-\s]?friendly)\b",
        r"\b(gluten[-\s]?free|wheelchair[-\s]?accessible)\b",
        r"\b(halal|kosher|vegetarian|vegan)\b",
        r"\b(avoid\s+(crowds|crowded))\b",
    ]

    for pattern in patterns:
        match = re.search(pattern, text_lower)
        if match:
            # Report what was found in the text rather than the regex source.
            return True, match.group(0)

    # The earlier noun-chunk pass is gone: it looked for the same terms inside
    # pieces of the same text, so it could never find anything the direct phrase
    # match had missed, and it cost an extra spaCy pipeline run.
    return False, None


In [28]:
from sentence_transformers import SentenceTransformer, util

st_model = SentenceTransformer("all-MiniLM-L6-v2")

# Reference utterances that mean "start a fresh plan".
NEW_PLAN_EXAMPLES = [
    "plan a new trip",
    "start a new itinerary",
    "create a new travel plan",
    "begin another journey",
    "forget the last trip",
    "make a new plan",
    "we want to go somewhere else now",
    "i'm planning a different trip",
    "start over",
    "next, I want to plan something new"
]

# Cache of the example embeddings, filled on first use. The examples were being
# re-encoded on every call. The cache is keyed on the example texts so that
# editing NEW_PLAN_EXAMPLES triggers a re-encode.
_new_plan_cache_key = None
_new_plan_cache_embs = None


def is_new_plan(user_query, threshold=0.7):
    """Return True when ``user_query`` is semantically close to a "new plan" request.

    The query is embedded with ``st_model`` and compared by cosine similarity
    with the embeddings of ``NEW_PLAN_EXAMPLES``.

    Args:
        user_query: The user's message.
        threshold: The best similarity must be strictly greater than this.
    """
    global _new_plan_cache_key, _new_plan_cache_embs

    examples_key = tuple(NEW_PLAN_EXAMPLES)
    if _new_plan_cache_embs is None or _new_plan_cache_key != examples_key:
        _new_plan_cache_embs = st_model.encode(NEW_PLAN_EXAMPLES, convert_to_tensor=True)
        _new_plan_cache_key = examples_key

    query_emb = st_model.encode(user_query, convert_to_tensor=True)
    similarity_scores = util.cos_sim(query_emb, _new_plan_cache_embs)[0]
    max_score = float(similarity_scores.max())

    return max_score > threshold


## Confirmation

In [29]:
# Whole-message phrases that finalize the current plan.
FINAL_CONFIRM_PHRASES = {
    "looks good",
    "that's fine",
    "okay",
    "confirm",
    "yes",
    "that works",
    "good to go",
    "done",
    "finalize",
    "proceed",
    "complete the plan",
}


def is_final_confirmation(text):
    """Return True when the whole message, trimmed and lowercased, is a confirm phrase."""
    normalized = text.strip().lower()
    return {normalized} <= FINAL_CONFIRM_PHRASES

In [30]:
# Whole-message phrases treated as a plain "yes".
YES_PHRASES = {
    "yes",
    "yeah",
    "sure",
    "of course",
    "yep",
    "affirmative",
    "let's go",
    "ok",
    "okay",
}


def is_affirmative(text):
    """Return True when the whole message, trimmed and lowercased, is a yes phrase."""
    normalized = text.strip().lower()
    return {normalized} <= YES_PHRASES

## RAG Integration

In [31]:
import sys
from pathlib import Path

# Add the RAG directory to sys.path
sys.path.append(str(Path("RAG").resolve()))

from query_engine_lib import initialize_rag_pipeline, run_query

✅ Config loaded - Base directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG
✅ Data directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\data
✅ Cache directory: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\cache
✅ Language dictionaries: C:\Users\pakke\OneDrive - Singapore Management University\CS605 Natural Language Processing for Smart Assistant\Project\RAG\cache\lang_dict




In [32]:
# Build the RAG engine once for the session (initialize_rag_pipeline comes from query_engine_lib).
rag_engine = initialize_rag_pipeline()


INFO:src.model_manager:OllamaManager initialized with model: qwen2.5vl:3b
INFO:src.model_manager:Model qwen2.5vl:3b is available locally
INFO:src.embedding_manager:EmbeddingManager initialized with text model: nomic-embed-text, vision model: qwen2.5vl:3b
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:src.chroma_manager:Using existing collection: sg_explorer_documents
INFO:src.chroma_manager:ChromaManager initialized with collection: sg_explorer_documents
INFO:src.rag_query:RAGQueryEngine initialized with default_results=3


✅ RAG pipeline initialized


In [33]:
def sent_prompt_to_llm(prompt):
    """Send ``prompt`` to the local Ollama ``mistral`` model and return its text reply.

    The reply is streamed as JSON lines; the "response" fragment of each line is
    concatenated in order.

    Returns:
        The generated text, or "" when the request or the stream fails. The
        failure value used to be ``{}``; it is now an empty string so the
        function always returns ``str`` and callers can pass the result straight
        to string/JSON parsing. Both values are falsy.
    """
    output = ""
    try:
        # The context manager releases the streamed connection; raise_for_status()
        # turns HTTP error replies into a handled RequestException.
        with requests.post(
            "http://localhost:11434/api/generate",
            json={"model": "mistral", "prompt": prompt},
            stream=True,
            timeout=30,
        ) as response:
            response.raise_for_status()
            # Stream and collect response chunks
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line.decode("utf-8"))
                except (json.JSONDecodeError, UnicodeDecodeError):
                    continue  # Ignore malformed lines
                if isinstance(data, dict):
                    output += data.get("response", "")
    except requests.RequestException as e:
        # Covers connection errors, timeouts, HTTP errors and mid-stream failures.
        print("❌ Ollama request failed:", e)
        return ""

    return output

# rag vector db simulator to generate factual context based on similarity
import json
def rag_vdb_sim(fact_count):
    """Simulate a RAG vector-store lookup by asking the LLM for Singapore travel facts.

    Args:
        fact_count: Number of facts wanted. Values above 3 are capped at 3 to
            reduce tokens and speed up the response.

    Returns:
        A list with the parsed JSON array elements, or [] when ``fact_count`` is
        not positive or the reply is not a JSON array.
    """
    if fact_count <= 0:
        return []
    fact_count = min(fact_count, 3)  # keep max at 3 to reduce tokens and speed up the response.

    response = sent_prompt_to_llm(
        f"You are a travel domain assistant. Generate {fact_count} factual travel-related information about Singapore. "
        "Return the output strictly as a JSON array of strings, where each array element contains one fact. "
        "Do not include any explanations or formatting outside the array."
    )
    try:
        facts = json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # TypeError: a non-string reply (e.g. a failure placeholder) cannot be parsed at all.
        print("Failed to parse response into JSON array.", response)
        return []
    if not isinstance(facts, list):
        # Valid JSON of the wrong shape (object, string, ...) is just as unusable.
        print("Failed to parse response into JSON array.", response)
        return []
    return facts

# Demo: request three facts and print each one on its own line.
rag_chunks = rag_vdb_sim(3)
for chunk in rag_chunks:
    print(chunk)

Singapore is a city-state in Southeast Asia with a population of approximately 5.7 million people.
The official languages of Singapore are Malay, Mandarin, Tamil, and English.
The Merlion, a mythical creature with a lion's head and the body of a fish, is the mascot of Singapore.


## Persona Detection

In [34]:
from transformers import BertTokenizerFast, AutoModelForSequenceClassification, RobertaTokenizerFast
import torch
import joblib
import numpy as np

# Load the tokenizer exactly the same as training
# Earlier BERT-based variant, kept commented out for reference:
#persona_tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
#persona_model = AutoModelForSequenceClassification.from_pretrained("././person_classification/bert_multilabel_persona/checkpoint-2155")
#persona_label_encoder = joblib.load("./person_classification/bert_multilabel_persona/label_encoder.bin")  # This is a MultiLabelBinarizer

# Active RoBERTa-based variant: base tokenizer plus a local fine-tuned checkpoint.
persona_tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")
persona_model = AutoModelForSequenceClassification.from_pretrained("./person_classification/roberta_multilabel_persona/checkpoint-2155")
# NOTE(review): joblib.load unpickles the file — only load encoders from a trusted source.
persona_label_encoder = joblib.load("./person_classification/roberta_multilabel_persona/label_encoder.bin")  # This is a MultiLabelBinarizer

# Inference only: switch the model to evaluation mode.
persona_model.eval()

def predict_personas(user_query, threshold=0.5):
    """Predict the persona labels that apply to ``user_query``.

    The query is tokenized and scored by ``persona_model``; each logit is passed
    through a sigmoid and every class whose probability is at least
    ``threshold`` is selected and looked up in ``persona_label_encoder.classes_``.

    Returns:
        A list of the selected class labels (possibly empty).
    """
    encoded = persona_tokenizer(user_query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = persona_model(**encoded).logits

    # Convert logits to per-class probabilities
    probabilities = torch.sigmoid(logits).squeeze().numpy()

    # Keep every class at or above the threshold
    selected = np.where(probabilities >= threshold)[0]
    return list(persona_label_encoder.classes_[selected])


# Test
# Sample queries, one per traveller profile, to eyeball the persona predictions.
user_queries = [
    "We're 6 young adults (25-30) staying in Singapore for 6 days. We love outdoor activities, hiking trails, cycling, and unique experiences like night safaris. Include one rest day and show us images of adventure activities available.",
    "A family with a child in a wheelchair, maximize sightseeing in 3 days in Singapore",
    "We're tech conference attendees, maximize sightseeing in 3 days in Singapore with MRT travel",
    "We want a wellness retreat, maximize sightseeing in 3 days in Singapore for a weekend",
    "I'm a solo traveler, experience something unique in Singapore (from July 1 to July 5) avoiding crowded places",
    "A couple planning a honeymoon, experience something unique in Singapore (sometime in March) for a weekend including vegan options",
]
# Print the predicted persona list for each query at the default 0.5 threshold.
for query in user_queries:
    personas = predict_personas(query, threshold=0.5)
    print(personas)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


['Adventure Seeker', 'Family Traveler']
['Adventure Seeker', 'Family Traveler']
['Solo Traveler']
['Luxury Seeker', 'Relaxation Seeker']
['Backpacker', 'Solo Traveler']
['Family Traveler']


## Process User Input Pipelines

In [35]:
def format_html_response(text: str) -> str:
    """Turn every newline of a raw LLM answer into ``<br>`` for the chatbot's HTML view."""
    lines = text.split('\n')
    return '<br>'.join(lines)


In [36]:
def _finalized_message(itinerary_text):
    """Build the HTML message shown once an itinerary has been finalized."""
    return (
        "🎉 <b>Your itinerary has been finalized!</b><br><br>"
        f"{format_html_response(itinerary_text)}<br><br>"
        "Would you like to plan another trip?"
    )


def process_user_input(user_query, dialogue_state=None, plan_finalized=False):
    """Advance the itinerary dialogue by one user turn.

    Args:
        user_query: The raw text the user just sent.
        dialogue_state: State dict carried over from the previous turn. When
            ``None`` a fresh state is created with ``init_dialogue_state()``
            and ``plan_finalized`` is reset to ``False``.
        plan_finalized: Whether a plan was already confirmed on an earlier turn.

    Returns:
        A ``(dialogue_state, response_html, plan_finalized)`` tuple, where
        ``response_html`` is the HTML-formatted reply for the chatbot.
    """
    import html  # local import: only needed to escape the debug panel below

    if dialogue_state is None:
        dialogue_state = init_dialogue_state()
        plan_finalized = False

    # Step 1: Handle confirmed state
    if plan_finalized:
        if is_new_plan(user_query) or is_affirmative(user_query):
            dialogue_state = init_dialogue_state()
            plan_finalized = False
            dialogue_state["last_response"] = None
            return dialogue_state, "Got it! Starting a new itinerary. Let's begin.", plan_finalized

        # `or` instead of a .get() default: the key may exist with a None value
        # (it is set to None on reset), and None cannot be HTML-formatted.
        last_answer = dialogue_state.get("last_response") or "No saved itinerary."
        return dialogue_state, _finalized_message(last_answer), plan_finalized

    # Step 2: Intent classification (only until an intent has been recorded)
    if dialogue_state.get("intent") is None:
        intent = classify_intent(user_query)
        if intent != "PlanItinerary":
            return dialogue_state, "❌ I can only help with itinerary planning for now.", plan_finalized
        dialogue_state["intent"] = intent

    # Step 3: spaCy + merge
    spacy_state = extract_with_spacy(user_query)
    dialogue_state = merge_states(dialogue_state, spacy_state)

    # Step 4: Check for required fields
    essential_fields = ["intent"]
    missing = [key for key in essential_fields if not dialogue_state.get(key)]
    if missing:
        return dialogue_state, (
            f"⚠️ I still need the following: {', '.join(missing)}.<br>"
            "Could you please provide that?"
        ), plan_finalized

    # Step 5: LLM fallback for signals
    if detect_additional_signals(user_query):
        llm_state = call_ollama_mistral(user_query)
        dialogue_state = merge_states(dialogue_state, llm_state)

    # Step 6: Check if user is confirming a plan
    if is_final_confirmation(user_query):
        last_answer = dialogue_state.get("last_response")
        if not last_answer:
            return dialogue_state, (
                "⚠️ There's no plan to confirm yet. Try saying something like "
                "'Plan a 3-day trip to Tokyo first.'"
            ), plan_finalized
        plan_finalized = True
        return dialogue_state, _finalized_message(last_answer), plan_finalized

    # Step 7: Persona + RAG
    dialogue_state['persona'] = predict_personas(user_query, threshold=0.5)
    rag_chunks = rag_vdb_sim(0)
    prompt = generate_prompt(user_query, dialogue_state, rag_chunks, 200)

    # Step 8: Run LLM and format
    travel_response = run_query(prompt, rag_engine)
    raw_answer = travel_response['answer']
    formatted_answer = format_html_response(raw_answer)
    # NOTE(review): storing the answer is currently disabled, so Step 6 has no
    # "last_response" to confirm unless another component sets it.
    #dialogue_state["last_response"] = raw_answer

    # The debug panel shows text derived from the user's message; escape it so
    # characters such as "<" or "&" are displayed literally instead of being
    # interpreted as markup.
    state_dump = html.escape(json.dumps(dialogue_state, indent=2), quote=False)
    prompt_dump = html.escape(str(prompt), quote=False)

    # Step 9: Show plan preview
    response_msg = (
        "✅ <b>Here's your itinerary summary:</b><br><br>"
        f"{formatted_answer}<br><br>"
        #"<i>Would you like to add or modify anything?</i><br>"
        #"Say <b>'looks good'</b> or <b>'confirm'</b> to finalize.<br><br>"

        "<details><summary><b>🔧 View Prompt & Dialogue State</b></summary><br><br>"

        "<b>📁 Dialogue State:</b><br>"
        "<div style='background:#f5f5f5; padding:10px; border-radius:6px; "
        "white-space:pre-wrap; word-wrap:break-word; font-family:monospace;'>"
        f"{state_dump}</div>"

        "<b>🧠 Prompt:</b><br>"
        "<div style='background:#f5f5f5; padding:10px; border-radius:6px; "
        "white-space:pre-wrap; word-wrap:break-word; font-family:monospace;'>"
        f"{prompt_dump}</div><br>"

        "</details><br><br>"
    )

    return dialogue_state, response_msg, plan_finalized


In [37]:
# Sample queries for a manual run. Each assignment overwrites the previous one,
# so only the LAST user_query is actually sent to process_user_input; reorder
# or comment out lines to try a different query.
user_query = "We are a family of four—two adults and two kids aged 5 and 8—planning a trip to Singapore."
user_query = "We are a family of four—two adults and two kids aged 5 and 8—planning a trip to Singapore in 6 July for 3 days."
user_query = "We're a family of 4 with two children aged 6 and 9 visiting Singapore for 5 days. We love interactive science exhibits, nature parks, and kid-friendly activities. Can you suggest an itinerary with one rest day in the middle? Show us relevant attractions with images."
user_query = "We're 6 young adults (25-30) staying in Singapore for 6 days. We love outdoor activities, hiking trails, cycling, and unique experiences like night safaris. Include one rest day and show us images of adventure activities available."
user_query = "We're a group of 8 spanning three generations (grandparents, parents, teens) visiting for 7 days. Need activities suitable for all ages including accessible attractions, traditional food experiences, and family-friendly entertainment. Plan one rest day mid-week."
# Called with a fresh state (dialogue_state=None); the returned tuple is the cell output.
process_user_input(user_query)


INFO:src.rag_query:Query: You are a highly experienced travel expert and advisor for Singapore. Your task is to provide a well-structured and practical travel itinerary based on the user's needs.

Here is the user's original request:
 We're a group of 8 spanning three generations (grandparents, parents, teens) visiting for 7 days. Need activities suitable for all ages including accessible attractions, traditional food experiences, and family-friendly entertainment. Plan one rest day mid-week.

Specifically, as a Family Traveler , the user wants to do PlanItinerary The travel dates are None. The trip duration is 7 days days. The user prefers traditional food experiences cuisine. The user is interested in events like activities suitable for all ages, accessible attractions, family-friendly entertainment. Traveling group includes 8 kids. Special preferences include accessible attractions.

Instructions:
 1. Keep the response concise, engaging and aligned with the user's intent and prefere

✅ Query successful

Certainly! Here's a tailored itinerary for your family trip to Singapore, focusing on activities suitable for all ages, traditional food experiences, and family-friendly entertainment, with one rest day mid-week:

**Day 1: Arrival and Exploration**
- **Morning:** Arrive in Singapore and check into your hotel. Enjoy a traditional Singaporean breakfast at a local eatery.
- **Afternoon:** Visit the Singapore Zoo, known for its family-friendly exhibits and educational programs.
- **Evening:** Enjoy a traditional Singaporean dinner at a local restaurant, such as a hawker center, and explore the night market.

**Day 2: Traditional Food and Cultural Experience**
- **Morning:** Start with a visit to the Singapore Botanic Gardens, known for its beautiful gardens and cultural events.
- **Afternoon:** Explore the Singapore River and take a boat tour to see the city's skyline and historical sites.
- **Evening:** Enjoy a traditional Singaporean dinner at a local restaurant, such

({'intent': 'PlanItinerary',
  'location': ['None'],
  'date': ['None'],
  'duration_days': ['7 days'],
  'food': ['traditional food experiences'],
  'budget': ['None'],
  'transport': ['None'],
  'event': ['activities suitable for all ages',
   'accessible attractions',
   'family-friendly entertainment'],
  'style': ['None'],
  'num_kids': ['8'],
  'num_adults': ['None'],
  'special': ['accessible attractions'],
  'persona': ['Family Traveler']},
 '✅ <b>Here\'s your itinerary summary:</b><br><br>Certainly! Here\'s a tailored itinerary for your family trip to Singapore, focusing on activities suitable for all ages, traditional food experiences, and family-friendly entertainment, with one rest day mid-week:<br><br>**Day 1: Arrival and Exploration**<br>- **Morning:** Arrive in Singapore and check into your hotel. Enjoy a traditional Singaporean breakfast at a local eatery.<br>- **Afternoon:** Visit the Singapore Zoo, known for its family-friendly exhibits and educational programs.<br>- 

# Chatbot Dialogue (Main)

In [38]:
import gradio as gr
import time
from datetime import datetime

def get_timestamp():
    """Return the current local time formatted as ``HH:MM:SS``."""
    now = datetime.now()
    return f"{now:%H:%M:%S}"

def chat_fn_stream(message, history, state_bundle):
    """Handle one chat message and stream the assistant reply word by word.

    This is a generator. Each yielded value is a
    ``(history, state_bundle, "")`` triple matching the handler's three
    outputs; the empty string is the value emitted for the third output.

    Args:
        message: The text the user submitted.
        history: List of ``(user_message, assistant_message)`` tuples; it is
            appended to and updated in place.
        state_bundle: ``[dialogue_state, plan_finalized]`` from the previous turn.
    """
    dialogue_state, plan_finalized = state_bundle

    # Step 1: Show the user's message immediately, stamped with the send time
    user_msg = f"User [{get_timestamp()}]: {message}"
    history.append((user_msg, ""))
    yield history, state_bundle, ""

    # Step 2: Show temporary "planning..." while processing
    history[-1] = (user_msg, "🤖AI Agent: 💬 Planning...")
    yield history, state_bundle, ""

    # Step 3: Run the pipeline; the assistant timestamp is taken afterwards so
    # it reflects when the answer became available.
    updated_state, full_reply, plan_finalized = process_user_input(
        message, dialogue_state, plan_finalized
    )
    assistant_prefix = f"🤖AI Agent [{get_timestamp()}]:"
    new_bundle = [updated_state, plan_finalized]

    # Step 4: Stream the reply one whitespace-separated token at a time.
    # NOTE: split() collapses all whitespace (including newlines) to single
    # spaces, so line breaks only survive as explicit <br> tags in the reply.
    tokens = full_reply.split()
    if not tokens:
        # An empty reply must still replace the "Planning..." placeholder and
        # hand the updated state back; otherwise both would be lost.
        history[-1] = (user_msg, assistant_prefix)
        yield history, new_bundle, ""
        return

    shown = []
    for token in tokens:
        shown.append(token)
        history[-1] = (user_msg, assistant_prefix + "\n" + " ".join(shown))
        yield history, new_bundle, ""
        time.sleep(0.03)

# Custom stylesheet handed to gr.Blocks below.
CSS = """
.contain { display: flex; flex-direction: column; }
#chatbot { flex-grow: 1; }
#component-0 { height: 100%; }
#component-1 { height: 100px; }
"""

# Build the chat UI: two headings, the chatbot display, per-session state
# holders, and a textbox whose submit event drives chat_fn_stream.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# Personal AI Travel Agent (PAT)")
    gr.Markdown("Where do you want to go today?")

    chatbot = gr.Chatbot(elem_id="chatbot")
    # state holds [dialogue_state, plan_finalized], the bundle chat_fn_stream unpacks.
    state = gr.State([init_dialogue_state(), False])
    # history holds the list of (user, assistant) message tuples.
    # NOTE(review): history is an input but not an output of the handler, so it
    # presumably relies on in-place mutation persisting - confirm.
    history = gr.State([])
    msg = gr.Textbox(label="Type your message here")

    # chat_fn_stream yields (history, state_bundle, ""), mapped onto
    # (chatbot, state, msg) in that order.
    msg.submit(
        fn=chat_fn_stream,
        inputs=[msg, history, state],
        outputs=[chatbot, state, msg],
        concurrency_limit=1
    )


    
demo.launch()


  chatbot = gr.Chatbot(elem_id="chatbot")


* Running on local URL:  http://127.0.0.1:7872


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://127.0.0.1:7872/gradio_api/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://127.0.0.1:7872/ "HTTP/1.1 200 OK"


* To create a public link, set `share=True` in `launch()`.




In [39]:
# Test Cases
'''
We're a family of 4 with two children aged 6 and 9 visiting Singapore for 5 days. We love interactive science exhibits, nature parks, and kid-friendly activities. Can you suggest an itinerary with one rest day in the middle? Show us relevant attractions with images.
My husband and I (both 65+) are visiting Singapore for 4 days. We're interested in heritage sites, museums, and cultural districts like Chinatown and Little India. We need one relaxing day to rest. What would you recommend with photos of the attractions?
We're 6 young adults (25-30) staying in Singapore for 6 days. We love outdoor activities, hiking trails, cycling, and unique experiences like night safaris. Include one rest day and show us a detailed itinerary for our trip.
Three colleagues extending our business trip for a 3-day Singapore weekend. We want efficient sightseeing covering major landmarks, Gardens by the Bay, and Marina Bay area. Need one lighter day for recovery. Show attractions with visual references.
We're a group of 8 spanning three generations (grandparents, parents, teens) visiting for 7 days. Need activities suitable for all ages including accessible attractions, traditional food experiences, and family-friendly entertainment. Plan one rest day mid-week.
Four photography hobbyists spending 5 days in Singapore. We want the most photogenic locations - architecture, nature, street scenes, and cultural sites. Include one rest day for photo editing. Show us images of scenic spots and trails.
Two couples (40s) on a 4-day culinary and cultural journey. Interested in hawker centers, cultural heritage, cooking experiences, and traditional markets. Need one day to relax between food adventures. Display food-related attractions and cultural sites.
Five university students backpacking through Singapore for 6 days on a budget. Want free/cheap attractions, walking tours, nature trails, and authentic local experiences. Include one rest day. Show budget-friendly activities with images.
We're a couple passionate about botany and wildlife, visiting for 5 days. Want to explore Singapore Botanic Garden, nature reserves, bird watching spots, and conservation areas. Plan one quiet day for reflection. Show nature attractions and scientific sites for adults.
Three art teachers visiting Singapore for 7 days during school holidays. Interested in National Gallery, contemporary art spaces, creative districts, and hands-on art experiences. Need one rest day mid-trip. Display images of art venues and creative attractions.
I want to plan an itinerary that includes laksa, Kampong Gam and Marina Bay, and maybe take the MRT from 3 June 2025 to 9 June 2025.
Plan a 3-day trip around Marina Bay and Sentosa, eat laksa, take the MRT, and spend under $150. On Day 2 we want to shop.
We are a family of four?wo adults and two kids aged 5 and 8?lanning a trip to Singapore.
We'll be in Singapore for 6 days including arrival and departure, so we have 4 full days to explore.
Please suggest daytime activities in Singapore that are fun and creative for young children.
After our kids go to bed, we're like some recommendations for fun and fashionable adult activities at night.
We're interested in Singapore's local culture and historical sights, but would prefer simplified names or explanations for easier understanding.
We prefer a relaxed itinerary: maximum two attractions per day, with one rest day in the middle to explore freely.
Please generate a 6-day itinerary for our family trip to Singapore, considering only 4 full days for activities.
A couple planning a honeymoon, plan a 7-day slow-paced vacation in Singapore only daytime activities
My partner and I, plan a relaxing trip in Singapore (this December) for one week prefer less walking
Two adventure seekers, plan a 7-day slow-paced vacation in Singapore for a weekend
I'm traveling with a toddler, explore nature in Singapore for one week
A couple planning a honeymoon, experience something unique in Singapore (sometime in March) for a weekend including vegan options
My grandparents are visiting, find adventurous activities in Singapore for a weekend with a budget under $200
We're a couple in our 30s, maximize sightseeing in 3 days in Singapore (next week) for one week prefer less walking
A family of four with two kids, explore nature in Singapore (in mid-August) for a weekend avoiding crowded places
My grandparents are visiting, explore nature in Singapore (this December) avoiding crowded places
A couple planning a honeymoon, experience something unique in Singapore (next week) for one week
My grandparents are visiting, plan a 7-day slow-paced vacation in Singapore (next week) for 10 days
We want a wellness retreat, maximize sightseeing in 3 days in Singapore for 3 days
Two teachers on vacation, do something kid-friendly in Singapore for 3 days
We're history buffs, shop at iconic places in Singapore we're love some cooking classes
My partner and I, find adventurous activities in Singapore (next week)
Two elderly friends, find adventurous activities in Singapore for a weekend prefer less walking
A family with a child in a wheelchair, explore nature in Singapore with MRT travel
College friends on a break, do something kid-friendly in Singapore (sometime in March) for 3 days
My grandparents are visiting, explore cultural spots in Singapore for 10 days
I'm a budget traveler, attend an art festival in Singapore we're love some cooking classes
Two teachers on vacation, join local events in Singapore include Sentosa and Gardens by the Bay
My grandparents are visiting, join local events in Singapore for 10 days
College friends on a break, taste local cuisine in Singapore for 3 days
Two teachers on vacation, join local events in Singapore (from July 1 to July 5) for 3 days
We want a wellness retreat, maximize sightseeing in 3 days in Singapore for a weekend
I'm planning for my parents, attend an art festival in Singapore (from July 1 to July 5) for one week avoiding crowded places
We're tech conference attendees, do something kid-friendly in Singapore for a weekend avoiding crowded places
We are bird watchers, join local events in Singapore for 10 days including vegan options
Two teachers on vacation, taste local cuisine in Singapore (in mid-August) for 10 days we're love some cooking classes
We're tech conference attendees, maximize sightseeing in 3 days in Singapore with MRT travel
We're tech conference attendees, do something kid-friendly in Singapore (sometime in March) for 5 days
We're foodies, explore cultural spots in Singapore (in mid-August) for 5 days
A family of four with two kids, join local events in Singapore for a weekend only daytime activities
A family of four with two kids, join local events in Singapore (sometime in March) for one week
I'm traveling with a toddler, maximize sightseeing in 3 days in Singapore including vegan options
A family with a child in a wheelchair, shop at iconic places in Singapore (next week) for a weekend we're love some cooking classes
I'm planning for my parents, do something kid-friendly in Singapore for 10 days
We're a couple in our 30s, taste local cuisine in Singapore for 10 days
I'm traveling with a toddler, plan a relaxing trip in Singapore for 10 days with MRT travel
My grandparents are visiting, taste local cuisine in Singapore
We're tech conference attendees, join local events in Singapore (from July 1 to July 5) for 3 days avoiding crowded places
My partner and I, explore nature in Singapore for 5 days
We're a couple in our 30s, do something kid-friendly in Singapore include Sentosa and Gardens by the Bay
We're history buffs, join local events in Singapore (next week) for a weekend with a budget under $200
I'm planning for my parents, do something kid-friendly in Singapore for 10 days
We want a wellness retreat, attend an art festival in Singapore (in mid-August) with some museum visits
Two teachers on vacation, shop at iconic places in Singapore (this December) for 10 days avoiding crowded places
Two teachers on vacation, explore cultural spots in Singapore
A couple planning a honeymoon, plan a 7-day slow-paced vacation in Singapore for 3 days need kid-safe places
Two adventure seekers, plan a relaxing trip in Singapore (this December) for a weekend
A couple planning a honeymoon, attend an art festival in Singapore (in mid-August) for 3 days only daytime activities
A couple planning a honeymoon, plan a relaxing trip in Singapore (this December)
We're foodies, join local events in Singapore (from July 1 to July 5) for 5 days
We want a wellness retreat, experience something unique in Singapore (sometime in March) for one week with some museum visits
I'm a budget traveler, attend an art festival in Singapore (sometime in March) for a weekend with a budget under $200
Two adventure seekers, plan a relaxing trip in Singapore for 3 days
I'm a solo traveler, do something kid-friendly in Singapore for a weekend
I'm traveling with a toddler, explore nature in Singapore include Sentosa and Gardens by the Bay
My partner and I, join local events in Singapore
We're history buffs, attend an art festival in Singapore (next week) for 5 days avoiding crowded places
We're history buffs, explore cultural spots in Singapore (this December) for 10 days preferably using public transport
A family with a child in a wheelchair, maximize sightseeing in 3 days in Singapore for 3 days
We're history buffs, explore cultural spots in Singapore we're love some cooking classes
College friends on a break, explore cultural spots in Singapore (from July 1 to July 5) for a weekend preferably using public transport
We're history buffs, join local events in Singapore prefer less walking
We are bird watchers, plan a relaxing trip in Singapore (in mid-August)
We're tech conference attendees, join local events in Singapore (next week) for 5 days include Sentosa and Gardens by the Bay
I'm traveling with a toddler, plan a 7-day slow-paced vacation in Singapore (this December) with a budget under $200
I'm a solo traveler, experience something unique in Singapore
Two adventure seekers, explore cultural spots in Singapore (sometime in March) for 5 days
I'm a solo traveler, experience something unique in Singapore (from July 1 to July 5) avoiding crowded places
We are bird watchers, find adventurous activities in Singapore (next week) with a budget under $200
We are bird watchers, join local events in Singapore (this December)
I'm planning for my parents, explore nature in Singapore (from July 1 to July 5) for a weekend
We're foodies, find adventurous activities in Singapore (in mid-August)
A family of four with two kids, join local events in Singapore (next week) for 5 days
My partner and I, plan a relaxing trip in Singapore (next week) for 3 days
We want a wellness retreat, plan a relaxing trip in Singapore for a weekend prefer less walking
We want a wellness retreat, attend an art festival in Singapore for 3 days only daytime activities
We're history buffs, taste local cuisine in Singapore for a weekend
'''

"\nWe're a family of 4 with two children aged 6 and 9 visiting Singapore for 5 days. We love interactive science exhibits, nature parks, and kid-friendly activities. Can you suggest an itinerary with one rest day in the middle? Show us relevant attractions with images.\nMy husband and I (both 65+) are visiting Singapore for 4 days. We're interested in heritage sites, museums, and cultural districts like Chinatown and Little India. We need one relaxing day to rest. What would you recommend with photos of the attractions?\nWe're 6 young adults (25-30) staying in Singapore for 6 days. We love outdoor activities, hiking trails, cycling, and unique experiences like night safaris. Include one rest day and show us a detailed itinerary for our trip.\nThree colleagues extending our business trip for a 3-day Singapore weekend. We want efficient sightseeing covering major landmarks, Gardens by the Bay, and Marina Bay area. Need one lighter day for recovery. Show attractions with visual references