In [7]:
import requests  # Để sử dụng API OSRM
import os
import re
import json
import random
from datetime import datetime, timedelta
import math

from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI


In [None]:
def parse_tour_duration(duration_str):
    """Convert an 'HH:MM:SS' string into a ``timedelta``.

    The text is split on ':' and the first three fields are read as hours,
    minutes and seconds; any additional fields are ignored.
    """
    fields = [int(piece) for piece in duration_str.split(':')]
    return timedelta(hours=fields[0], minutes=fields[1], seconds=fields[2])
    
# --- Hàm Tiện Ích ---

def haversine(coord1, coord2):
    """Great-circle distance in metres between two (lat, lon) pairs given in degrees.

    Uses the haversine formula with an Earth radius of 6371 km.
    """
    earth_radius_km = 6371
    (start_lat, start_lon), (end_lat, end_lon) = coord1, coord2

    start_phi = math.radians(start_lat)
    end_phi = math.radians(end_lat)
    lat_span = math.radians(end_lat - start_lat)
    lon_span = math.radians(end_lon - start_lon)

    # Squared half-chord length between the two points.
    chord = (
        math.sin(lat_span / 2.0) ** 2
        + math.cos(start_phi) * math.cos(end_phi) * math.sin(lon_span / 2.0) ** 2
    )
    # Angular distance in radians.
    angle = 2 * math.atan2(math.sqrt(chord), math.sqrt(1 - chord))
    return earth_radius_km * angle * 1000

# Hàm tính tổng thời gian của lộ trình
def calculate_total_time(itinerary):
    """Estimate the duration of an itinerary: travel between stops plus visit time.

    The route is the hotel (when it has coordinates) followed by every place
    that has coordinates, in itinerary order. Travel time assumes a straight
    line between consecutive stops at 30 km/h. Each routed place contributes
    its ``tour_duration`` ('HH:MM:SS'), or one hour when that field is missing
    or null.

    Args:
        itinerary: dict with a 'hotel' dict and a 'places' list of dicts. Each
            may carry ``location['coordinates']`` as a two-element sequence.

    Returns:
        ``timedelta`` with the total estimated time (zero when nothing is
        routable).
    """
    hotel = itinerary['hotel']
    places = itinerary['places']
    speed_kmh = 30
    default_visit = timedelta(hours=1)
    total_time = timedelta()

    # Keep every coordinate pair together with the place it belongs to, so that
    # visit durations cannot drift out of step with the route when the hotel or
    # a place has no coordinates. The hotel is stored with place=None.
    stops = []
    hotel_location = hotel.get('location')
    if hotel_location and hotel_location.get('coordinates'):
        stops.append((hotel_location['coordinates'], None))
    for place in places:
        place_location = place.get('location')
        if place_location and place_location.get('coordinates'):
            stops.append((place_location['coordinates'], place))

    if not stops:
        # Plain print: this notebook never imports a UI module named `st`.
        print("Không có tọa độ hợp lệ để tính toán lộ trình.")
        return total_time

    # Time spent at each routed place.
    for _, place in stops:
        if place is None:
            continue
        duration = place.get('tour_duration')
        # The key can be present with a null value, so test the value itself.
        total_time += parse_tour_duration(duration) if duration else default_visit

    # Travel time for each leg between consecutive stops.
    for (origin, origin_place), (destination, destination_place) in zip(stops, stops[1:]):
        # NOTE(review): coordinates are unpacked as (lat, lon), as before. GeoJSON
        # points are normally (lon, lat) -- confirm how the data was stored.
        lat1, lon1 = origin
        lat2, lon2 = destination

        if any(value is None for value in (lat1, lon1, lat2, lon2)):
            origin_name = (origin_place or hotel).get('name')
            destination_name = (destination_place or hotel).get('name')
            print(f"**Lỗi tọa độ không hợp lệ tại địa điểm {origin_name} và {destination_name}:**")
            print(f"  {lat1}, {lon1} và {lat2}, {lon2}")
            continue

        # haversine() returns metres; convert to kilometres before dividing by km/h.
        distance_km = haversine([lat1, lon1], [lat2, lon2]) / 1000
        total_time += timedelta(hours=distance_km / speed_kmh)

    return total_time

# --- Hàm Tính Fitness ---
def parse_location(location):
    """Normalise a location value to a ``(float, float)`` pair.

    Accepted inputs:
      * dict with a 'coordinates' sequence of at least two items (the GeoJSON
        shape produced by ``ST_AsGeoJSON``): the first two items are returned
        in stored order;
      * dict with 'lat' / 'lon' keys (a missing key defaults to 0);
      * 'a,b' string;
      * two-element list or tuple.

    Raises:
        ValueError: for an unparsable string or an unsupported shape.
    """
    if isinstance(location, dict):
        coordinates = location.get('coordinates')
        # Without this branch a GeoJSON dict has neither 'lat' nor 'lon' and
        # would silently collapse to (0.0, 0.0).
        if isinstance(coordinates, (list, tuple)) and len(coordinates) >= 2:
            return float(coordinates[0]), float(coordinates[1])
        return float(location.get('lat', 0)), float(location.get('lon', 0))

    if isinstance(location, str):
        try:
            first, second = map(float, location.split(','))
        except ValueError as exc:
            raise ValueError(f"Invalid location string: {location}") from exc
        return first, second

    if isinstance(location, (list, tuple)) and len(location) == 2:
        return float(location[0]), float(location[1])

    raise ValueError(f"Unexpected location format: {location}")

def compute_itinerary_fitness_experience(itinerary):
    """Score an itinerary; higher is better.

    The score rewards place ratings, the number of places and the hotel
    rating, and penalises travel distance, total time above 14 hours, and
    price. Null ratings or prices are treated as 0 instead of raising.

    Args:
        itinerary: dict with a 'hotel' dict and a 'places' list of dicts; every
            place must have a 'location' key.

    Returns:
        The numeric fitness; ``0`` when there is no usable location at all;
        ``None`` when a place location cannot be parsed (callers must handle
        this case).
    """
    hotel = itinerary['hotel']
    places = itinerary['places']

    def _mean_price(price_map):
        """Average of the non-null values of a {label: price} mapping, else 0."""
        if not price_map:
            return 0
        prices = [value for value in price_map.values() if value is not None]
        return sum(prices) / len(prices) if prices else 0

    try:
        places_locations = [parse_location(place['location']) for place in places]
    except ValueError as e:
        print("Error parsing a place location:", e)
        return None

    # `or 0` rather than a .get() default: the key may exist with a null value.
    total_places_rating = sum((place.get('rating') or 0) for place in places) * 20
    places_score = total_places_rating + len(places) * 40

    # Route: hotel first (when it has coordinates), then every place.
    locations = []
    hotel_location = hotel.get('location')
    if hotel_location and hotel_location.get('coordinates'):
        locations.append(hotel_location['coordinates'])
    locations.extend(places_locations)

    if not locations:
        return 0

    total_distance = 0
    for origin, destination in zip(locations, locations[1:]):
        total_distance += haversine(origin, destination) / 1000  # metres -> km
    distance_penalty = total_distance * 50

    # Only the hours beyond a 14-hour budget are penalised.
    total_time = calculate_total_time(itinerary)
    total_hours = total_time.total_seconds() / 3600
    time_penalty = (total_hours - 14) * 20 if total_hours > 14 else 0

    hotel_avg_price = _mean_price(hotel.get('price'))
    attraction_avg_price = sum(_mean_price(place.get('price')) for place in places)
    restaurant_avg_price = sum(
        (place.get('average_price_per_person') or 0) for place in places
    )

    # NOTE(review): distance_penalty is subtracted from the price total here and
    # again in the final score, so it effectively weighs 0.9x. Kept as-is;
    # confirm whether that is intended.
    total_price = hotel_avg_price + attraction_avg_price + restaurant_avg_price - distance_penalty
    price_penalty = total_price * 0.1

    fitness = (
        places_score
        - time_penalty
        - price_penalty
        + (hotel.get('rating') or 0) * 5
        - distance_penalty
    )
    return fitness


# --- Hàm Tạo Quần Thể Ban Đầu ---

def generate_initial_population_experience(hotels, tourist_attractions, restaurants, pop_size):
    """Create ``pop_size`` random itineraries.

    Each itinerary gets one random hotel and 5-8 distinct entries sampled from
    attractions + restaurants. When fewer candidates exist than the drawn
    count, all of them are used in their original order.
    """
    def _random_itinerary():
        chosen_hotel = random.choice(hotels)
        wanted = random.randint(5, 8)
        candidates = tourist_attractions + restaurants
        if len(candidates) < wanted:
            stops = candidates
        else:
            stops = random.sample(candidates, wanted)
        return {'hotel': chosen_hotel, 'places': stops}

    return [_random_itinerary() for _ in range(pop_size)]

# --- Hàm Lai Ghép và Đột Biến (giữ nguyên từ phần trước) ---

def crossover_itineraries(parent1, parent2):
    """Combine two itineraries into a child itinerary.

    The hotel is picked at random from the two parents. The places are a
    single-point crossover (head of parent1, tail of parent2) when both parents
    have more than one place, otherwise the concatenation of both lists. Only
    the first occurrence of each place name is kept.
    """
    hotel = random.choice([parent1['hotel'], parent2['hotel']])
    first, second = parent1['places'], parent2['places']
    shortest = min(len(first), len(second))

    if shortest <= 1:
        merged = first + second
    else:
        split_at = random.randint(1, shortest - 1)
        merged = first[:split_at] + second[split_at:]

    # Dicts keep insertion order and setdefault never overwrites, so this
    # retains the first place seen for every name.
    by_name = {}
    for place in merged:
        by_name.setdefault(place['name'], place)

    return {'hotel': hotel, 'places': list(by_name.values())}

def mutate_itinerary(hotels, tourist_attractions, restaurants, itinerary,
                     place_mutation_rate=0.1, hotel_mutation_rate=0.05):
    """Randomly mutate ``itinerary`` in place.

    With probability ``place_mutation_rate`` one place is replaced by a
    candidate whose name is not already in the itinerary, so mutation cannot
    reintroduce the duplicates that crossover removes. With probability
    ``hotel_mutation_rate`` the hotel is replaced by a random one. Empty
    candidate pools are skipped instead of raising.

    Args:
        hotels, tourist_attractions, restaurants: candidate pools (lists of
            dicts; places need a 'name' key).
        itinerary: dict with 'hotel' and 'places'; modified in place.
        place_mutation_rate: probability of replacing one place.
        hotel_mutation_rate: probability of replacing the hotel.

    Returns:
        None.
    """
    places = itinerary['places']
    if random.random() < place_mutation_rate and places:
        taken = {place['name'] for place in places}
        candidates = [
            candidate
            for candidate in tourist_attractions + restaurants
            if candidate['name'] not in taken
        ]
        if candidates:
            places[random.randrange(len(places))] = random.choice(candidates)

    if random.random() < hotel_mutation_rate and hotels:
        itinerary['hotel'] = random.choice(hotels)

def genetic_algorithm_experience(hotels, tourist_attractions, restaurants, generations=50, population_size=20):
    """Search for a high-fitness itinerary with a simple genetic algorithm.

    Every generation keeps the best half of the population (at least one
    itinerary) and refills the rest with mutated crossovers of the survivors.
    The final population is ranked once more, so offspring created in the last
    generation can also be returned.

    Args:
        hotels, tourist_attractions, restaurants: candidate pools.
        generations: number of evolution rounds.
        population_size: number of itineraries per generation (>= 1).

    Returns:
        ``(best_itinerary, best_fitness)``.

    Raises:
        ValueError: if ``population_size`` is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    def _sort_key(scored):
        # The fitness function can return None for an unparsable itinerary.
        # Rank those last instead of letting the sort raise TypeError.
        fitness = scored[0]
        return float('-inf') if fitness is None else fitness

    def _rank(candidates):
        """Return (fitness, itinerary) pairs, best first."""
        scored = [(compute_itinerary_fitness_experience(candidate), candidate) for candidate in candidates]
        scored.sort(key=_sort_key, reverse=True)
        return scored

    population = generate_initial_population_experience(hotels, tourist_attractions, restaurants, population_size)
    # At least one survivor, otherwise there is no parent to choose from.
    num_selected = max(1, population_size // 2)

    for _ in range(generations):
        selected = [candidate for _, candidate in _rank(population)[:num_selected]]
        offspring = []
        while len(offspring) < population_size - num_selected:
            parent1 = random.choice(selected)
            parent2 = random.choice(selected)
            child = crossover_itineraries(parent1, parent2)
            mutate_itinerary(hotels, tourist_attractions, restaurants, child)
            offspring.append(child)
        population = selected + offspring

    best_fitness, best_itinerary = _rank(population)[0]
    return best_itinerary, best_fitness

In [9]:
import psycopg2

def build_sql_query_individual(table, requirements, general_requirements, schema="travel_database"):
    """Build a SELECT that returns one JSON object per matching row of ``table``.

    Args:
        table: 'hotel', 'restaurant' or 'touristattraction'.
        requirements: table-specific filters ('Style', 'Restaurant_Type',
            'Suitable_For', 'Attraction_Type'); each may be a single value or a
            list. ``None`` is treated as empty.
        general_requirements: shared filters ('Price_range', 'Transportation',
            'District'). ``None`` is treated as empty.
        schema: schema that qualifies every table name.

    Returns:
        The SQL text, terminated by ';'.

    Raises:
        KeyError: if ``table`` is not one of the supported tables.

    Notes:
        * Filter values are embedded as SQL literals with single quotes
          doubled. ``table`` and ``schema`` are interpolated verbatim and must
          come from trusted code.
        * Price filters use EXISTS rather than JOIN, so a row with several
          matching price entries is returned once.
        * OR-groups are parenthesised before being AND-ed with other filters.
    """
    requirements = requirements or {}
    general_requirements = general_requirements or {}
    qualified = f"{schema}.{table}"

    def _quote(value):
        """Escape a value for use inside a single-quoted SQL literal."""
        return str(value).replace("'", "''")

    def _as_list(value):
        """Normalise a scalar or a collection to a list of non-null items."""
        if isinstance(value, (list, tuple, set)):
            return [item for item in value if item is not None]
        return [value]

    def _contains_any(column, values):
        """Parenthesised OR-group: array ``column`` contains any of ``values``."""
        tests = [f"'{_quote(item)}' = ANY({column})" for item in _as_list(values)]
        return "(" + " OR ".join(tests) + ")" if tests else None

    conditions = []

    # --- Price range -------------------------------------------------------
    price_range = general_requirements.get("Price_range")
    if isinstance(price_range, str):
        if table == "hotel":
            bound = {"low": "< 500000", "medium": "<= 2000000", "high": "> 2000000"}.get(price_range)
            if bound:
                conditions.append(
                    f"EXISTS (SELECT 1 FROM {schema}.hotelprice "
                    f"WHERE {schema}.hotelprice.hotel_id = {schema}.hotel.hotel_id "
                    f"AND {schema}.hotelprice.price {bound})"
                )
        elif table == "restaurant":
            bound = {
                "low": ("max_price", "< 200000"),
                "medium": ("max_price", "<= 600000"),
                "high": ("min_price", ">= 0"),
            }.get(price_range)
            if bound:
                field, comparison = bound
                conditions.append(
                    f"CAST({schema}.restaurant.price_range->>'{field}' AS INTEGER) {comparison}"
                )
        elif table == "touristattraction":
            bound = {"low": "< 500000", "medium": "< 1500000", "high": ">= 0"}.get(price_range)
            if bound:
                conditions.append(
                    f"EXISTS (SELECT 1 FROM {schema}.attractionprice "
                    f"WHERE {schema}.attractionprice.attraction_id = {schema}.touristattraction.attraction_id "
                    f"AND {schema}.attractionprice.price {bound})"
                )

    # --- Transportation: self-drive needs somewhere to park -----------------
    if general_requirements.get("Transportation") == "self-drive car":
        if table == "restaurant":
            conditions.append(f"{schema}.restaurant.parking_available = TRUE")
        elif table == "hotel":
            conditions.append(f"('Bãi đậu xe' = ANY({schema}.hotel.amenities) OR 'Garage' = ANY({schema}.hotel.amenities))")

    # --- District (accent-insensitive substring match) ----------------------
    district = general_requirements.get("District")
    if district and table in ("hotel", "restaurant", "touristattraction"):
        conditions.append(
            f"unaccent(lower(({qualified}.address).district)) ILIKE unaccent('%{_quote(district)}%')"
        )

    # --- Table-specific requirements ---------------------------------------
    if table == "hotel":
        if requirements.get("Style"):
            # A bare string is wrapped in a list. Iterating it directly would
            # produce one LIKE test per character.
            style_tests = [
                f"{schema}.hotel.style LIKE '{_quote(style)}%'"
                for style in _as_list(requirements["Style"])
            ]
            if style_tests:
                conditions.append("(" + " OR ".join(style_tests) + ")")
    elif table == "restaurant":
        if requirements.get("Restaurant_Type"):
            type_condition = _contains_any(f"{schema}.restaurant.restaurant_type", requirements["Restaurant_Type"])
            if type_condition:
                conditions.append(type_condition)
        if requirements.get("Suitable_For"):
            suitable_condition = _contains_any(f"{schema}.restaurant.suitable_for", requirements["Suitable_For"])
            if suitable_condition:
                conditions.append(suitable_condition)
    elif table == "touristattraction":
        if requirements.get("Attraction_Type"):
            attraction_condition = _contains_any(
                f"{schema}.touristattraction.attraction_type", requirements["Attraction_Type"]
            )
            if attraction_condition:
                conditions.append(attraction_condition)

    # JSON projection for each supported table.
    json_select = {
        "hotel": f"""
            json_build_object(
                'hotel_id', {schema}.hotel.hotel_id,
                'name', {schema}.hotel.name,
                'address', {schema}.hotel.address,
                'location', ST_AsGeoJSON({schema}.hotel.location)::json,
                'price', (
                    SELECT json_object_agg(room_type, price)
                    FROM {schema}.hotelprice
                    WHERE {schema}.hotelprice.hotel_id = {schema}.hotel.hotel_id
                ),
                'amenities', {schema}.hotel.amenities,
                'style', {schema}.hotel.style,
                'rating', {schema}.hotel.rating,
                'description', {schema}.hotel.description,
                'img_url', {schema}.hotel.img_url,
                'comments', {schema}.hotel.comments
            )
        """,
        "restaurant": f"""
            json_build_object(
                'res_id', {schema}.restaurant.res_id,
                'name', {schema}.restaurant.name,
                'address', {schema}.restaurant.address,
                'location', ST_AsGeoJSON({schema}.restaurant.location)::json,
                'working_hour', {schema}.restaurant.working_hour,
                'suitable_for', {schema}.restaurant.suitable_for,
                'restaurant_type', {schema}.restaurant.restaurant_type,
                'rating', {schema}.restaurant.rating,
                'description', {schema}.restaurant.description,
                'price_range', {schema}.restaurant.price_range,
                'average_price_per_person', ((CAST({schema}.restaurant.price_range->>'min_price' AS INTEGER) + CAST({schema}.restaurant.price_range->>'max_price' AS INTEGER)) / 2),
                'parking_available', {schema}.restaurant.parking_available,
                'kids_play_area', {schema}.restaurant.kids_play_area,
                'img_url', {schema}.restaurant.img_url,
                'comments', {schema}.restaurant.comments
            )
        """,
        "touristattraction": f"""
            json_build_object(
                'attraction_id', {schema}.touristattraction.attraction_id,
                'name', {schema}.touristattraction.name,
                'address', {schema}.touristattraction.address,
                'location', ST_AsGeoJSON({schema}.touristattraction.location)::json,
                'price', (
                    SELECT json_object_agg(ticket_type, price)
                    FROM {schema}.attractionprice
                    WHERE {schema}.attractionprice.attraction_id = {schema}.touristattraction.attraction_id
                ),
                'attraction_type', {schema}.touristattraction.attraction_type,
                'working_hour', {schema}.touristattraction.working_hour,
                'rating', {schema}.touristattraction.rating,
                'tour_duration', {schema}.touristattraction.tour_duration,
                'description', {schema}.touristattraction.description,
                'img_url', {schema}.touristattraction.img_url,
                'comments', {schema}.touristattraction.comments
            )
        """
    }

    select_clause = f"SELECT {json_select[table]} AS {table}_info FROM {qualified}"
    if conditions:
        return f"{select_clause} WHERE {' AND '.join(conditions)};"
    return f"{select_clause};"
    
    
def convert_to_json_format(results):
    """Return the first column of every row in ``results``.

    Turns ``[(obj,), (obj,)]`` (rows as returned by a cursor) into
    ``[obj, obj]``.
    """
    payloads = []
    for row in results:
        payloads.append(row[0])
    return payloads
    
def fetch_locations(query, postgres_url):
    """Run ``query`` against PostgreSQL and return the first column of each row.

    Before the query runs, the ``unaccent`` extension is created if missing and
    the search path is set to ``travel_database, public``.

    Args:
        query: SQL text to execute (for example from
            ``build_sql_query_individual``).
        postgres_url: connection string passed to ``psycopg2.connect``.

    Returns:
        List holding the first column of every result row.
    """
    conn = psycopg2.connect(postgres_url)
    try:
        # The cursor context manager closes the cursor. The finally clause
        # closes the connection even when one of the statements raises.
        with conn.cursor() as cur:
            cur.execute("CREATE EXTENSION IF NOT EXISTS unaccent;")
            cur.execute("""SET search_path TO travel_database, public;""")
            cur.execute(query)
            results = cur.fetchall()
    finally:
        conn.close()

    return convert_to_json_format(results)


In [10]:
# The connection string is read from the environment so that database
# credentials are never stored in the notebook.
# NOTE: the credential that used to be hard-coded in this cell has been exposed
# and must be rotated.
postgres_url = os.environ.get("POSTGRES_URL")
if not postgres_url:
    raise RuntimeError(
        "Set the POSTGRES_URL environment variable to the PostgreSQL connection string "
        "before running this notebook."
    )

In [11]:
# The Gemini API key must be supplied through the GOOGLE_API_KEY environment
# variable instead of being written into the notebook.
# NOTE: the key that used to be hard-coded in this cell has been exposed and
# must be revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )

In [12]:
# Chat model shared by the prompt chains built in the cells below
# (`chain`, `ask_chain`, `update_chain`).
# It is configured with temperature 0, no explicit token or timeout limit,
# and max_retries=2.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [13]:
# Distinct hotel amenities, used as the allowed vocabulary in the prompts.
conn = psycopg2.connect(postgres_url)
try:
    # The finally clause closes the connection even if a statement fails.
    with conn.cursor() as cur:
        cur.execute("SET search_path TO travel_database, public;")
        cur.execute("""
            SELECT DISTINCT unnest(amenities) AS unique_amenities
            FROM hotel;
        """)
        rows = cur.fetchall()
finally:
    conn.close()

amenities_list = [row[0] for row in rows]
# One quoted value per line, ready to be substituted into a prompt template.
amenities_list_str = "\n    ".join(f'"{amenities_type}"' for amenities_type in amenities_list)

In [14]:
# Distinct non-null hotel styles, used as the allowed vocabulary in the prompts.
conn = psycopg2.connect(postgres_url)
try:
    # The finally clause closes the connection even if a statement fails.
    with conn.cursor() as cur:
        cur.execute("SET search_path TO travel_database, public;")
        cur.execute("""
            SELECT DISTINCT style
            FROM hotel
            WHERE style IS NOT NULL;
        """)
        rows = cur.fetchall()
finally:
    conn.close()

style_list = [row[0] for row in rows]
# One quoted value per line, ready to be substituted into a prompt template.
style_list_str = "\n    ".join(f'"{style}"' for style in style_list)

In [15]:
# Distinct tourist-attraction types, used as the allowed vocabulary in the prompts.
conn = psycopg2.connect(postgres_url)
try:
    # The finally clause closes the connection even if a statement fails.
    with conn.cursor() as cur:
        cur.execute("SET search_path TO travel_database, public;")
        cur.execute("""
            SELECT DISTINCT unnest(attraction_type) AS unique_attraction_type
            FROM touristattraction;
        """)
        rows = cur.fetchall()
finally:
    conn.close()

att_type_list = [row[0] for row in rows]
# One quoted value per line, ready to be substituted into a prompt template.
att_type_list_str = "\n    ".join(f'"{att_type}"' for att_type in att_type_list)

In [16]:
# Distinct restaurant types, used as the allowed vocabulary in the prompts.
conn = psycopg2.connect(postgres_url)
try:
    # The finally clause closes the connection even if a statement fails.
    with conn.cursor() as cur:
        # Resolve unqualified table names against the travel_database schema.
        cur.execute("SET search_path TO travel_database, public;")
        # Flatten the restaurant_type arrays and keep each value once.
        cur.execute("""
            SELECT DISTINCT unnest(restaurant_type) AS unique_res_type
            FROM restaurant;
        """)
        rows = cur.fetchall()
finally:
    conn.close()

res_type_list = [row[0] for row in rows]
# One quoted value per line, ready to be substituted into a prompt template.
res_type_list_str = "\n    ".join(f'"{res_type}"' for res_type in res_type_list)

In [17]:
# Distinct restaurant "suitable for" values, used as the allowed vocabulary in the prompts.
conn = psycopg2.connect(postgres_url)
try:
    # The finally clause closes the connection even if a statement fails.
    with conn.cursor() as cur:
        # Resolve unqualified table names against the travel_database schema.
        cur.execute("SET search_path TO travel_database, public;")
        # Flatten the suitable_for arrays and keep each value once.
        cur.execute("""
            SELECT DISTINCT unnest(suitable_for) AS unique_res_suit
            FROM restaurant;
        """)
        rows = cur.fetchall()
finally:
    conn.close()

res_suit_list = [row[0] for row in rows]
# One quoted value per line, ready to be substituted into a prompt template.
res_suit_list_str = "\n    ".join(f'"{res_suit}"' for res_suit in res_suit_list)

In [18]:
# Closed vocabularies passed to the prompt chains; the templates instruct the
# model to answer with values taken from these lists.
travel_type_list = ["Nghỉ dưỡng", "Khám phá"]
companion_list = ["friends", "family", "colleagues"]
transport_list = ["self-drive car", "motorbike", "bicycle", "public transport"]
# The ask template only offers service for this city.
city_list = ["Hà Nội"]
district_list = ["Ba Đình", "Hoàn Kiếm", "Tây Hồ", "Long Biên", "Cầu Giấy", "Đống Đa", "Hai Bà Trưng", "Hoàng Mai", "Thanh Xuân", "Nam Từ Liêm", "Bắc Từ Liêm", "Hà Đông", "Sơn Tây"]

In [19]:
# Extraction prompt: turns a free-text travel request into the structured JSON
# used by the rest of the notebook. Literal braces of the JSON skeleton are
# doubled because the text is used as a format template.
template = """
You are an AI travel suggestion chatbot. Analyze the following travel request:

Request: "{travel_request}"

Extract general and specific requirements for Hotels, Restaurants, and Tourist Attractions, even if some are not explicitly mentioned. For each type, provide the following information:

**General Requirements:**
- Type:
  - If the request explicitly mentions "nghỉ dưỡng", "thư giãn", "resort", or similar keywords, and the overall tone is relaxed or leisure-oriented or have leisure activities, relaxation-focused activities(clear relaxation keywords), assign "Nghỉ dưỡng".
  - If the request explicitly mentions "khám phá", "văn hóa", "ẩm thực", or similar keywords, and the overall tone is exploratory or adventurous or exploration or have activity-focused activities (clear exploration keywords), assign "Khám phá".
  - For general requests or requests with mixed intentions, return `null`.
- Number_of_people: Extract the number of people or return null if not specified.
- Companion: Extract the companions mentioned in the request and translated it if it needed, must be one from this list: {companion_list} or return null if not specified.
- Transportation: Identify the transportation method mentioned in the request and translated, convert it if it needed, transportation must be one from this list: {transport_list} or return null if not specified.
- Time:
  - Extract specific dates or time ranges mentioned in the request, you should return only number of days.
  - If no specific dates are mentioned, check for keywords like "ngày", "tuần", "tháng" and their corresponding numbers.
  - Return null if there's no date or time ranges in the request.
  - For example, "3 ngày" should be extracted as "3";" 3 ngày 2 đêm" become "3".
- City: The mentioned city (without "city" or "province").
- District: The mentioned district (without "district") and must be one from this list: {district_list} or else return null.
- Price_range: Specify as "low", "medium", or "high" based on the request.

**For Hotels, also identify:**
- Requirements: A summary text of specific requirements or preferences mentioned.
- Amenities: IMPORTANT - ONLY include amenities from {amenities_list} if EXPLICITLY mentioned in the request. 
  Examples:
  - If request says "need hotel with pool and gym" → include ["Pool", "Gym"]
  - If request doesn't mention any amenities → return null
  - Do NOT assume or add amenities that weren't specifically mentioned
- Style: Only include ONE style from this list if explicitly mentioned in the request: {style_list} or else return null

**For Restaurants, also identify:**
- Requirements: A summary text of specific requirements or preferences mentioned.
- Restaurant_Type: From this list: {restaurant_type_list} (Do Not add other infomations that don't have in the list)
- Suitable_For: From this list: {suitable_for_list}

**For Tourist Attractions, also identify:**
- Requirements: A list of specific requirements or preferences mentioned.
- Attraction_Type: From this list: {attraction_type_list} (Do Not add other infomations that don't have in the list)

Return the result using the following JSON format:

```json
{{
  "General": {{
    "Type": "...",
    "Number_of_people": "...",
    "Companion": "...",
    "Transportation": "...",
    "Time": "...",
    "City": "...",
    "District": "...",
    "Price_range": "..."
  }},
  "Hotel": {{
    "Requirements": ...,
    "Amenities": [...],
    "Style": [...]
  }},
  "Restaurant": {{
    "Requirements": ...,
    "Restaurant_Type": "...",
    "Suitable_For": "..."
  }},
  "TouristAttraction": {{
    "Attraction_Type": "..."
  }}
}}

```
IMPORTANT RULES:
1. For lists (Amenities, Style), RETURN null if none are EXPLICITLY mentioned. Do NOT make assumptions or add information that isn't clearly stated or mentioned
2. Keep output strictly aligned with the provided lists

Ensure the JSON is valid. Use null for any unspecified information.
After the JSON output, add a note in Vietnamese:

"Nếu bạn cần thay đổi hoặc bổ sung bất kỳ thông tin nào, vui lòng cho tôi biết."
"""

In [20]:
# Build the extraction prompt from `template` and pipe it into the shared `llm`.
prompt = ChatPromptTemplate.from_template(template)
chain = prompt | llm

In [23]:
# Follow-up prompt: given the extracted JSON, asks the user only about the
# required General fields that are still null (plus the city check).
ask_template = """
You are an AI travel assistant chatbot. Analyze the following travel request:

Request: "{travel_output_json}"

### **Core Rules:**

1. **Identify Required Fields:**
    - Identify fields that are marked as "required" in the prompt.
    - If a required field is **null**, generate a question to clarify the user's preference. 

2. **STRICTLY DO NOT** generate questions for:
    - Fields with any **NON-NULL** value.
    - Fields without "required" marking in the prompt.

3. **City Validation:**
    - If the "City" field has a value but is not 'Hà Nội' , ask the user if they want to change the city.

4. **Additional Questions:**
    - Only ask additional questions about hotels, restaurants, or tourist attractions if all required fields in the "General" section have non-null values.

5. Questions about `"Time"` and `"Type"` MUST be ASKED FIRST ONLY if these fields are NULL but if these two features is not null in the {travel_output_json}, you should not ask again.

---

### **Verification Process:**
1. For the `General` section:
   - **Type** (required): Generate question ONLY if `Type` is `null`.
   - **Number_of_people** (required): Generate question ONLY if `Number_of_people` is `null`
   - **Companion** (required): Generate question ONLY if `Companion` is `null`.
   - **Transportation** (required):Generate question ONLY if `Transportation` is `null`.
   - **Time** (required): Generate question ONLY if `Time` is `null`.
   - **Price_range** (required): Generate question ONLY if `Price_range` is `null`.

2. For City validation:
   - If `"City"` is not 'Hà Nội' but has a value, ask if the user wants to change it.

3. Additional Question (when General is fully completed):
   - If ALL required `General` fields have NON-NULL values, ask:
     **"Bạn có muốn bổ sung thêm yêu cầu gì cho khách sạn, nhà hàng, hoặc địa điểm tham quan không?"**

4. STRICTLY SKIP any field with a non-`null` value.

---

### **Question Templates (ONLY use if field is NULL AND marked as required):**
1. If `"Type"` is null, ask:  
   **"Bạn muốn tìm loại hình du lịch nào? (Ví dụ: "Nghỉ dưỡng", "Khám phá")"**
   Ignore the question about Type if only ask for one of Hotels, Restaurants, or Tourist Attractions.
   
2. If `"Number_of_people"` is null, ask:  
   **"Bạn đi bao nhiêu người? (Ví dụ: 1, 2, hoặc nhóm lớn hơn)"**

3. If `"Companion"` is null, ask:  
   **"Bạn đi cùng ai? (Bạn bè, Gia đình, hoặc Đồng nghiệp)"**

4. If `"Transportation"` is null, ask:  
   **"Bạn sẽ di chuyển bằng phương tiện gì? (Ví dụ: xe hơi tự lái, xe máy, hoặc phương tiện công cộng)"**

5. If `"Time"` is null, ask:  
   **"Bạn có kế hoạch đi vào thời gian nào không? (Ngày cụ thể hoặc khoảng thời gian)"**

6. If `"Price_range"` is null, ask:  
   **"Bạn muốn ngân sách cho chuyến đi này là bao nhiêu (thấp, trung bình, cao)?"**

7. Additional Question (when General is complete):
   **"Bạn có muốn bổ sung thêm yêu cầu gì cho khách sạn, nhà hàng, hoặc địa điểm tham quan không?"**

### **Special Case - City Validation:**
If City has a value but not 'Hà Nội':
**"Hiện tại chúng tôi chưa cung cấp dịch vụ cho thành phố này mà chỉ có tại Hà Nội, liệu bạn có muốn thay đổi thành phố không?"**

---

### **Output Format:**
1. Output questions ONLY for fields marked as `required` in the prompt and STRICTLY null.
2. Add city validation question if needed.
3. If ALL required `General` fields are NON-NULL, add the question about additional requirements for hotels, restaurants, or attractions.
4. End with: **"Nếu bạn cần thay đổi hoặc bổ sung bất kỳ thông tin nào, vui lòng cho tôi biết."**

---

### **Example Output:**
If the JSON input has:
- `"Transportation"`: `null`
- `"Time"`: `null`
- `"City"`: `"Đà Nẵng"` (not 'Hà Nội'),
  
The output will be:

```plaintext
Bạn sẽ di chuyển bằng phương tiện gì? (Ví dụ: xe hơi tự lái, xe máy, hoặc phương tiện công cộng)

Bạn có kế hoạch đi vào thời gian nào không? (Ngày cụ thể hoặc khoảng thời gian)

Hiện tại chúng tôi chưa cung cấp dịch vụ cho thành phố này mà chỉ có tại Hà Nội, liệu bạn có muốn thay đổi thành phố không?

Nếu bạn cần thay đổi hoặc bổ sung bất kỳ thông tin nào, vui lòng cho tôi biết.
```
"""

In [24]:
# Build the follow-up-question prompt from `ask_template` and pipe it into the shared `llm`.
ask_prompt = ChatPromptTemplate.from_template(ask_template)
ask_chain = ask_prompt | llm

In [25]:
# Update prompt: merges the user's follow-up answer into the JSON extracted
# earlier, keeping every non-null value of the initial request. Literal braces
# of the JSON skeleton are doubled because the text is used as a format template.
updated_query = """
You are an AI travel suggestion chatbot. Analyze the following travel request:

Update request: "{update_travel_request}"

Extract general requirements from request while following these rules:
1. IMPORTANT: Preserve ALL non-null values from the initial request JSON
2. Only update fields that are null in the initial request OR if the city is explicitly changed
3. For new information, extract the following:

**General Requirements:**
- Type: ONLY CLASSIFY INTO TWO TYPES from {travel_type_list}. Analyze the response and map to:
  "Nghỉ dưỡng" if response mentions/implies relaxation-focused activities:
    Keywords to check:
    - "nghỉ mát", "nghỉ dưỡng", "thư giãn"
    - "resort", "spa", "biển"
    - "nghỉ ngơi", "thư thái"
    - "resort", "khách sạn sang trọng"
    Examples:
    - Response: "food tour" -> classify as: "Khám phá"
    - Response: "nghỉ dưỡng" -> classify as: "Nghỉ dưỡng"
    
  "Khám phá" if response mentions/implies exploration and activity-focused activities:
    Keywords to check:
    - "khám phá", "tham quan", "trải nghiệm"
    - "du lịch", "phượt", "tour"
    - "văn hóa", "ẩm thực", "food tour"
    - "địa điểm", "danh lam thắng cảnh"
    Examples:
    - Response: "trải nghiệm văn hóa" -> classify as: "Khám phá"
    - Response: "địa điểm tham quan" -> classify as: "Khám phá"
- Number_of_people: Extract the number of people.
- Companion: Extract the companions mentioned in the request and translated it if it needed, must be one from this list: {companion_list}.
- Transportation: Identify the transportation method mentioned in the request and translated, convert it if it needed, transportation must be one from this list: {transport_list}.
- Time: Any specific dates or time ranges mentioned.
- City: The mentioned city (without "city" or "province") and must be one from this list: {city_list}.
- Price_range: Specify as "low", "medium", or "high" based on the request.

**For Hotels, also identify:**
- Requirements: A summary text of specific requirements or preferences mentioned.
- Amenities: IMPORTANT - ONLY include amenities from {amenities_list} if EXPLICITLY mentioned in the request. 
  Examples:
  - If request says "need hotel with pool and gym" → include ["Pool", "Gym"]
  - If request doesn't mention any amenities → return null
  - Do NOT assume or add amenities that weren't specifically mentioned
- Style: Only include ONE style from this list if explicitly mentioned in the request: {style_list} or else return null.

**For Restaurants, also identify:**
- Requirements: A summary text of specific requirements or preferences mentioned.
- Restaurant_Type:Only get ONE type FROM this list: {restaurant_type_list}
- Suitable_For: From this list: {suitable_for_list}

**For Tourist Attractions, also identify:**
- Requirements: A list of specific requirements or preferences mentioned.
- Attraction_Type: From this list: {attraction_type_list}

Initial request: "{travel_output_json}"

Merge the initial request with any updates, prioritizing:
1. Keeping all non-null values from initial request
2. Only updating null fields or explicitly changed city
3. Using the following JSON format:

```json
{{
  "General": {{
    "Type": "...",
    "Number_of_people": "...",
    "Companion": "...",
    "Transportation": "...",
    "Time": "...",
    "City": "...",
    "District": "...",
    "Price_range": "..."
  }},
  "Hotel": {{
    "Requirements": ...,
    "Amenities": [...],
    "Style": [...]
  }},
  "Restaurant": {{
    "Requirements": ...,
    "Restaurant_Type": "...",
    "Suitable_For": "..."
  }},
  "TouristAttraction": {{
    "Attraction_Type": "..."
  }}
}}

```

IMPORTANT VERIFICATION STEPS:
1. Before outputting, for all other fields, verify that all non-null values from the initial request are preserved unless explicitly changed in the update request.
2. Check if the update is a response to the Type question. If yes, analyze the response using the keyword mapping above
3. Classify into either "Nghỉ dưỡng" or "Khám phá"

Ensure the JSON is valid. Use null for any unspecified information.
After the JSON output, add a note in Vietnamese:

"Nếu bạn cần thay đổi hoặc bổ sung bất kỳ thông tin nào, vui lòng cho tôi biết."
"""

In [26]:
# Build the merge/update prompt from `updated_query` and pipe it into the shared `llm`.
update_prompt = ChatPromptTemplate.from_template(updated_query)
update_chain = update_prompt | llm

In [27]:
def user_requires(chain, query, travel_type_list, companion_list, transport_list, city_list, district_list,
                  amenities_list_str, style_list_str, res_type_list_str, res_suit_list_str, att_type_list_str):
    """Invoke the extraction chain on a travel request and return the parsed JSON.

    Args:
        chain: Object exposing ``invoke(dict)``; the returned value must have a
            ``content`` attribute holding the model's reply (assumed to be a
            ``str`` -- confirm for multi-part replies).
        query: The user's travel request, forwarded as ``travel_request``.
        travel_type_list, companion_list, transport_list, city_list,
        district_list: Forwarded to the chain under the same-named keys.
        amenities_list_str, style_list_str, res_type_list_str,
        res_suit_list_str, att_type_list_str: Forwarded as ``amenities_list``,
            ``style_list``, ``restaurant_type_list``, ``suitable_for_list`` and
            ``attraction_type_list`` respectively.

    Returns:
        dict | None: The JSON object that starts at the first ``{`` of the
        reply, or ``None`` when the reply contains no ``{`` or the text starting
        there is not valid JSON. A diagnostic is printed in both failure cases,
        and the parsed result is pretty-printed on success.
    """
    response = chain.invoke({
        "travel_request": query,
        "travel_type_list": travel_type_list,
        "companion_list": companion_list,
        "transport_list": transport_list,
        "city_list": city_list,
        "district_list": district_list,
        "amenities_list": amenities_list_str,
        "style_list": style_list_str,
        "restaurant_type_list": res_type_list_str,
        "suitable_for_list": res_suit_list_str,
        "attraction_type_list": att_type_list_str
    })

    content = response.content
    start = content.find("{")
    if start == -1:
        print("No JSON object found in the response.")
        return None

    # raw_decode parses exactly one JSON value at the start of the string and
    # ignores whatever follows. A trailing note or closing code fence -- even one
    # that itself contains braces -- therefore cannot break parsing, unlike a
    # greedy first-"{"-to-last-"}" match.
    try:
        result_dict, _end = json.JSONDecoder().raw_decode(content[start:])
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
        print("Raw response:", content)
        return None

    # Print the JSON result
    print("Extracted JSON Result:")
    print(json.dumps(result_dict, indent=2, ensure_ascii=False))
    return result_dict

In [28]:
# Sample request in Vietnamese: a 3-day relaxation trip to Hanoi on a reasonable budget,
# for a family of 4 arriving by plane with no personal vehicle.
user_query = """
Gợi ý cho tôi một chuyến đi nghỉ dưỡng tại Hà Nội trong 3 ngày với chí phí hợp lý. Chúng tôi đi máy bay tới, không có xe cá nhân.
Gia đình tôi đi 4 người.
"""

In [30]:
# Run the extraction chain on the sample request, passing every allowed-value list
# in the positional order that user_requires() declares.
user_requires_respond = user_requires(
    chain,
    user_query,
    travel_type_list,
    companion_list,
    transport_list,
    city_list,
    district_list,
    amenities_list_str,
    style_list_str,
    res_type_list_str,
    res_suit_list_str,
    att_type_list_str,
)


Extracted JSON Result:
{
  "General": {
    "Type": "Nghỉ dưỡng",
    "Number_of_people": 4,
    "Companion": "family",
    "Transportation": "public transport",
    "Time": 3,
    "City": "Hà Nội",
    "District": null,
    "Price_range": "low"
  },
  "Hotel": {
    "Requirements": "Chí phí hợp lý",
    "Amenities": null,
    "Style": null
  },
  "Restaurant": {
    "Requirements": null,
    "Restaurant_Type": null,
    "Suitable_For": "Ăn gia đình"
  },
  "TouristAttraction": {
    "Requirements": null,
    "Attraction_Type": null
  }
}


In [None]:
# user_requires() returns None when no JSON could be parsed from the model reply;
# stop here with a clear message instead of failing on `None.get`.
if user_requires_respond is None:
    raise ValueError("user_requires() returned None: no requirements to split.")

# `dict.get(key, {})` only falls back when the key is missing, so a section the model
# emitted as null would come back as None; `or {}` normalises both cases to a dict.
general_requirements = user_requires_respond.get("General") or {}
hotel_requirements = user_requires_respond.get("Hotel") or {}
restaurant_requirements = user_requires_respond.get("Restaurant") or {}
attraction_requirements = user_requires_respond.get("TouristAttraction") or {}
#fetch data