In [1]:
import os
import re

import google.generativeai as genai
import numpy as np
import pandas as pd
from IPython.display import display

import chromadb_lib as cdb
import flight_api as fa
import flight_utils as fu
import hotels_utils as hu


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the Gemini key from the environment instead of embedding it in the notebook.
# Anything typed into a cell is saved (and shared) with the .ipynb file; a key that was
# ever committed in plain text should be treated as leaked and revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment that starts the "
        "Jupyter kernel before running this notebook."
    )
genai.configure(api_key=GEMINI_API_KEY)

In [4]:
# Load the complete hotel catalogue; the last expression previews the first rows.
hotels_df = pd.read_parquet(
    "hotels.parquet",
)
hotels_df.head()

Unnamed: 0,countyCode,countyName,cityCode,cityName,HotelCode,HotelName,HotelRating,Address,Attractions,Description,FaxNumber,HotelFacilities,Map,PhoneNumber,PinCode,HotelWebsiteUrl
0,AL,Albania,106078,Albanien,1003300,De Paris Hotel,FourStar,Nr. 7 Brigada Viii Street Tirane,,Hotel de Paris is a charming boutique hotel th...,42268822,Private parking Parking onsite Television in c...,41.32213|19.81665,00355 4226 5009,1000,https://www.booking.com/hotel/al/de-paris.html
1,AL,Albania,106078,Albanien,1003301,Hotel Green,FourStar,Rruga Kavajes. Kombinat Km 2. Vaqarr VaqarrTir...,,"Located in a suburb of Tirana, Hotel Green is ...",35548520058,airport pick up wifi available in all areas Ai...,41.30413|19.74703,+35548520057,1041,https://www.booking.com/hotel/al/hotel-green.html
2,AL,Albania,106078,Albanien,1003302,Theranda Hotel,ThreeStar,Rr. Andon Zako Cajupi Villa 6 & 7 Villa 6 & 7T...,,"Set in Tirana, 1.2 km from Skanderbeg Square, ...",00355 (0)42273689,face masks for guests available all plates cu...,41.3216|19.81199,00355 (0)42273766,1019,https://www.booking.com/hotel/al/theranda.html
3,AL,Albania,106078,Albanien,1003303,Seven Hotel,ThreeStar,"KAVAJA STREET, CLOSE TURKISH AMBASSY TIRANA",Skanderbeg Square: within 500 metre,This hotel enjoys an enviable setting in Tiran...,,À la carte dinner Breakfast buffet Breakfast C...,41.328027|19.815052,,1001,http://www.hotelseven-tirana.com/
4,AL,Albania,106078,Albanien,1003325,Viktoria,ThreeStar,Rruga E Elbasanit Km 4 Sauk SaukTirana,,Located in a new residential area at the edge ...,+355695406986,internet services Ironing service Family rooms...,41.29125|19.85349,355 69 5406986,1000,https://www.booking.com/hotel/al/viktoria-sauk...


In [None]:
from datetime import date, datetime
from typing import Optional, Literal
from pydantic import BaseModel, ValidationError
import json

# Coarse time-of-day buckets used for flight preferences. The hour ranges for each
# bucket are spelled out in the extraction prompt inside extractor().
TimeWindow = Literal["morning","afternoon","evening","night"]

# Structured form of a user's trip request. extractor() sends this model's JSON schema
# to the LLM and validates the reply against it.
# NOTE: documented with comments on purpose — a class docstring would be emitted as
# "description" in model_json_schema() and therefore change the prompt text.
class TripQuery(BaseModel):
    # Trip endpoints; the extraction prompt asks the LLM to normalise these to IATA codes.
    origin: str
    destination: str
    # Travel dates; None when the user did not mention them.
    start_date: Optional[date] = None
    end_date: Optional[date] = None
    # Party size.
    adults: int = 1
    children: int = 0
    # Cabin class, restricted to these four values.
    cabin: Literal["ECONOMY","PREMIUM_ECONOMY","BUSINESS","FIRST"] = "ECONOMY"
    # Optional preferred time-of-day windows for departure, arrival and the return leg.
    depart_window: Optional[TimeWindow] = None
    arrive_window: Optional[TimeWindow] = None
    return_window: Optional[TimeWindow] = None
    # Hotel search filters; the prompt asks the LLM to infer city/country from the destination.
    hotel_country: Optional[str] = None
    hotel_city: Optional[str] = None
    hotel_min_rating: Optional[float] = None
    # Free-text hotel preferences, passed to hu.search_hotels as prefs_text.
    hotel_prefs_text: Optional[str] = None

def extractor(text: str, model_name: str = 'gemini-2.5-flash') -> Optional[TripQuery]:
    """
    Parse free-form user text into a TripQuery using a Gemini model in JSON mode.

    Args:
        text: The user's natural-language trip request.
        model_name: Identifier of the Gemini model to call. Defaults to the model
            this notebook has always used.

    Returns:
        A validated TripQuery, or None when the input is blank, the model cannot be
        created or called, or the reply is not JSON that satisfies the TripQuery
        schema. Failures are reported with print() and never raised to the caller.
    """
    # Nothing can be extracted from blank input, so skip the API call entirely.
    if not text or not text.strip():
        print("Error: No user text was provided.")
        return None

    # Initialize the Generative Model with JSON mode enabled
    try:
        model = genai.GenerativeModel(
            model_name,
            generation_config={"response_mime_type": "application/json"}
        )
    except Exception as e:
        print(f"Error initializing the model: {e}")
        return None

    # Pydantic's `model_json_schema` generates a schema the LLM can follow
    schema = TripQuery.model_json_schema()

    # The prompt provides the context, instructions, the schema, and the user text.
    # This guides the LLM to perform the extraction task accurately.
    prompt = f"""
    You are an expert travel assistant responsible for extracting structured data from user requests.
    Your goal is to parse the user's text and output a JSON object that strictly adheres to the provided schema.

    CONTEXT:
    - Today's date is: {datetime.now().strftime('%Y-%m-%d')}
    - Time window definitions: Departures between 6am-12pm are "morning", 12pm-5pm are "afternoon", 5pm-9pm are "evening", and 9pm-6am are "night".

    INSTRUCTIONS:
    1.  Analyze the user's text to extract all relevant travel details.
    2.  Use the provided mappings to normalize values. For example, if the user says "zurich", you must use the IATA code "ZRH". If they say "business class", use "BUSINESS".
    3.  If a value is not mentioned in the text, omit it or set it to null in the JSON.
    4.  Infer `hotel_city` and `hotel_country` from the main destination. If it is in the USA, write it as United States, and the city as [City],   [State] (the 3 blank spaces are intentional). For example: 'Abbeville,   Louisiana'
    5.  The `hotel_prefs_text` field should contain the original, unmodified user text.
    6.  Parse dates accurately. "Next Tuesday" should be calculated relative to today's date.
    7.  Your output MUST be a valid JSON object matching the schema below.

    SCHEMA:
    {json.dumps(schema, indent=2)}

    USER TEXT:
    "{text}"
    """

    # Captured once so the error handlers can report it without touching the
    # response object again.
    raw_output = None
    try:
        response = model.generate_content(prompt)
        raw_output = response.text
        json_data = json.loads(raw_output)

        # model_validate (rather than TripQuery(**json_data)) also turns a non-object
        # payload such as a JSON list into a ValidationError, so it is reported as a
        # schema problem instead of an "unexpected" TypeError.
        return TripQuery.model_validate(json_data)

    except json.JSONDecodeError:
        print("Error: The model did not return valid JSON.")
        print("Model output:", raw_output)
        return None
    except ValidationError as e:
        print(f"Error: Pydantic validation failed.\n{e}")
        print("Model output:", raw_output)
        return None
    except Exception as e:
        # Best-effort boundary: network/API failures and blocked responses land here.
        print(f"An unexpected error occurred: {e}")
        return None

In [6]:
# Sample request used to drive the rest of the notebook.
user_input = (
    "Hi can you plan me a trip from Zurich to Rome "
    "from 10th of november 2025 to 24th november 2025, "
    "for 2 adults and 1 children, with an economy budget, "
    "evening flight and a preference for a hotel with 4 stars and near vatican"
)

In [7]:
trip = extractor(user_input)
# extractor() returns None on any failure. Stop here with a clear message instead of
# letting a later cell fail with an AttributeError when it reads trip.hotel_country.
if trip is None:
    raise ValueError(
        "Could not extract trip details from the user input; see the messages printed above."
    )
print(trip)

origin='ZRH' destination='FCO' start_date=datetime.date(2025, 11, 10) end_date=datetime.date(2025, 11, 24) adults=2 children=1 cabin='ECONOMY' depart_window='evening' arrive_window=None return_window=None hotel_country='Italy' hotel_city='Rome' hotel_min_rating=4.0 hotel_prefs_text='hotel with 4 stars and near vatican'


In [8]:
# Open the hotel vector index, then ingest the hotels for the requested destination.
hotels_index = hu.init_hotel_index(
    "./chroma_storage",
    "hotel_information",
    "all-MiniLM-L6-v2",
)
hu.ingest_hotels(
    hotels_index,
    hotels_df,
    country=trip.hotel_country,
    city=trip.hotel_city,
)

Original hotel DataFrame count: 1010033
Filtered count for Rome, Italy: 2217


In [9]:
# Query the flight search API with the extracted trip parameters.
flight_results = fa.search_flights_api(
    trip
)

Requesting endpoint: /api/v1/flights/searchFlights?fromId=ZRH.AIRPORT&toId=FCO.AIRPORT&adults=2&children=1&cabinClass=ECONOMY&stops=none&pageNo=1&sort=BEST&currency_code=SGD&departDate=2025-11-10&returnDate=2025-11-24
API Response Status: 200
01. Outbound: ZRH 2025-11-10T17:50:00 → FCO 2025-11-10T19:25:00 | Swiss | SGD 453.51 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T20:15:00 → ZRH 2025-11-24T21:50:00
02. Outbound: ZRH 2025-11-10T07:10:00 → FCO 2025-11-10T08:40:00 | Swiss | SGD 523.97 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T20:15:00 → ZRH 2025-11-24T21:50:00
03. Outbound: ZRH 2025-11-10T21:55:00 → FCO 2025-11-10T23:30:00 | Easyjet | SGD 541.71 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T06:00:00 → ZRH 2025-11-24T07:35:00
04. Outbound: ZRH 2025-11-10T08:15:00 → FCO 2025-11-10T09:50:00 | Easyjet | SGD 546.63 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T06:00:00 → ZRH 2025-11-24T07:35:00
05. Outbound: ZRH 2025-11-10T21:55:00 → FCO 2025-11-10T23:30:00 | Easyjet, Swiss | SGD 564.68 | A

In [10]:
# Flights: pass every returned itinerary to the LLM as plain text. No selection happens
# here; choosing the best option is left to the final prompt.
# An explicit message is used when nothing came back, so the join cannot fail on None
# and the LLM is not handed an empty block.
flight_context = (
    "\n".join(flight_results) if flight_results else "No flight options were returned."
)


# Hotels: semantic prefs + filters
hotel_hits = hu.search_hotels(
    hotels_index,
    prefs_text=trip.hotel_prefs_text,
    top_n=5,
    country=trip.hotel_country,
    city=trip.hotel_city,
    # Fall back to 4.0 only when no rating was extracted; `or` would also override an
    # explicit 0.
    min_rating=trip.hotel_min_rating if trip.hotel_min_rating is not None else 4.0,
)

In [None]:
#gemini context formatting

def _dtfmt(ts): 
    return pd.to_datetime(ts).strftime("%d %b %Y %H:%M")

def _clip_text(text, limit: Optional[int]):
    """Return `text` cut to `limit` characters plus an ellipsis; unchanged when `limit` is None."""
    if limit is None or len(text) <= limit:
        return text
    return text[:limit].rstrip() + "…"


def hotels_context(hits, max_desc_len: Optional[int] = None, max_fac_len: Optional[int] = None) -> str:
    """
    Render hotel search hits as a plain-text block for the LLM prompt.

    Args:
        hits: Iterable of dict-like hits. The keys read are 'name', 'city', 'country',
            'rating', 'rating_raw', 'address', 'website', 'description' and
            'facilities'; all are optional.
        max_desc_len: If given, descriptions longer than this many characters are cut
            and suffixed with an ellipsis. None (the default) keeps the full text.
        max_fac_len: Same as `max_desc_len`, for the facilities text.

    Returns:
        One bulleted entry per hit, joined by newlines, or a fixed
        "No hotel options matched the filters." message when `hits` is empty or None.
    """
    if not hits:
        return "No hotel options matched the filters."

    lines = []
    for h in hits:
        # A numeric rating is shown as whole stars; otherwise fall back to the raw label.
        rating_value = h.get("rating")
        if rating_value:
            rating = f"{rating_value:.0f}★"
        else:
            rating = h.get("rating_raw") or "N/A"

        addr = h.get("address") or ""
        url = h.get("website") or ""
        # The previous length checks reassigned each value to itself and never
        # shortened anything; clipping now works, but only when a limit is passed.
        desc = _clip_text(h.get("description") or "No description available.", max_desc_len)
        fac = _clip_text(h.get("facilities") or "No facilities listed.", max_fac_len)

        lines.append(
            f"* {h.get('name','N/A')} — {h.get('city','')}, {h.get('country','')}\n"
            f"  Rating: {rating}\n"
            f"  Address: {addr}\n"
            f"  Website: {url}\n"
            f"  Description: {desc}\n"
            f"  Facilities: {fac}"
        )
    return "\n".join(lines)

# Assemble the context handed to the LLM: a flights section followed by a hotels section.
CTX = "[FLIGHTS]\n{}\n\n[HOTELS]\n{}\n".format(
    flight_context,
    hotels_context(hotel_hits),
)
print(CTX)  # preview

#address website query fix

[FLIGHTS]
01. Outbound: ZRH 2025-11-10T17:50:00 → FCO 2025-11-10T19:25:00 | Swiss | SGD 453.51 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T20:15:00 → ZRH 2025-11-24T21:50:00
02. Outbound: ZRH 2025-11-10T07:10:00 → FCO 2025-11-10T08:40:00 | Swiss | SGD 523.97 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T20:15:00 → ZRH 2025-11-24T21:50:00
03. Outbound: ZRH 2025-11-10T21:55:00 → FCO 2025-11-10T23:30:00 | Easyjet | SGD 541.71 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T06:00:00 → ZRH 2025-11-24T07:35:00
04. Outbound: ZRH 2025-11-10T08:15:00 → FCO 2025-11-10T09:50:00 | Easyjet | SGD 546.63 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T06:00:00 → ZRH 2025-11-24T07:35:00
05. Outbound: ZRH 2025-11-10T21:55:00 → FCO 2025-11-10T23:30:00 | Easyjet, Swiss | SGD 564.68 | A2 C1 ECONOMY
    Return : FCO 2025-11-24T20:15:00 → ZRH 2025-11-24T21:50:00

[HOTELS]
* Hotel Alimandi Vaticano — Rome, Italy
  Rating: 4★
  Address: Viale Vaticano 99 00165 Roma
  Website: http://www.alimandivaticanohotel.com
  Desc

In [13]:
# System instruction for the final planning call. It limits the model to the flight and
# hotel context blocks supplied in the request and tells it to ask for missing details
# rather than invent them. Passed as `system_instruction` when the model is created.
SYSTEM_PROMPT = """You are a meticulous travel planning assistant.
Use only the context blocks provided for:
- Flight details
- Hotel options
Do not fabricate or assume details not present in the context.

When responding:
1) Extract key info (origin, destination, dates, travelers, budget/class preferences).
2) If anything is missing, ask the user for it.
3) Filter and pick the best options from the context according to the user's ask e.g. (cheapest flight).
4) Output EXACTLY in the required format."""

# Output template for the final answer: a round-trip flight section, one hotel section
# and a suggested itinerary, followed by formatting conventions (passenger codes, cabin
# class spelling, date/time layout). Sent as the first content part of the request.
FORMAT_RULES = """
Output Format:

Flights (Round Trip)
* Outbound (<origin> → <destination>)
  <airline(s)> — <origin> <departure date/time> → <destination> <arrival date/time> | <currency> <price> | <passenger breakdown> <class>
* Return (<destination> → <origin>)
  <airline(s)> — <destination> <departure date/time> → <origin> <arrival date/time>

Hotel
* Hotel Name: <hotel name>
* Rating: <star rating>
* Address: <address>
* Website: <URL>
* Description: <short description> summarised
* Facilities: <list of facilities> summarised

Suggested Itinerary:
* Feel free to suggest a brief itinerary based on the flight times and hotel location.

Formatting Rules:
- Passenger breakdown: A = Adult, C = Child (e.g., A2 C1).
- Write the full class type (e.g., “ECONOMY”).
- Dates/times format: “DD Mon YYYY HH:MM”.
"""

# Per-request message: the user's original text plus the CTX block built from the
# flight and hotel results. Sent as the second content part, after FORMAT_RULES.
user_task = f"""User Message:
{user_input}

Context:
{CTX}

Follow the Output Format exactly. If any required field is missing in context, state what is missing and ask the user for it (instead of guessing)."""

# The system prompt is bound to the model once, at construction time.
model = genai.GenerativeModel(
    "gemini-2.5-flash",
    system_instruction=SYSTEM_PROMPT,
)

# Ask for the final plan: the format rules come first, then the user task with its context.
resp = model.generate_content(
    [
        FORMAT_RULES,
        user_task,
    ]
)

print(resp.text)

Flights (Round Trip)
* Outbound (Zurich → Rome)
  Swiss — ZRH 10 Nov 2025 17:50 → FCO 10 Nov 2025 19:25 | SGD 453.51 | A2 C1 ECONOMY
* Return (Rome → Zurich)
  Swiss — FCO 24 Nov 2025 20:15 → ZRH 24 Nov 2025 21:50

Hotel
* Hotel Name: Hotel Alimandi Vaticano
* Rating: 4★
* Address: Viale Vaticano 99 00165 Roma
* Website: http://www.alimandivaticanohotel.com
* Description: Located in Rome City Centre, this hotel is steps from Vatican Museums and a short drive from Castel Sant Angelo.
* Facilities: Dry cleaning/laundry service, Wheelchair accessible, Proposal/romance packages available, Television in common areas, Limo or Town Car service available

Suggested Itinerary:
*   **10 Nov 2025:** Arrive at FCO in the evening (19:25), transfer to Hotel Alimandi Vaticano, and check in. Settle into your accommodation.
*   **11-23 Nov 2025:** Explore Rome! With your hotel steps away from the Vatican Museums, begin your exploration of Vatican City, including St. Peter's Basilica and the Colosseum. 