In [14]:
import pandas as pd
import re
import numpy as np
from IPython.display import display
import chromadb_lib as cdb
import flight_utils as fu
import hotels_utils as hu


import google.generativeai as genai


In [15]:
import os

# Never commit API keys to a notebook: read the key from the environment instead.
# SECURITY: a literal key was previously stored in this cell; treat it as leaked and
# revoke/rotate it in the Google AI console.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    # Fail here with a clear message rather than later inside the first model call.
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Export it in the environment before running this notebook."
    )
genai.configure(api_key=GEMINI_API_KEY)

In [3]:
# Read both Rome hotel listings (five-star file first, then four-star) and stack them
# into one frame with a fresh 0..n-1 index; the flights table is loaded separately.
hotel1, hotel2 = (
    pd.read_csv(csv_path)
    for csv_path in ("rome_five_star_hotel.csv", "rome_four_star_hotels.csv")
)
hotels_df = pd.concat([hotel1, hotel2], ignore_index=True)
flights_df = pd.read_csv("flights_data.csv")

In [4]:
import re
from dateutil import parser as dtparse

# Matches one standalone date token, case-insensitively. Two accepted shapes:
#   - day-first text such as "10th of November 2025" or "24th November 2025"
#     (optional ordinal suffix, optional "of"; the month slot is any `\w+` word,
#     so it is not validated as a real month name here)
#   - ISO "YYYY-MM-DD"
# The whole token is captured in group 1.
_DATE_TOKEN_RE = re.compile(
    r'(\d{1,2}(?:st|nd|rd|th)?(?:\s+of)?\s+\w+\s+\d{4}|'
    r'\d{4}-\d{2}-\d{2})',
    flags=re.I
)

# Matches a "from <d1> to <d2>" phrase, case-insensitively.
# Both named groups exclude `,`, `.`, `;` and newlines and are lazy:
#   - d1 runs up to the first whitespace-delimited "to"
#   - d2 ends at the first punctuation mark, whitespace character or end of text,
#     so it only ever holds a single whitespace-free token
# The pattern does not check that d1/d2 are dates; "from Zurich to Rome" matches too.
_FROM_TO_RE = re.compile(
    r'\bfrom\s+(?P<d1>[^,.;\n]+?)\s+to\s+(?P<d2>[^,.;\n]+?)(?:[.,;]|\s|$)',
    flags=re.I
)

def _parse_one_date(s):
    try: return dtparse.parse(s, fuzzy=True, dayfirst=False).date()
    except Exception: return None

def extract_date_range(text: str):
    """Extract a ``(start, end)`` pair of ``datetime.date`` values from free text.

    Strategy, in order:
      1. The first "from <A> to <B>" phrase, when both sides parse as dates.
      2. Otherwise the first two standalone date tokens matched by ``_DATE_TOKEN_RE``.
      3. Otherwise a single date token, returned as ``(start, None)``.

    Args:
        text: The raw user message.

    Returns:
        A 2-tuple ``(start, end)``. Either element is ``None`` when it could not be
        determined; ``(None, None)`` when no usable date was found.
    """
    m = _FROM_TO_RE.search(text)
    if m:
        start_raw, end_raw = m.group('d1'), m.group('d2')
        # The `d2` group is lazy and stops at the first whitespace, so for
        # "... to 24th November 2025" it only holds "24th". Parsing that fragment
        # lets dateutil fill the missing month/year from today's date, which yields
        # a wrong end date. Re-read a complete date token at the same position.
        full_end = _DATE_TOKEN_RE.match(text, m.start('d2'))
        if full_end:
            end_raw = full_end.group(1)
        d1, d2 = _parse_one_date(start_raw), _parse_one_date(end_raw)
        if d1 and d2: return d1, d2

    # Fallback: standalone date tokens in order of appearance.
    toks = _DATE_TOKEN_RE.findall(text)
    if len(toks) >= 2:
        d1, d2 = _parse_one_date(toks[0]), _parse_one_date(toks[1])
        if d1 and d2: return d1, d2
    if len(toks) == 1:
        return _parse_one_date(toks[0]), None
    return None, None


In [5]:
from datetime import date
from typing import Optional, Literal
from pydantic import BaseModel

# Coarse time-of-day buckets allowed for the depart/arrive/return window fields of TripQuery.
TimeWindow = Literal["morning","afternoon","evening","night"]

class TripQuery(BaseModel):
    """Structured form of a user's trip request (flight search + hotel search inputs).

    Only ``origin`` and ``destination`` are required; every other field has a default.
    """
    # Route endpoints. The parser in this notebook fills these with an IATA code when
    # the city is known, otherwise with the title-cased text it captured.
    origin: str
    destination: str
    # Travel dates; None when not found in the request.
    start_date: Optional[date] = None
    end_date: Optional[date] = None
    # Passenger counts.
    adults: int = 1
    children: int = 0
    # Fare class, restricted to the four listed values.
    cabin: Literal["ECONOMY","PREMIUM_ECONOMY","BUSINESS","FIRST"] = "ECONOMY"
    # Optional time-of-day preferences (see TimeWindow).
    depart_window: Optional[TimeWindow] = None
    arrive_window: Optional[TimeWindow] = None
    return_window: Optional[TimeWindow] = None
    # Hotel search filters.
    hotel_country: Optional[str] = None
    hotel_city: Optional[str] = None
    hotel_min_rating: Optional[float] = None
    # Free-text preferences passed to the hotel search as `prefs_text`.
    hotel_prefs_text: Optional[str] = None

# Lower-case city name -> airport code.
CITY_TO_IATA = {
    "zurich": "ZRH",
    "rome": "FCO",
    "singapore": "SIN",
    "frankfurt": "FRA",
}

# Lower-case city name -> country, used for the hotel country filter.
COUNTRY_BY_CITY = {
    "rome": "Italy",
    "zurich": "Switzerland",
    "frankfurt": "Germany",
    "singapore": "Singapore",
}

# Phrase -> canonical time window. Insertion order matters to code that scans
# these keys in sequence, so keep the plain words ahead of the qualified phrases.
WINDOW_MAP = {
    "morning": "morning",
    "afternoon": "afternoon",
    "evening": "evening",
    "night": "night",
    "late night": "night",
    "late-night": "night",
    "late evening": "evening",
    "early morning": "morning",
}

# Phrase -> canonical cabin code. Insertion order is preserved on purpose.
CABIN_MAP = {
    "economy": "ECONOMY",
    "premium economy": "PREMIUM_ECONOMY",
    "business": "BUSINESS",
    "first": "FIRST",
}

def _normalize_city_or_iata(s:str)->str:
    t=s.strip().lower()
    if len(t)==3 and t.isalpha(): return t.upper()
    return CITY_TO_IATA.get(t, s.title())

def _extract_rating(text:str)->Optional[float]:
    m=re.search(r'(\d)\s*(?:star|\*|-star)', text.lower())
    if m: return float(m.group(1))
    words={"one":1,"two":2,"three":3,"four":4,"five":5}
    for w,n in words.items():
        if f"{w} star" in text.lower(): return float(n)
    return None

def _time_window(text:str)->Optional[str]:
    """Return the first time-of-day window mentioned in ``text``, if any.

    Phrases from ``WINDOW_MAP`` are matched as whole words, so "nights", "tonight"
    or "fortnight" do not count as the "night" window. When several windows are
    mentioned, the one that appears earliest in the text wins.

    Args:
        text: The raw user message.

    Returns:
        The canonical window name from ``WINDOW_MAP``, or ``None`` if none is mentioned.
    """
    t=text.lower()
    best_pos, best_window = None, None
    for k,v in WINDOW_MAP.items():
        hit = re.search(r'\b' + re.escape(k) + r'\b', t)
        # Keep the earliest mention rather than whichever key is listed first.
        if hit and (best_pos is None or hit.start() < best_pos):
            best_pos, best_window = hit.start(), v
    return best_window

def parse_user_text_to_tripquery(text:str)->TripQuery:
    """Parse a free-text trip request into a ``TripQuery`` using regex/keyword heuristics.

    Extracted pieces:
      - route: the first "from X to Y" with single-word alphabetic names
      - dates: via ``extract_date_range``
      - passengers: "<n> adult..." / "<n> child..." (defaults 1 adult, 0 children)
      - cabin: first phrase of ``CABIN_MAP`` found in the text (default "ECONOMY")
      - windows: departure via ``_time_window``; return via the first window word
        following "return"
      - hotel filters: star rating, plus city/country derived from the destination

    Args:
        text: The raw user message.

    Returns:
        A populated ``TripQuery``. ``origin``/``destination`` are empty strings when
        no route is found; ``hotel_prefs_text`` is the full original message.
    """
    lowered = text.lower()

    # from X to Y
    m=re.search(r'\bfrom\s+([A-Za-z]{3,}?)\s+to\s+([A-Za-z]{3,}?)\b', text, re.I)
    origin_raw=m.group(1) if m else ""
    dest_raw=m.group(2) if m else ""
    # dates
    sd, ed = extract_date_range(text)
    # pax
    m_ad=re.search(r'(\d+)\s+adult', text, re.I)
    m_ch=re.search(r'(\d+)\s+child', text, re.I)
    adults=int(m_ad.group(1)) if m_ad else 1
    children=int(m_ch.group(1)) if m_ch else 0
    # cabin: collect every phrase present, then drop phrases that are merely a part
    # of a longer matched phrase. Without this, "premium economy" always resolved to
    # ECONOMY because the shorter "economy" key is tested first.
    cabin_hits=[k for k in CABIN_MAP if k in lowered]
    cabin_hits=[k for k in cabin_hits if not any(k != other and k in other for other in cabin_hits)]
    cabin=CABIN_MAP[cabin_hits[0]] if cabin_hits else "ECONOMY"
    # windows
    # NOTE(review): the departure window is searched in the whole message, so a window
    # word that belongs to the return leg can also be picked up as the departure window.
    depart=_time_window(text)
    ret=None
    m_ret=re.search(r'return.*?(morning|afternoon|evening|night)', text, re.I)
    if m_ret: ret=WINDOW_MAP[m_ret.group(1).lower()]
    # rating & hotel prefs/location
    min_rating=_extract_rating(text)
    origin=_normalize_city_or_iata(origin_raw)
    dest=_normalize_city_or_iata(dest_raw)
    if len(dest)==3:
        # Destination is an airport code: map it back to a known city name, if any.
        hotel_city=next((k.title() for k,v in CITY_TO_IATA.items() if v==dest), None)
    else:
        # Use None (not "") when no destination was found, matching the Optional field.
        hotel_city=dest or None
    hotel_country=COUNTRY_BY_CITY.get(hotel_city.lower(), None) if hotel_city else None
    return TripQuery(
        origin=origin,
        destination=dest,
        start_date=sd,
        end_date=ed,
        adults=adults,
        children=children,
        cabin=cabin,
        depart_window=depart,
        arrive_window=None,
        return_window=ret,
        hotel_country=hotel_country,
        hotel_city=hotel_city,
        hotel_min_rating=min_rating,
        hotel_prefs_text=text
    )

In [6]:
# Set up the hotel index, load the hotel rows into it, and parse the raw flights table.
# NOTE(review): presumably "./chroma_storage" is a persistent Chroma directory,
# "hotel_information" the collection name and "all-MiniLM-L6-v2" the embedding model —
# confirm against hotels_utils. If ingest_hotels is not idempotent, re-running this cell
# may add duplicate rows — TODO confirm.
hotels_index = hu.init_hotel_index("./chroma_storage", "hotel_information", "all-MiniLM-L6-v2")
hu.ingest_hotels(hotels_index, hotels_df)
parsed_flights = fu.parse_flights_df(flights_df, summary_col ="summary")

In [7]:
# Example free-text request; it is parsed into a TripQuery below and later sent verbatim to the model.
user_input = "Hi can you plan me a trip from Zurich to Rome from 10th of november 2025 to 24th november 2025, for 2 adults and 1 children, with an economy budget, evening flight and a preference for a hotel with 4 stars and near vatican"

In [8]:
# Turn the free-text request into a structured TripQuery and show the parsed fields.
trip = parse_user_text_to_tripquery(user_input)
print(trip)

origin='ZRH' destination='FCO' start_date=datetime.date(2025, 11, 10) end_date=datetime.date(2025, 11, 24) adults=2 children=1 cabin='ECONOMY' depart_window='evening' arrive_window=None return_window=None hotel_country='Italy' hotel_city='Rome' hotel_min_rating=4.0 hotel_prefs_text='Hi can you plan me a trip from Zurich to Rome from 10th of november 2025 to 24th november 2025, for 2 adults and 1 children, with an economy budget, evening flight and a preference for a hotel with 4 stars and near vatican'


In [10]:
# Flights: pick the cheapest valid itinerary
# Every filter comes from the parsed TripQuery; the call returns the best row plus a
# candidates table (both are consumed by flights_context below).
best_flight, flight_cands = fu.pick_cheapest(
    parsed_flights,
    origin=trip.origin,
    destination=trip.destination,
    start_date=trip.start_date,
    end_date=trip.end_date,
    fare_type=trip.cabin,
    depart_window=trip.depart_window,
    arrive_window=trip.arrive_window,
    return_window=trip.return_window,
    return_top_n=3,   # keep a few for context
)

# Hotels: semantic prefs + filters
# prefs_text is the full user message (that is what the parser stores in hotel_prefs_text).
hotel_hits = hu.search_hotels(
    hotels_index,
    prefs_text=trip.hotel_prefs_text,
    top_n=5,
    country=trip.hotel_country,
    city=trip.hotel_city,
    min_rating=trip.hotel_min_rating or 4.0,  # falls back to 4.0 when no rating was parsed (or it is 0)
)

In [12]:
#gemini context formatting

def _dtfmt(ts): 
    return pd.to_datetime(ts).strftime("%d %b %Y %H:%M")

def flights_context(best, cands: pd.DataFrame)->str:
    """Render candidate itineraries as one bullet line each for the model prompt.

    Args:
        best: The selected itinerary; only checked for ``None``.
        cands: Candidate itineraries, one row per round trip.

    Returns:
        Newline-joined bullet lines, or a fixed "no options" sentence when there
        is no best itinerary or the candidate table is empty.
    """
    nothing_to_show = best is None or cands.empty
    if nothing_to_show:
        return "No flight options matched the filters."

    def _bullet(row) -> str:
        # Outbound leg, return leg, then price / passengers / cabin.
        outbound = f"* {row['airlines']} — {row['origin']} {_dtfmt(row['out_dep_dt'])} → {row['destination']} {_dtfmt(row['out_arr_dt'])} "
        inbound = f"|| Return {row['ret_origin']} {_dtfmt(row['ret_dep_dt'])} → {row['ret_destination']} {_dtfmt(row['ret_arr_dt'])} "
        fare = f"| {row['currency']} {row['total_price']:.2f} | A{row['adults']} C{row['children']} {row['cabin'].upper()}"
        return outbound + inbound + fare

    return "\n".join(_bullet(row) for _, row in cands.iterrows())

def hotels_context(hits)->str:
    """Render hotel search hits as a bulleted text block for the model prompt.

    Args:
        hits: A sequence of dict-like hotel records. Keys read: ``name``, ``city``,
            ``country``, ``rating``, ``rating_raw``, ``address``, ``website``.

    Returns:
        One four-line entry per hotel joined by newlines, or a fixed "no options"
        sentence when ``hits`` is empty or ``None``.
    """
    if not hits:
        return "No hotel options matched the filters."

    def _stars(hotel) -> str:
        # Prefer the numeric rating; fall back to the raw rating text, then "N/A".
        if hotel.get('rating'):
            return f"{hotel.get('rating'):.0f}★"
        return hotel.get('rating_raw') or "N/A"

    def _entry(hotel) -> str:
        rating = _stars(hotel)
        addr = hotel.get("address") or ""
        url = hotel.get("website") or ""
        header = f"* {hotel.get('name','N/A')} — {hotel.get('city','')}, {hotel.get('country','')}\n"
        details = (
            f"  Rating: {rating}\n"
            f"  Address: {addr}\n"
            f"  Website: {url}"
        )
        return header + details

    return "\n".join(_entry(hotel) for hotel in hits)

# Assemble the grounding context for the model: a [FLIGHTS] section followed by a
# [HOTELS] section, each rendered by the helper functions above.
CTX = f"""[FLIGHTS]
{flights_context(best_flight, flight_cands)}

[HOTELS]
{hotels_context(hotel_hits)}
"""
print(CTX[:800])  # preview: only the first 800 characters are printed, so the output may be cut mid-line

#address website query fix

[FLIGHTS]
* Easyjet — ZRH 10 Nov 2025 21:55 → FCO 10 Nov 2025 23:30 || Return FCO 24 Nov 2025 06:00 → ZRH 24 Nov 2025 07:35 | EUR 278.03 | A2 C1 ECONOMY
* Swiss — ZRH 10 Nov 2025 17:50 → FCO 10 Nov 2025 19:25 || Return FCO 24 Nov 2025 20:15 → ZRH 24 Nov 2025 21:50 | EUR 298.37 | A2 C1 ECONOMY
* Easyjet — ZRH 10 Nov 2025 21:55 → FCO 10 Nov 2025 23:30 || Return FCO 24 Nov 2025 19:40 → ZRH 24 Nov 2025 21:20 | EUR 328.41 | A2 C1 ECONOMY

[HOTELS]
* Generator Rome — Rome, Italy
  Rating: 4★
  Address: 251 Via Principe Amedeo 257 00185 Roma
  Website: https://generatorhostels.com/destinations/rome
* Hotel Domidea — Rome, Italy
  Rating: 4★
  Address: Via Raffaele Costi 17/21 00155 Roma
  Website: http://www.hoteldomidea.com
* Mozart — Rome, Italy
  Rating: 4★
  Address: Via Dei Greci 23B 00187 R


In [13]:
# System instruction for the model: restricts answers to the supplied context blocks
# and lists the steps to follow. Passed as `system_instruction` when the model is created.
SYSTEM_PROMPT = """You are a meticulous travel planning assistant.
Use only the context blocks provided for:
- Flight details
- Hotel options
Do not fabricate or assume details not present in the context.

When responding:
1) Extract key info (origin, destination, dates, travelers, budget/class preferences).
2) If anything is missing, ask the user for it.
3) Filter and pick the best options from the context according to the user's ask (cheapest flight).
4) Output EXACTLY in the required format."""

# Output template the model must follow; sent as the first content part of the request.
# The flight line layout mirrors the bullet lines produced by flights_context.
FORMAT_RULES = """
Output Format:

Flights (Round Trip)
* Outbound (<origin> → <destination>)
  <airline(s)> — <origin> <departure date/time> → <destination> <arrival date/time> | <currency> <price> | <passenger breakdown> <class>
* Return (<destination> → <origin>)
  <airline(s)> — <destination> <departure date/time> → <origin> <arrival date/time>

Hotel
* Hotel Name: <hotel name>
* Rating: <star rating>
* Address: <address>
* Website: <URL>

Formatting Rules:
- Passenger breakdown: A = Adult, C = Child (e.g., A2 C1).
- Write the full class type (e.g., “ECONOMY”).
- Dates/times format: “DD Mon YYYY HH:MM”.
"""

# Per-request message: the user's original text followed by the flight/hotel context
# built earlier in CTX, plus a reminder to ask for missing fields instead of guessing.
user_task = f"""User Message:
{user_input}

Context:
{CTX}

Follow the Output Format exactly. If any required field is missing in context, state what is missing and ask the user for it (instead of guessing)."""

# Attach the system prompt when you create the model
model = genai.GenerativeModel(
    model_name="gemini-2.5-flash",
    system_instruction=SYSTEM_PROMPT
)

# Generate a response using the model
# The request has two content parts: the format rules, then the user task with context.
resp = model.generate_content([FORMAT_RULES, user_task])

# NOTE(review): resp.text presumably raises when the response has no text part
# (e.g. a blocked response) — confirm and guard if needed.
print(resp.text)

Flights (Round Trip)
* Outbound (Zurich → Rome)
  Easyjet — ZRH 10 Nov 2025 21:55 → FCO 10 Nov 2025 23:30 | EUR 278.03 | A2 C1 ECONOMY
* Return (Rome → Zurich)
  Easyjet — FCO 24 Nov 2025 06:00 → ZRH 24 Nov 2025 07:35

Hotel
* Hotel Name: Hotel della Conciliazione
* Rating: 4★
* Address: Via Borgo Pio 164 00193 Roma
* Website: http://www.gruppoloan.it/hdc/de/default.html
