# Libraries

In [None]:
import torch
import pandas as pd
from pathlib import Path
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import json
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from typing import List, Optional
from pydantic import BaseModel, Field
from tqdm import tqdm
import argparse
import time
import pickle
from langchain.callbacks.base import BaseCallbackHandler
from langchain_mistralai import ChatMistralAI
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
import os
from tqdm.notebook import tqdm as notebook_tqdm
import time
from IPython.display import display, HTML

In [None]:
def green_progress_bar(iterable=None, total=None, desc=None, **kwargs):
    """Return a notebook tqdm progress bar that is green by default.

    Args:
        iterable: Optional iterable to wrap; forwarded to ``notebook_tqdm``.
        total: Optional expected number of iterations.
        desc: Optional label shown next to the bar.
        **kwargs: Any further ``notebook_tqdm`` keyword arguments. A
            caller-supplied ``colour`` overrides the green default.

    Returns:
        The ``notebook_tqdm`` instance built from the arguments.
    """
    # setdefault (instead of a hard-coded keyword next to **kwargs) keeps an
    # explicit colour=... from raising "got multiple values for keyword argument".
    kwargs.setdefault('colour', '#00cc66')
    return notebook_tqdm(iterable=iterable, total=total, desc=desc, **kwargs)

# Loading Environment Variables

In [None]:
# Load variables from a .env file into the process environment
# (presumably the provider API keys used further down — confirm against the .env file).
load_dotenv()

# Setting Langsmith Tracing

In [None]:
# Configure LangChain tracing: enable it, point it at the LangSmith endpoint,
# and file the runs under this notebook's project name.
os.environ.update({
    "LANGCHAIN_TRACING_V2": "true",
    "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    "LANGCHAIN_PROJECT": "Rag_travel_planner_v3.0.0",
})

# Load the data into DataFrames

In [None]:
# Both CSVs are resolved relative to the directory the notebook is run from.
base_path = Path.cwd()
landmark_prices, places_api_data = (
    pd.read_csv(base_path / relative_csv)
    for relative_csv in ('../data/egypt_v0.1.csv', '../data/places_details_v1.csv')
)

# Store DataFrame rows in LangChain Documents

In [None]:
def _landmark_document(row):
    """Render one landmark-price row as a Document tagged with its source."""
    page = f"""
    Governorate: {row.get('Governorate/City', 'N/A')}
    Site: {row.get('Place', 'N/A')}
    Egyptian Ticket: {row.get('Egyptian', 'N/A')} EGP
    Egyptian Student Ticket: {row.get('EgyptianStudent', 'N/A')} EGP
    Foreign Ticket: {row.get('Foreign', 'N/A')} EGP
    Foreign Student Ticket: {row.get('ForeignStudent', 'N/A')} EGP
    Visiting Times: {row.get('VisitingTimes', 'N/A')}
    """
    return Document(page_content=page, metadata={"source": 'landmark_prices'})


print("Processing landmark price data...")
# One Document per row of the landmark price table; later cells append to this list.
documents = [
    _landmark_document(landmark_row)
    for _, landmark_row in green_progress_bar(
        landmark_prices.iterrows(),
        total=len(landmark_prices),
        desc="Processing landmarks",
    )
]

In [None]:
print("Processing places API data...")

# Append one Document per row of the places table to the shared `documents` list.
for _, row in green_progress_bar(places_api_data.iterrows(), total=len(places_api_data), desc="Processing places"):
    # The two fields reused as metadata are looked up once with the same 'N/A'
    # fallback as the page text, so a missing column cannot raise KeyError
    # in the metadata while the text quietly tolerates it.
    primary_type = row.get('primaryTypeDisplayName.text', 'N/A')
    address = row.get('formattedAddress', 'N/A')
    text = f"""
    Place Name: {row.get('displayName.text', 'N/A')}
    Place Primary Type: {primary_type}
    Place Types: {row.get('types', 'N/A')}
    Place Price: {row.get('priceRange.endPrice.units', 'N/A')} EGP
    Place Price Level: {row.get('priceLevel', 'N/A')}
    Place Location: {address}
    Place Star Rating: {row.get('rating', 'N/A')}
    Place website: {row.get('websiteUri', 'N/A')}
    """
    documents.append(Document(
        page_content=text,
        # Metadata values are stored as strings, as before.
        metadata={"source": 'Places_api', 'Type': str(primary_type), 'city': str(address)},
    ))

# Embed Documents (text ---> vectors of numbers)

In [None]:
# Run the embedding model on the GPU when torch reports CUDA, otherwise on the CPU.
_embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# Embedding model shared by index creation, index loading and retrieval.
embeddings = HuggingFaceEmbeddings(
    model_name='intfloat/multilingual-e5-large-instruct',
    model_kwargs={'device': _embedding_device},
    encode_kwargs={'normalize_embeddings': True},
)

In [None]:
# Directory where the FAISS index is persisted between runs.
path = base_path / '..' / 'faiss_e5large_v1.0'

if not path.exists():
    print("🚧 Index not found. Creating...")

    # The index is seeded from the first chunk, so an empty document list
    # would otherwise surface as a bare IndexError on chunked_docs[0].
    if not documents:
        raise ValueError("Cannot create the FAISS index: no documents were loaded.")

    # Embed in fixed-size chunks so the progress bar advances while indexing.
    chunk_size = 100
    chunked_docs = [documents[i:i + chunk_size] for i in range(0, len(documents), chunk_size)]

    with green_progress_bar(total=len(documents), desc="Creating FAISS index") as pbar:
        # The first chunk creates the store; the remaining chunks are added to it.
        vectorstore = FAISS.from_documents(chunked_docs[0], embeddings)
        pbar.update(len(chunked_docs[0]))

        for chunk in chunked_docs[1:]:
            vectorstore.add_documents(chunk)
            pbar.update(len(chunk))

    # Persist the index so later runs take the loading branch instead.
    with green_progress_bar(total=1, desc="Saving FAISS index") as save_pbar:
        vectorstore.save_local(path)
        save_pbar.update(1)

    print("✅ FAISS index created and saved.")
else:
    print("📂 FAISS index exists. Loading...")
    with green_progress_bar(total=1, desc="Loading FAISS index") as pbar:
        # NOTE(security): allow_dangerous_deserialization=True opts in to
        # unpickling the stored index; only load directories this notebook wrote.
        vectorstore = FAISS.load_local(path, embeddings, allow_dangerous_deserialization=True)
        pbar.update(1)
    print("✅ FAISS index loaded.")

# Wrap the vector store as a retriever using the "similarity_score_threshold"
# search type, configured with k=50 and score_threshold=0.5.
# NOTE(review): presumably k caps the number of hits and the threshold drops
# low-scoring ones; confirm the score scale for this store.
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold", 
    search_kwargs={
        "k": 50,                     
        "score_threshold": 0.5,      
    })

# Instantiating the LLM with one provider (Groq, Google, or NVIDIA)

In [None]:
# Exactly one chat-model provider is instantiated. Building every client and
# letting the last assignment win would construct (and require credentials
# for) a client that is never used. "google" is the default.
LLM_PROVIDER = "google"  # or "nvidia"

# Factories are lambdas so that only the selected client is constructed.
_LLM_FACTORIES = {
    "nvidia": lambda: ChatNVIDIA(model="nvidia/llama-3.1-nemotron-ultra-253b-v1", temperature=0.2),
    "google": lambda: ChatGoogleGenerativeAI(model='gemini-2.0-flash', temperature=0.3),
}

llm = _LLM_FACTORIES[LLM_PROVIDER]()

# Prompt Template

In [None]:
# Baseline itinerary prompt. It is filled from six variables: context,
# user_query, favorite_places, visitor_type, num_days and budget.
# Not referenced by the other cells visible in this notebook.
prompt_template = PromptTemplate(
    input_variables=["context", "user_query", "favorite_places", "visitor_type", "num_days", "budget"],
    template="""You are a helpful travel planner AI.
Use the context below, which contains information about ticket prices, place descriptions, restaurant details, and art gallery information.

Context:
{context}

User Query:
{user_query}

Additional Preferences:
- Favorite types of places: {favorite_places}
- Visitor type: {visitor_type} (e.g., Egyptian, Egyptian student, Foreign, or foreign student)
- Number of travel days: {num_days}
- Overall budget for all days: {budget} EGP
- Exclude hotels from the plan.
- Ensure that the itinerary includes at least 3 meals per day.

Based on the above, return a detailed {num_days}-day travel itinerary with approximate costs and suggestions. If some details are missing, make reasonable assumptions and indicate them.
"""
)

In [None]:
# Detailed itinerary prompt; this is the template generate_travel_plan formats.
# It takes the same six variables as the baseline prompt and adds explicit
# sections on attractions, dining, timing, budget and cultural context.
improved_prompt_template = PromptTemplate(
    input_variables=["context", "user_query", "favorite_places", "visitor_type", "num_days", "budget"],
    template="""You are an expert Egyptian travel planner with extensive knowledge of historical sites, cultural attractions, local cuisine, and hidden gems across Egypt. Your task is to create a personalized travel itinerary that matches the user's preferences and constraints.

### AVAILABLE INFORMATION:
{context}

### USER REQUEST:
{user_query}

### USER PREFERENCES:
- Favorite types of places: {favorite_places}
- Visitor category: {visitor_type} (Affects ticket pricing)
- Trip duration: {num_days} days
- Total budget: {budget} EGP for the entire trip

### DETAILED INSTRUCTIONS:
1. ATTRACTIONS SELECTION:
   - Prioritize attractions that SPECIFICALLY MATCH user's favorite place types
   - Select attractions with CONFIRMED AVAILABILITY and VISITING HOURS
   - Include EXACT TICKET PRICES for the appropriate visitor type ({visitor_type})
   - Consider geographic proximity to minimize travel time

2. DINING RECOMMENDATIONS:
   - Include exactly 3 meals per day (breakfast, lunch, dinner)
   - Recommend authentic Egyptian cuisine and local specialties
   - Include specific restaurant names, locations, and price ranges

3. TIME MANAGEMENT:
   - Create a REALISTIC timeline accounting for travel between locations
   - Allow sufficient time at major attractions (2-3 hours minimum for important sites)
   - Schedule meals at appropriate times (breakfast: 7-9 AM, lunch: 12-2 PM, dinner: 7-9 PM)
   - Include short breaks between activities

4. BUDGET ALLOCATION:
   - Track running costs for all activities and meals
   - Allocate budget appropriately across all days
   - Reserve 5% of budget for unexpected expenses
   - Prioritize MUST-SEE attractions even if they're more expensive

5. CULTURAL CONTEXT:
   - Provide brief historical/cultural significance for major attractions
   - Note any specific customs, dress codes, or etiquette for religious or cultural sites
   - Mention optimal visiting conditions (e.g., "best viewed at sunset")

DO NOT include hotels or accommodations in your plan.
DO NOT exceed the total budget provided.
DO NOT recommend places without confirmed existence in the provided context.
ALWAYS show exact or estimated costs for EACH attraction and meal.

Your response must follow the structured format required by the JSON schema, with complete details for each day's activities.
"""
)

In [None]:
# Budget-first variant of the itinerary prompt: same six input variables, but
# the budget is stated as a hard constraint and activities are to be reduced
# rather than the budget exceeded.
# Not referenced by the other cells visible in this notebook.
budget_conscious_prompt = PromptTemplate(
    input_variables=["context", "user_query", "favorite_places", "visitor_type", "num_days", "budget"],
    template="""You are an expert Egyptian travel planner with extensive knowledge of historical sites, cultural attractions, local cuisine, and hidden gems across Egypt. Your task is to create a personalized travel itinerary that STRICTLY ADHERES TO THE BUDGET CONSTRAINTS.

### AVAILABLE INFORMATION:
{context}

### USER REQUEST:
{user_query}

### USER PREFERENCES:
- Favorite types of places: {favorite_places}
- Visitor category: {visitor_type} (Affects ticket pricing)
- Trip duration: {num_days} days
- MAXIMUM TOTAL BUDGET: {budget} EGP for the entire trip (THIS IS A HARD CONSTRAINT)

### DETAILED INSTRUCTIONS:
1. BUDGET MANAGEMENT (HIGHEST PRIORITY):
   - The total cost MUST NOT EXCEED {budget} EGP under any circumstances
   - If necessary, REDUCE THE NUMBER OF ACTIVITIES per day to stay within budget
   - Allocate budget in this order of priority: (1) Must-see attractions, (2) Meals, (3) Secondary attractions
   - Track cumulative costs meticulously throughout the itinerary
   - Reserve 10% of budget for contingencies and transportation between sites

2. ATTRACTIONS SELECTION:
   - Prioritize attractions that match user's favorite place types AND provide the best value for money
   - For each attraction, include EXACT TICKET PRICES for {visitor_type} visitors
   - If an attraction is expensive but unmissable, compensate by selecting more affordable options for other activities
   - Consider free or low-cost alternatives when possible (e.g., viewpoints, markets, walking tours)

3. DINING RECOMMENDATIONS:
   - Include 3 meals per day with realistic costs
   - Balance between authentic experiences and budget constraints
   - For expensive destinations, suggest at least one affordable meal option per day
   - Include specific price estimates for each meal

4. TIME AND ACTIVITY MANAGEMENT:
   - If budget forces reduction in activities, focus on QUALITY over QUANTITY
   - Allow sufficient time at major attractions (2-3 hours minimum)
   - Group activities by geographic proximity to reduce transportation costs
   - Include at least one low-cost or free activity each day

5. BUDGET BREAKDOWN:
   - At the end of each day's itinerary, provide a running total of expenses
   - Clearly itemize all costs in the itinerary
   - If assumptions are made about costs, they should be CONSERVATIVE estimates

DO NOT include hotels or accommodations in your plan.
DO NOT exceed the total budget provided - this is a strict requirement.
DO NOT recommend places without confirmed existence in the provided context.
DO REDUCE the number of activities rather than exceeding the budget.

Your response must follow the structured format required by the JSON schema, with complete details for each day's activities and accurate cost tracking.
"""
)

# Structured Output

In [None]:
# JSON schema for the structured itinerary; passed to llm.with_structured_output.
# Shape: {"days": [{"day", "activities": [{time, activity, location, price_range}],
#                   "approximate_cost"}], "total_approximate_cost", "notes"}.
json_schema = {
    "title": "TravelItinerary",
    "description": "A structured travel itinerary for the user.",
    "type": "object",
    "properties": {
        "days": {
            "type": "array",
            "description": "List of days with planned activities.",
            "items": {
                "type": "object",
                "properties": {
                    "day": {"type": "string", "description": "Theme of the Day or Day label, e.g., 'Day 1'"},
                    "activities": {
                        "type": "array",
                        # This key must be exactly "description"; a misspelled key
                        # leaves the array undocumented and adds an unknown keyword.
                        "description": "Activities planned for the day.",
                        "items": {
                            "type": "object",
                            "properties": {
                                "time": {"type": "string", "description": "Time of the activity"},
                                "activity": {"type": "string", "description": "Name of the activity"},
                                "location": {"type": "string", "description": "Location name"},
                                "price_range": {"type": "string", "description": "Price range or cost"},
                            },
                            # price_range is optional on purpose: it is not listed here.
                            "required": ["time", "activity", "location"]
                        }
                    },
                    "approximate_cost": {"type": "string", "description": "Total cost for the day"}
                },
                "required": ["day", "activities", "approximate_cost"]
            }
        },
        "total_approximate_cost": {
            "type": "string",
            "description": "Total cost for the trip"
        },
        "notes": {
            "type": "string",
            "description": "Any additional notes or assumptions"
        }
    },
    # "notes" is optional.
    "required": ["days", "total_approximate_cost"]
}

In [None]:
# Bind the itinerary schema to the chat model; generate_travel_plan invokes this wrapper.
structured_llm = llm.with_structured_output(json_schema)


In [None]:
def generate_enhanced_query1(destination, interests, visitor_type):
    """Build a dining-heavy retrieval query for the vector store.

    The interests are joined into one comma-separated phrase, and any of the
    built-in dining keywords that do not already occur in that phrase
    (case-insensitive substring match) are appended to it.

    Args:
        destination (str): The main destination (city or region) for the trip.
        interests (list): Interest strings such as ["temples", "museums"].
        visitor_type (str): Type of visitor (Egyptian, Foreign, etc.).

    Returns:
        str: The query text with leading and trailing whitespace stripped.
    """
    focus = ", ".join(interests)

    # Collect the dining keywords the caller's interests do not mention yet.
    # The check is a substring test, so "food" counts as present in "seafood".
    lowered_focus = focus.lower()
    missing_keywords = []
    for keyword in ("restaurants", "cafes", "local cuisine", "dining", "food"):
        if keyword not in lowered_focus:
            missing_keywords.append(keyword)
    if missing_keywords:
        focus = focus + ", " + ", ".join(missing_keywords)

    # Wording is chosen to overlap with the text of the indexed documents.
    return f"""
    Detailed travel plan for {destination} Egypt focusing on {focus}.
    I need comprehensive information about:
    
    ATTRACTIONS:
    1. Ticket prices for {visitor_type} visitors
    2. Opening hours and visiting times for attractions
    3. Cultural sites and museums in {destination}
    
    DINING OPTIONS (IMPORTANT):
    1. Popular restaurants in {destination} with price range between 100-300 EGP
    2. Coffee shops and cafes in {destination}
    3. Traditional Egyptian dining establishments
    4. Food markets and street food locations
    5. Breakfast locations open in the morning
    6. Lunch restaurants with good ratings
    7. Dinner options that serve authentic Egyptian cuisine
    8. Dessert places and sweet shops
    9. Specialty food items in {destination}
    10. Restaurant rating information and visitor reviews
    11. Restaurants with Nile views or special settings
    12. Seafood restaurants in {destination}
    
    Please include specific names, locations, price ranges, and opening hours for all food establishments.
    """.strip()

In [None]:

def generate_enhanced_query(destination, interests, visitor_type):
    """Build a general-purpose retrieval query for the vector store.

    Args:
        destination (str): The main destination (city or region) for the trip.
        interests (list): Interest strings such as ["temples", "museums"].
        visitor_type (str): Type of visitor (Egyptian, Foreign, etc.).

    Returns:
        str: The query text with leading and trailing whitespace stripped.
    """
    focus = ", ".join(interests)

    # Wording is chosen to overlap with the text of the indexed documents.
    return f"""
    Detailed travel plan for {destination} Egypt focusing on {focus}.
    Need specific information about:
    1. Ticket prices for {visitor_type} visitors
    2. Opening hours and visiting times for attractions
    3. Highly rated places with at least 4-star ratings
    4. Cultural sites in {destination} with their exact locations
    5. Art galleries and museums in {destination}
    6. Popular local restaurants in {destination}
    7. Highly-rated cafes and coffee shops in {destination}
    8. Traditional Egyptian dining experiences and restaurants
    9. Local food markets and street food vendors
    10. Price ranges for restaurants and cafes
    11. Authentic dining options for breakfast, lunch, and dinner
    12. Food specialties and must-try dishes in {destination}
    """.strip()

# Example trip parameters used by the cells below.
destination = "Luxor"
interests = ["temples", "Valley of the Kings", "Karnak", "local cuisine", "art galleries"]
visitor_type = "Foreign"  # or "Egyptian", "Egyptian student", "foreign student"
num_days = 3
budget = 5000  # inserted into the prompts as an amount in EGP

# The dining-focused variant (generate_enhanced_query1) builds the retrieval query here.
enhanced_query = generate_enhanced_query1(destination, interests, visitor_type)

In [None]:

def generate_travel_plan(retriever_query, favorite_places, visitor_type, num_days, budget, city):
    """Retrieve context for a trip and ask the structured LLM for an itinerary.

    Uses the module-level ``retriever``, ``improved_prompt_template`` and
    ``structured_llm``.

    Args:
        retriever_query: Query text sent to the retriever.
        favorite_places: Preferred place types; inserted into the user
            request and the prompt.
        visitor_type: Visitor category inserted into the prompt.
        num_days: Trip length in days.
        budget: Total budget inserted into the prompt (the prompt labels it EGP).
        city: Destination named in the user request.

    Returns:
        Whatever ``structured_llm.invoke`` returns for the filled-in prompt.
    """
    retrieved_docs = retriever.invoke(retriever_query)
    # The retrieved page texts become the prompt context, one document after another.
    context_text = "\n".join(doc.page_content for doc in retrieved_docs)
    user_query = f'Plan a {num_days}-day trip in {city} with visits to {favorite_places}, and dining options.'

    filled_prompt = improved_prompt_template.format(
        context=context_text,
        user_query=user_query,
        favorite_places=favorite_places,
        visitor_type=visitor_type,
        num_days=num_days,
        budget=budget,
    )
    return structured_llm.invoke(filled_prompt)

In [None]:
# Example run: build the itinerary from the parameters and query defined above.
favorite_places = "Cultural sites, historical landmarks, art galleries"
travel_plan = generate_travel_plan(enhanced_query, favorite_places, visitor_type, num_days, budget, destination)
# Bare expression as the last line of the cell so the notebook displays the result.
travel_plan