In [None]:
# Content of file: m 1.ipynb

# """
# # Agentic RAG Travel Guide Chatbot - Core Logic Notebook
#
# This notebook contains the consolidated work for:
# - **Member A:** Data Ingestion, Preprocessing, Vectorization, and Vector DB Setup.
# - **Member B:** CrewAI Agent Orchestration & LLM Integration.
#
# Run cells sequentially. Ensure `tripadvisor_hotel_reviews.csv` is in the same directory and your `OPENAI_API_KEY` is set as an environment variable or in the relevant cell (using a `.env` file is recommended for security).
# """

# """
# ## Part 0: Initial Setup - Environment Variables & Imports
# """

# --- CODE CELL (id: initial_setup_env_imports_v2) ---
import os
from dotenv import load_dotenv
import sys
import uuid
# Show which Python interpreter is executing this notebook.
print(sys.executable)
# Import each deep-learning package and print its installed version.
import tensorflow
print(tensorflow.__version__)
import keras
print(keras.__version__)
import tf_keras
print(tf_keras.__version__)

# Merge any local .env file into the process environment, then read the
# OpenAI key that the CrewAI section (Part 2) checks before building its LLM.
load_dotenv()
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

if OPENAI_API_KEY:
    print("OPENAI_API_KEY loaded successfully (from environment or .env file).")
else:
    # Missing or empty key: warn only, so the data-ingestion part can still run.
    print("WARNING: OPENAI_API_KEY not found in environment variables or .env file.")
    print("CrewAI agents (Member B's part) will not function correctly without it.")
    print("Please create a .env file in the root directory with: OPENAI_API_KEY='sk-your_key_here'")
# --- END CODE CELL ---

# """
# ## Part 1: Member A - Data Ingestion & Vector DB Setup
# """

# """
# ### 1.1. Imports for Data Ingestion & NLTK Setup
# """

# --- CODE CELL (id: member_a_cell_1_imports_nltk_v2) ---
import pandas as pd
import nltk
import re
from bs4 import BeautifulSoup
from datetime import datetime
import json
# os already imported

# Download NLTK data (punkt for sentence tokenization, punkt_tab for specific language data)
def download_nltk_resource(resource_name, resource_path):
    """Make sure an NLTK resource is available locally, downloading it if needed.

    Args:
        resource_name: Identifier handed to ``nltk.download`` (e.g. ``'punkt'``).
        resource_path: Path handed to ``nltk.data.find`` to test whether the
            resource is already installed (e.g. ``'tokenizers/punkt'``).

    Returns:
        None. Progress and failures are reported with ``print``; this helper is
        best-effort and does not let exceptions propagate.
    """
    try:
        nltk.data.find(resource_path)
    except LookupError:
        print(f"NLTK '{resource_name}' resource not found. Downloading...")
    except Exception as e:
        print(f"Error checking/downloading NLTK '{resource_name}': {e}")
        return
    else:
        print(f"NLTK '{resource_name}' resource found.")
        return

    # The download runs outside the lookup's exception handler so that its own
    # failures are caught as well instead of escaping the function.
    try:
        download_succeeded = nltk.download(resource_name, quiet=True)
    except Exception as e:
        print(f"Error checking/downloading NLTK '{resource_name}': {e}")
        return

    # With quiet=True a failed download is only signalled through the return
    # value, so it must be inspected before reporting success.
    if download_succeeded:
        print(f"NLTK '{resource_name}' downloaded.")
    else:
        print(f"NLTK '{resource_name}' download failed; the resource is still unavailable.")

# Tokenizer resources needed by the chunking step, as (download name, lookup path).
for _nltk_name, _nltk_path in (
    ('punkt', 'tokenizers/punkt'),
    ('punkt_tab', 'tokenizers/punkt_tab'),
):
    download_nltk_resource(_nltk_name, _nltk_path)
# --- END CODE CELL ---

# """
# ### 1.2. Load Dataset
# """

# --- CODE CELL (id: member_a_cell_2_load_data_v2) ---
# CSV with the raw reviews, resolved relative to the working directory.
dataset_path = "tripadvisor_hotel_reviews.csv"
# Begin with an empty frame so later cells can test `df.empty` even if loading fails.
df = pd.DataFrame()

if os.path.exists(dataset_path):
    try:
        df = pd.read_csv(dataset_path)
        print(f"Dataset '{dataset_path}' loaded successfully. Shape: {df.shape}")
        if not df.empty:
            # Quick structural overview plus a peek at the first rows.
            print("Dataset Info:")
            df.info()
            print("\nFirst 5 rows of the dataset:")
            print(df.head())
    except Exception as e:
        print(f"Error loading dataset '{dataset_path}': {e}")
else:
    print(f"ERROR: Dataset file not found at {dataset_path}")
    print("Please download it from Kaggle (e.g., https://www.kaggle.com/datasets/andrewmvd/trip-advisor-hotel-reviews) and place it in the same directory as this notebook.")
# --- END CODE CELL ---

# """
# ### 1.3. Preprocessing: Clean and Chunk Text
# """

# --- CODE CELL (id: member_a_cell_3_preprocess_v2) ---
# Everything except ASCII letters, digits, whitespace and the punctuation
# , . ! ? ' " is removed by clean_text.
_DISALLOWED_CHARS_RE = re.compile(r"[^A-Za-z0-9\s,.!?'\"]")
# Any run of whitespace is collapsed to a single space.
_WHITESPACE_RUN_RE = re.compile(r"\s+")


def clean_text(text):
    """Strip markup and unwanted characters from a review string.

    Args:
        text: Raw review text. Any non-string value is treated as missing.

    Returns:
        The text with HTML removed, characters outside the allowed set
        (ASCII letters, digits, whitespace, ``, . ! ? ' "``) dropped, whitespace
        runs collapsed to one space and the ends trimmed. Returns ``""`` for
        non-string input.
    """
    if not isinstance(text, str):
        return ""
    text = BeautifulSoup(text, "html.parser").get_text()
    # The patterns are raw strings, so a single backslash is correct: `\s` is
    # the whitespace class. The earlier doubled form (`\\s`) matched a literal
    # backslash and the letter "s", which stripped every space from the text
    # and glued all words together.
    text = _DISALLOWED_CHARS_RE.sub("", text)
    return _WHITESPACE_RUN_RE.sub(" ", text).strip()

# Add a `cleaned_review` column when there is review text to clean; otherwise
# report which precondition was not met.
if df.empty:
    print("DataFrame is empty, skipping review cleaning.")
elif 'Review' not in df.columns:
    print("Column 'Review' not found in DataFrame, skipping review cleaning.")
else:
    print("\nCleaning reviews...")
    df['cleaned_review'] = df['Review'].apply(clean_text)
    print("Cleaned reviews (sample of original vs cleaned):")
    print(df[['Review', 'cleaned_review']].head())

def chunk_text(text, max_tokens=450):
    """Split text into chunks holding at most ``max_tokens`` NLTK word tokens.

    Sentences (from ``nltk.sent_tokenize``) are packed greedily into a chunk
    until adding the next one would exceed the budget. A single sentence that is
    longer than the budget is cut into consecutive slices of ``max_tokens``
    word tokens, each joined with single spaces.

    Args:
        text: Text to split. Non-string or blank input yields no chunks.
        max_tokens: Maximum number of word tokens per chunk; must be positive.

    Returns:
        A list of non-blank chunk strings, in document order.

    Raises:
        ValueError: If ``text`` is non-blank and ``max_tokens`` is not positive.
    """
    if not isinstance(text, str) or not text.strip():
        return []
    # A non-positive budget would never advance the slicing loop below, so the
    # call would hang; reject it up front (before any tokenization work).
    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be a positive integer, got {max_tokens!r}")

    sentences = nltk.sent_tokenize(text)
    chunks = []
    current_chunk_sentences = []
    current_token_count = 0

    for sentence in sentences:
        sentence_tokens = nltk.word_tokenize(sentence)
        token_count_for_sentence = len(sentence_tokens)

        if token_count_for_sentence > max_tokens:
            # Flush what has been gathered so far so chunk order follows the text.
            if current_chunk_sentences:
                chunks.append(" ".join(current_chunk_sentences))
                current_chunk_sentences = []
                current_token_count = 0

            # Hard-split the oversized sentence into fixed-size token windows.
            for start in range(0, token_count_for_sentence, max_tokens):
                chunks.append(" ".join(sentence_tokens[start:start + max_tokens]))
            continue

        if current_token_count + token_count_for_sentence <= max_tokens:
            current_chunk_sentences.append(sentence)
            current_token_count += token_count_for_sentence
        else:
            # Budget exceeded: close the current chunk and start a new one
            # with this sentence.
            if current_chunk_sentences:
                chunks.append(" ".join(current_chunk_sentences))
            current_chunk_sentences = [sentence]
            current_token_count = token_count_for_sentence

    if current_chunk_sentences:
        chunks.append(" ".join(current_chunk_sentences))

    return [chunk for chunk in chunks if chunk.strip()]
# --- END CODE CELL ---

# """
# ### 1.4. Create Documents with Metadata & Save
# """

# --- CODE CELL (id: member_a_cell_4_create_docs_save_v2) ---
# One entry per review chunk: {"id", "content", "metadata"}.
documents_for_indexing = []
if df.empty or 'cleaned_review' not in df.columns or 'Rating' not in df.columns:
    print("\nSkipping document creation: DataFrame is empty or required columns ('cleaned_review', 'Rating') are missing.")
    documents_for_indexing = []
else:
    print("\nCreating document chunks with metadata...")
    for idx, row in df.iterrows():
        # The DataFrame index label doubles as the review identifier.
        original_review_identifier = idx
        try:
            rating_val = int(row["Rating"])
        except (ValueError, TypeError):
            # Ratings that cannot be converted fall back to 0.
            rating_val = 0

        for i, chunk_content in enumerate(chunk_text(row['cleaned_review'])):
            if not chunk_content.strip():
                continue

            documents_for_indexing.append({
                "id": str(uuid.uuid4()),
                "content": chunk_content,
                "metadata": {
                    "original_review_id": original_review_identifier,
                    "source": f"tripadvisor_review_{original_review_identifier}",
                    "chunk_sequential_id_in_review": i,
                    "rating": rating_val,
                    "timestamp_processed": datetime.now().isoformat(),
                    "category": "hotel_review"
                }
            })
    print(f"Created {len(documents_for_indexing)} document chunks for indexing.")
    if documents_for_indexing:
        print("Example document structure:")
        print(json.dumps(documents_for_indexing[0], indent=2))

        processed_json_path = "processed_docs.json"
        processed_csv_path = "processed_docs.csv"

        print(f"\nSaving processed documents to {processed_json_path} and {processed_csv_path}...")
        try:
            # Nested structure goes to JSON as-is.
            with open(processed_json_path, "w", encoding="utf-8") as f:
                json.dump(documents_for_indexing, f, indent=2)
            print(f"Successfully saved to {processed_json_path}")

            # The CSV gets one flat row per chunk: id, content, then the metadata fields.
            df_to_save_data = [
                {"id": doc["id"], "content": doc["content"], **doc["metadata"]}
                for doc in documents_for_indexing
            ]
            pd.DataFrame(df_to_save_data).to_csv(processed_csv_path, index=False, encoding="utf-8")
            print(f"Successfully saved to {processed_csv_path}")
        except Exception as e:
            print(f"Error saving processed documents: {e}")
# --- END CODE CELL ---

# """
# ### 1.5. Vector Indexing & Retrieval Setup
# """

# --- CODE CELL (id: member_a_cell_5_vector_indexing_v2) ---
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient, models as qdrant_models

# Name of the Qdrant collection that holds the review-chunk embeddings.
qdrant_collection_name = "travel_guide_rag_collection_v2"
# Both stay None unless the whole indexing pipeline below succeeds;
# `search_vector_db` checks them before doing any work.
qdrant_client_instance = None
embedding_model_instance = None

if documents_for_indexing:
    print("\nSetting up vector indexing...")
    model_name_for_embedding = "all-MiniLM-L6-v2"
    try:
        embedding_model_instance = SentenceTransformer(model_name_for_embedding)
        print(f"Embedding model '{model_name_for_embedding}' loaded.")

        qdrant_client_instance = QdrantClient(":memory:")
        print("Qdrant client initialized (in-memory).")

        vector_size = embedding_model_instance.get_sentence_embedding_dimension()
        print(f"Detected vector size for embeddings: {vector_size}")

        # `recreate_collection` is deprecated in qdrant-client; the supported
        # equivalent is to drop the collection if it exists and create it anew.
        if qdrant_client_instance.collection_exists(collection_name=qdrant_collection_name):
            qdrant_client_instance.delete_collection(collection_name=qdrant_collection_name)
        qdrant_client_instance.create_collection(
            collection_name=qdrant_collection_name,
            vectors_config=qdrant_models.VectorParams(size=vector_size, distance=qdrant_models.Distance.COSINE)
        )
        print(f"Qdrant collection '{qdrant_collection_name}' created/recreated.")

        content_list = [doc["content"] for doc in documents_for_indexing]
        print(f"Generating embeddings for {len(content_list)} content chunks... (This may take a while)")
        embeddings = embedding_model_instance.encode(content_list, show_progress_bar=True)
        print("Embeddings generated.")

        # Pair every document with its embedding; the chunk text is stored in
        # the payload under "text_content" next to the flattened metadata.
        points_to_upsert = [
            qdrant_models.PointStruct(
                id=doc["id"],
                vector=embedding.tolist(),
                payload={"text_content": doc["content"], **doc["metadata"]}
            )
            for doc, embedding in zip(documents_for_indexing, embeddings)
        ]

        if points_to_upsert:
            print(f"Upserting {len(points_to_upsert)} points to Qdrant collection '{qdrant_collection_name}'...")
            qdrant_client_instance.upsert(collection_name=qdrant_collection_name, points=points_to_upsert, wait=True)
            print("Upsert complete.")
            collection_info = qdrant_client_instance.get_collection(collection_name=qdrant_collection_name)
            print(f"Collection '{qdrant_collection_name}' now contains {collection_info.points_count} points.")
        else:
            print("No points generated to upsert into Qdrant.")

    except Exception as e:
        print(f"Error during vector indexing setup or embedding process: {e}")
        # Reset both handles so downstream code sees the index as unavailable.
        qdrant_client_instance = None
        embedding_model_instance = None
else:
    print("\nSkipping vector indexing as no documents were processed or loaded for indexing.")
# --- END CODE CELL ---

# """
# ### 1.6. Retrieval Function (for Member B)
# """

# --- CODE CELL (id: member_a_cell_6_retrieval_func_v2) ---
def search_vector_db(query_text: str, top_k: int = 5) -> list[dict]:
    """Return the stored review chunks most similar to ``query_text``.

    Args:
        query_text: Natural-language query; must be a non-empty string.
        top_k: Maximum number of hits to return.

    Returns:
        A list of dicts with keys ``"id"`` (str), ``"score"`` (float),
        ``"content"`` (the chunk text) and ``"metadata"`` (the remaining payload
        fields). An empty list is returned, after printing a message, when the
        index is not initialised, the query is invalid, or encoding/searching
        fails.
    """
    if not qdrant_client_instance or not embedding_model_instance:
        print("ERROR in search_vector_db: Qdrant client or embedding model is not initialized. Cannot perform search.")
        return []
    if not query_text or not isinstance(query_text, str):
        print("ERROR in search_vector_db: Query text is invalid.")
        return []

    try:
        query_embedding = embedding_model_instance.encode([query_text])[0]
    except Exception as e:
        print(f"Error encoding query text in search_vector_db: {e}")
        return []

    try:
        # `QdrantClient.search` is deprecated; `query_points` is its replacement
        # and wraps the scored hits in a response object (`.points`).
        query_response = qdrant_client_instance.query_points(
            collection_name=qdrant_collection_name,
            query=query_embedding.tolist(),
            limit=top_k,
            with_payload=True
        )
        search_hits = query_response.points
    except Exception as e:
        print(f"Error during Qdrant search operation: {e}")
        return []

    formatted_search_results = []
    for hit in search_hits:
        payload = hit.payload if hit.payload else {}
        formatted_search_results.append({
            "id": str(hit.id),
            "score": float(hit.score),
            "content": payload.get("text_content", ""),
            # Everything except the chunk text itself is reported as metadata.
            "metadata": {k: v for k, v in payload.items() if k != "text_content"}
        })
    return formatted_search_results

print("`search_vector_db` function defined for retrieving documents.")

# Smoke test: run one sample query, but only when the index was actually built.
if not (qdrant_client_instance and embedding_model_instance and documents_for_indexing):
    print("\nSkipping retrieval function test: Qdrant client, embedding model, or indexed documents are not ready.")
else:
    try:
        collection_info_for_test = qdrant_client_instance.get_collection(collection_name=qdrant_collection_name)
        if collection_info_for_test.points_count <= 0:
            print("Skipping retrieval function test as Qdrant collection is empty.")
        else:
            test_search_query = "hotel with excellent spa facilities and city view"
            print("\n--- Testing `search_vector_db` function ---")
            print(f"Test Query: \"{test_search_query}\"")
            retrieved_search_results = search_vector_db(test_search_query, top_k=2)
            if not retrieved_search_results:
                print("No results found for the test search query or the search operation failed.")
            else:
                print(f"Found {len(retrieved_search_results)} results for test query:")
                for rank, res_item in enumerate(retrieved_search_results, start=1):
                    print(f"  Result {rank}: ID: {res_item['id']}, Score: {res_item['score']:.4f}")
                    print(f"    Content (snippet): {res_item['content'][:120]}...")
                    print(f"    Metadata (source): {res_item['metadata'].get('source', 'N/A')}")
    except Exception as e_test_search:
        print(f"Could not perform retrieval function test due to an error: {e_test_search}")
# --- END CODE CELL ---

# """
# ---
#
# ## Part 2: Member B - Agent Orchestration & LLM Lead
# """

# """
# ### 2.1. Imports for CrewAI & LLM Setup
# """

# --- CODE CELL (id: member_b_cell_1_imports_llm_v2) ---
from crewai import Agent, Task, Crew, Process
from crewai.tools import BaseTool
from langchain_openai import ChatOpenAI
# os already imported, OPENAI_API_KEY already loaded

# Chat model shared by all CrewAI agents; remains None when it cannot be built.
llm_for_crewai = None

if OPENAI_API_KEY:
    try:
        llm_for_crewai = ChatOpenAI(
            model_name="gpt-3.5-turbo-0125",
            temperature=0.2,
            openai_api_key=OPENAI_API_KEY,
        )
        print("OpenAI LLM for CrewAI initialized (gpt-3.5-turbo-0125).")
    except Exception as llm_init_error:
        print(f"Error initializing OpenAI LLM for CrewAI: {llm_init_error}. Check your API key and relevant package versions.")
else:
    print("ERROR: OPENAI_API_KEY environment variable not set. CrewAI agents cannot be initialized.")
# --- END CODE CELL ---

# """
# ### 2.2. Retrieval Agent Tool Setup
# """

# --- CODE CELL (id: member_b_cell_2_retrieval_tool_v2) ---
# Tool instance handed to the retriever agent; stays None if setup fails.
crewai_vector_db_tool = None
try:
    # Bare name lookup: raises NameError when the retrieval function from
    # Part 1 has not been defined in this session.
    search_vector_db
    print("\n`search_vector_db` function from Member A's work is available.")

    # CrewAI tool that forwards a query string to `search_vector_db` and
    # renders the top three hits as a plain-text block.
    class VectorDBQueryToolForCrew(BaseTool):
        name: str = "Travel Information Vector Database Query Tool"
        description: str = (
            "Use this specialized tool to query the travel vector database. "
            "Input MUST be the user's specific query string. "
            "The tool will find relevant hotel reviews or travel information snippets."
        )

        def _run(self, user_query: str) -> str:
            # Reject anything that is not a non-blank string.
            if not (isinstance(user_query, str) and user_query.strip()):
                return "Error: Invalid input. The user query must be a non-empty string."

            print(f"[VectorDBQueryToolForCrew._run] Received query for DB search: '{user_query}'")
            hits = search_vector_db(query_text=user_query, top_k=3)

            if not hits:
                return "No relevant information snippets were found in the database for this specific query."

            # Header line first, then one entry per hit with the content cut to 300 chars.
            parts = ["Retrieved Information Snippets (Format: [Source, Rating, Relevance Score] Content):\n"]
            for position, hit in enumerate(hits, start=1):
                text = hit.get('content', 'N/A')
                meta = hit.get('metadata', {})
                relevance = hit.get('score', 0.0)
                origin = meta.get('source', 'Unknown')
                stars = meta.get('rating', 'N/A')
                parts.append(
                    f"Snippet {position}: [{origin}, Rating: {stars}, Score: {relevance:.3f}] {text[:300]}...\n---\n"
                )
            return "".join(parts)

    crewai_vector_db_tool = VectorDBQueryToolForCrew()
    print("VectorDBQueryToolForCrew (CrewAI tool) created successfully.")

    # Exercise the tool once, outside of any agent.
    print("\n--- Testing the CrewAI Tool directly ---")
    test_query_for_crewai_tool_instance = "any good hotels in downtown with a gym and free breakfast?"
    print(f"Tool Test Query: '{test_query_for_crewai_tool_instance}'")
    retrieved_info_from_tool_test = crewai_vector_db_tool.run(test_query_for_crewai_tool_instance)
    print(f"Tool Retrieved Info (first 600 chars):\n{retrieved_info_from_tool_test[:600]}...")

except NameError:
    print("CRITICAL ERROR: `search_vector_db` function from Member A is not defined. Cannot create CrewAI tool. Please ensure Member A's cells are run successfully first.")
except Exception as tool_setup_error:
    print(f"An unexpected error occurred during CrewAI tool setup: {tool_setup_error}")
# --- END CODE CELL ---

# """
# ### 2.3. Define CrewAI Agents
# """

# --- CODE CELL (id: member_b_cell_3_define_agents_v2) ---
# The three pipeline agents; each stays None unless both the LLM and the
# vector-DB tool were initialised.
crewai_retriever_agent = None
crewai_summarizer_agent = None
crewai_composer_agent = None

if not (llm_for_crewai and crewai_vector_db_tool):
    print("\nSkipping CrewAI agent definitions: LLM for CrewAI or the CrewAI Vector DB Tool is not initialized.")
else:
    print("\nDefining CrewAI agents...")
    # Settings that are identical for every agent.
    _shared_agent_options = {
        "llm": llm_for_crewai,
        "verbose": True,
        "allow_delegation": False,
        "memory": False,
    }

    # Retriever: the only agent that is given the vector-DB tool.
    crewai_retriever_agent = Agent(
        role='Travel Information Retrieval Specialist',
        goal=(
            "Efficiently search the travel vector database using the 'Travel Information Vector Database Query Tool'. "
            "Your sole objective is to pass the user's query to this tool and return its exact output."
        ),
        backstory=(
            "You are an AI assistant specialized in data retrieval. You have one tool: 'Travel Information Vector Database Query Tool'."
            "When given a user query, you must use this tool by providing the query as input. Do not attempt to answer the query yourself; only use the tool."
        ),
        tools=[crewai_vector_db_tool],
        **_shared_agent_options,
    )
    print("CrewAI Retriever Agent defined.")

    # Summarizer: condenses the retrieved snippets.
    crewai_summarizer_agent = Agent(
        role='Information Synthesis Expert',
        goal=(
            "Take the collection of retrieved text snippets (output from the Retriever Agent) and the original user query. "
            "Identify the most critical information relevant to the query, eliminate redundancy, and produce a concise, factual summary."
        ),
        backstory=(
            "You are an AI assistant skilled in processing and synthesizing textual information from multiple sources. "
            "You receive raw text snippets. Your task is to distill these into a coherent summary that directly addresses the user's needs as stated in their original query. Focus on facts and key opinions."
        ),
        **_shared_agent_options,
    )
    print("CrewAI Summarizer Agent defined.")

    # Composer: turns the summary into the user-facing answer.
    crewai_composer_agent = Agent(
        role='Travel Recommendation Composer and Advisor',
        goal=(
            "Generate a helpful, well-formatted, and user-friendly travel recommendation or response. "
            "This response should be based *only* on the synthesized summary provided by the Summarizer Agent and should directly address the user's original query. "
            "Adopt a friendly, knowledgeable travel assistant persona."
        ),
        backstory=(
            "You are a creative and articulate AI travel assistant. You receive a concise summary of relevant information. "
            "Your responsibility is to craft this summary into an engaging and practical piece of travel advice, an itinerary suggestion, or a direct recommendation. "
            "Pay close attention to tone (friendly, helpful), clarity, and formatting (e.g., use bullet points, bolding for emphasis if it enhances readability)."
        ),
        **_shared_agent_options,
    )
    print("CrewAI Composer Agent defined.")
# --- END CODE CELL ---

# """
# ### 2.4. Define Main Function for Crew Execution (for Member C) & Demo
# """

# --- CODE CELL (id: member_b_cell_4_crew_function_demo_v2) ---
def get_travel_recommendation_crewai(user_query: str) -> str:
    """Answer a travel question by running the retrieve/summarize/compose crew.

    Args:
        user_query: The user's question; must be a non-blank string.

    Returns:
        The crew's final output converted to ``str``. If the agents are not
        initialised, the query is invalid, or the crew run raises, an
        explanatory error string is returned instead (no exception propagates
        from the crew run).
    """
    pipeline_agents = [crewai_retriever_agent, crewai_summarizer_agent, crewai_composer_agent]
    if not all(pipeline_agents):
        return "Error: One or more CrewAI agents are not initialized. Please check the setup and logs."
    if not (isinstance(user_query, str) and user_query.strip()):
        return "Error: User query is invalid (empty or not a string)."

    print(f"\n--- Initiating CrewAI Process for Query: '{user_query}' ---")

    # Step 1: fetch raw snippets through the vector-DB tool.
    retrieval_task = Task(
        description=(
            f"A user is asking the following travel-related question: '{user_query}'. "
            f"Your primary objective is to use the 'Travel Information Vector Database Query Tool' by providing it with this exact query: '{user_query}'. "
            "Ensure you extract all relevant text snippets from the database that could help answer this query."
        ),
        expected_output=(
            "A string containing several text snippets retrieved from the vector database via the tool. "
            "Each snippet should be clearly demarcated. This output will be passed to the Summarizer Agent for further processing."
        ),
        agent=crewai_retriever_agent,
    )

    # Step 2: condense the snippets; receives step 1's output as context.
    summarization_task = Task(
        description=(
            f"You have received a collection of retrieved text snippets related to the user's original query: '{user_query}'. "
            "Your task is to carefully analyze these snippets, identify the most critical and relevant pieces of information, "
            "eliminate any redundancy or irrelevant details, and then synthesize a concise, factual summary. "
            "The summary must directly address the key aspects of the user's query."
        ),
        expected_output=(
            "A short, coherent, and neutral summary of the most important facts and opinions extracted from the retrieved texts. "
            "This summary will be used by the Composer Agent to formulate the final user response."
        ),
        agent=crewai_summarizer_agent,
        context=[retrieval_task],
    )

    # Step 3: write the user-facing answer; receives step 2's output as context.
    composition_task = Task(
        description=(
            f"You have received a concise summary of information pertinent to the user's query: '{user_query}'. "
            "Your task is to craft a helpful, friendly, and well-formatted travel recommendation or direct answer for the user. "
            "Use only the information from the provided summary. Adopt a knowledgeable travel assistant persona. "
            "Format the response clearly, using bullet points or paragraphs as appropriate for readability."
        ),
        expected_output=(
            "The final, polished, user-facing response. It should be well-written, directly answer the user's query, "
            "provide practical recommendations if applicable, and be formatted for easy understanding. This is the ultimate output the user will see."
        ),
        agent=crewai_composer_agent,
        context=[summarization_task],
    )

    travel_recommendation_crew = Crew(
        agents=pipeline_agents,
        tasks=[retrieval_task, summarization_task, composition_task],
        process=Process.sequential,
        verbose=True,
        memory=False,
    )

    print("\nKicking off the CrewAI travel recommendation process...")
    try:
        crew_output = travel_recommendation_crew.kickoff()
        print("CrewAI process execution finished successfully.")
        return str(crew_output)
    except Exception as kickoff_error:
        # Failures are reported in the returned string rather than raised.
        print(f"CRITICAL ERROR during CrewAI kickoff or execution: {kickoff_error}")
        return f"I'm sorry, an internal error occurred while processing your request with the AI crew. Details: {kickoff_error}"

print("`get_travel_recommendation_crewai` function defined and ready for use.")

# Demo: run two sample queries end to end, provided every component is ready.
if not all([crewai_retriever_agent, crewai_summarizer_agent, crewai_composer_agent, OPENAI_API_KEY, llm_for_crewai]):
    print("\nSkipping CrewAI demo run: One or more critical components (agents, OpenAI API key, LLM) are not initialized. Please check previous cells for errors.")
else:
    print("\n--- Running CrewAI Demo with Example Query 1 ---")
    example_user_query_crew_1 = "I want to find a quiet, charming boutique hotel in Paris, preferably in the Latin Quarter or Marais, with good reviews for cleanliness."
    final_response_from_crew_1 = get_travel_recommendation_crewai(example_user_query_crew_1)
    print(f"\n--- Final Response for Query: '{example_user_query_crew_1}' ---")
    print(final_response_from_crew_1)

    # Visual separator between the two demo runs.
    print(f"\n{'=' * 80}\n")

    print("\n--- Running CrewAI Demo with Example Query 2 ---")
    example_user_query_crew_2 = "Suggest some unique cultural experiences or hidden gems in Rome for a solo traveler interested in history but wants to avoid huge crowds."
    final_response_from_crew_2 = get_travel_recommendation_crewai(example_user_query_crew_2)
    print(f"\n--- Final Response for Query: '{example_user_query_crew_2}' ---")
    print(final_response_from_crew_2)
# --- END CODE CELL ---

# """
# ---
#
# End of Core Logic Notebook (Member A & B Tasks).
#
# The `get_travel_recommendation_crewai(user_query)` function is now ready. Member C can use this function in the Streamlit UI (`app.py`) by either copying the relevant agent/tool/function definitions or by refactoring this notebook's logic into importable Python modules.
# """

/usr/local/bin/python3
2.19.0
3.9.2
2.19.0
OPENAI_API_KEY loaded successfully (from environment or .env file).
NLTK 'punkt' resource found.
NLTK 'punkt_tab' resource found.
Dataset 'tripadvisor_hotel_reviews.csv' loaded successfully. Shape: (20491, 2)
Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20491 entries, 0 to 20490
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  20491 non-null  object
 1   Rating  20491 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 320.3+ KB

First 5 rows of the dataset:
                                              Review  Rating
0  nice hotel expensive parking got good deal sta...       4
1  ok nothing special charge diamond member hilto...       2
2  nice rooms not 4* experience hotel monaco seat...       3
3  unique, great stay, wonderful time hotel monac...       5
4  great stay great stay, went seahawk game aweso...       5

Cleaning reviews...
Cleaned revie

  qdrant_client_instance.recreate_collection(


Batches:   0%|          | 0/641 [00:00<?, ?it/s]

Embeddings generated.
Upserting 20494 points to Qdrant collection 'travel_guide_rag_collection_v2'...


  qdrant_client_instance.upsert(collection_name=qdrant_collection_name, points=points_to_upsert, wait=True)


Upsert complete.
Collection 'travel_guide_rag_collection_v2' now contains 20494 points.
`search_vector_db` function defined for retrieving documents.

--- Testing `search_vector_db` function ---
Test Query: "hotel with excellent spa facilities and city view"


  search_hits = qdrant_client_instance.search(


Found 2 results for test query:
  Result 1: ID: 48972d80-17af-4ce8-816c-9cbd01aa1452, Score: 0.6264
    Content (snippet): greatplace,hotelstreetfrancatrainstationgreattravellingtrain,closemaincentrebeachesshopssiteswalkingdistance,hotelcleane...
    Metadata (source): tripadvisor_review_10146
  Result 2: ID: 1b2ccb87-89b2-4f51-8ea7-f1c805750f33, Score: 0.6082
    Content (snippet): bestbestreasonaffordablebetterhotelscityseattle,greattimestaygreatservicefriendlyemployees,locationconvenientparkingchea...
    Metadata (source): tripadvisor_review_20473
OpenAI LLM for CrewAI initialized (gpt-3.5-turbo-0125).

`search_vector_db` function from Member A's work is available.
VectorDBQueryToolForCrew (CrewAI tool) created successfully.

--- Testing the CrewAI Tool directly ---
Tool Test Query: 'any good hotels in downtown with a gym and free breakfast?'
Using Tool: Travel Information Vector Database Query Tool
[VectorDBQueryToolForCrew._run] Received query for DB search: 'any good hotels in d

  search_hits = qdrant_client_instance.search(


Tool Retrieved Info (first 600 chars):
Retrieved Information Snippets (Format: [Source, Rating, Relevance Score] Content):
Snippet 1: [tripadvisor_review_4248, Rating: 5, Score: 0.517] greathotellocationstayednightweekaugust,hotelcleanbeautiful,locationgreatrighteatoncentre,easyconnectionsubwayeasywalkdowntown,parkcarforgetit.alittlepriceyvacationerroom240taxesbeatsstayingoutsidecitybuckingtrafficmakingquestionablereservationhotelcitynotfeelsecure.go,...
---
Snippet 2: [tripadvisor_review_2368, Rating: 5, Score: 0.506] greathotel,stayed5daybreak,roomtowersgreatview,hotelstaffpoliteroomscleangreatlocation,...
---
Snippet 3: [tripadv...

Defining CrewAI agents...
CrewAI Retriever Agent defined.
CrewAI Summarizer Agent defined.
CrewAI Composer Agent defined.
`get_travel_recommendation_crewai` function defined and ready for use.

--- Running CrewAI Demo with Example Query 1 ---

--- Initiating CrewAI Process for Query: 'I want to find a quiet, charming boutique hotel in Paris, preferably i

Output()