# Load FAISS, Data, and LLM Securely

In [1]:
import os
import faiss
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import login
from langchain_community.llms import HuggingFaceHub

Load environment variables from .env

In [2]:
load_dotenv()
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")

In [3]:
login(token=HUGGINGFACE_API_KEY)

In [4]:
if HUGGINGFACE_API_KEY:
    print("Hugging Face API Key Loaded Successfully!")
else:
    print("ERROR: Hugging Face API Key not found. Check .env file.")

Hugging Face API Key Loaded Successfully!


# Authenticate with Hugging Face

In [5]:
import requests

In [6]:
HF_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
HF_URL = f"https://api-inference.huggingface.co/models/{HF_MODEL}"

In [7]:
headers = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}
response = requests.get(HF_URL, headers=headers)

In [8]:
if response.status_code == 200:
    print(f"Successfully authenticated with Hugging Face API! Model: {HF_MODEL}")
elif response.status_code == 403:
    print("ERROR: Invalid API Key or access denied.")
elif response.status_code == 404:
    print(f"ERROR: Model '{HF_MODEL}' not found. Check the model name.")
else:
    print(f"ERROR {response.status_code}: {response.text}")

Successfully authenticated with Hugging Face API! Model: mistralai/Mistral-7B-Instruct-v0.3


# Load FAISS Index

In [9]:
faiss_index_path = "../faiss_index.bin"
faiss_index = faiss.read_index(faiss_index_path)

In [10]:
print(f"FAISS index loaded! Number of vectors: {faiss_index.ntotal}")

FAISS index loaded! Number of vectors: 87396


# Load Dataset with Booking Details

In [11]:
df = pd.read_csv("../data/hotel_bookings_with_embeddings.csv")

In [12]:
print("Dataset loaded! Sample data:")
print(df.head())

Dataset loaded! Sample data:
          hotel  is_canceled  lead_time  arrival_date_year  \
0  Resort Hotel            0        342               2015   
1  Resort Hotel            0        737               2015   
2  Resort Hotel            0          7               2015   
3  Resort Hotel            0         13               2015   
4  Resort Hotel            0         14               2015   

   arrival_date_month  arrival_date_week_number  arrival_date_day_of_month  \
0                   7                        27                          1   
1                   7                        27                          1   
2                   7                        27                          1   
3                   7                        27                          1   
4                   7                        27                          1   

   stays_in_weekend_nights  stays_in_week_nights  adults  ...   adr  \
0                        0                     0       2  

# Load LangChain LLM (Mistral)

In [13]:
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",  # Model name
    model_kwargs={"temperature": 0.7, "max_length": 200},  # Adjust parameters as needed
    huggingfacehub_api_token=HUGGINGFACE_API_KEY  # Use API Key from .env
)

  llm = HuggingFaceHub(


# Search FAISS & Get AI Response

In [14]:
def search_faiss(query, top_k=5):
    """
    Converts a user query into an embedding, searches FAISS, 
    and retrieves the most similar hotel bookings.
    """
    from sentence_transformers import SentenceTransformer

    # Load embedding model
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Convert query into an embedding
    query_embedding = model.encode([query])

    # Search FAISS for the top_k most similar results
    D, I = faiss_index.search(np.array(query_embedding, dtype=np.float32), k=top_k)

    # Retrieve matching rows from the dataset
    results = df.iloc[I[0]][['text_data', 'hotel', 'country', 'market_segment', 'reservation_status']]
    
    return results

In [15]:
def generate_ai_response(query, top_k=3):
    """
    Searches FAISS, retrieves relevant hotel bookings, 
    and generates a natural language response using LangChain.
    """
    # Retrieve similar bookings from FAISS
    results = search_faiss(query, top_k)

    # Format retrieved results into a readable summary
    context = "\n".join(
        [f"Hotel: {row['hotel']}, Country: {row['country']}, Market Segment: {row['market_segment']}, Status: {row['reservation_status']}"
         for _, row in results.iterrows()]
    )

    # Construct the prompt
    prompt = f"User Query: {query}\n\nBased on the following bookings:\n{context}\n\nCan you summarize this information?"

    # Generate AI response using LangChain
    response = llm.invoke(prompt)

    return response

In [16]:
# Example Query
user_query = "Find me hotels in Portugal with no deposit."
ai_response = generate_ai_response(user_query)

print("\nTravelIQ Response:")
print(ai_response)




TravelIQ Response:
User Query: Find me hotels in Portugal with no deposit.

Based on the following bookings:
Hotel: City Hotel, Country: Portugal, Market Segment: Complementary, Status: Check-Out
Hotel: City Hotel, Country: Portugal, Market Segment: Complementary, Status: Check-Out
Hotel: City Hotel, Country: Portugal, Market Segment: Complementary, Status: Check-Out

Can you summarize this information?

Yes, I can help with that. The information provided suggests that there are three bookings for the City Hotel in Portugal, all under the market segment of "Complementary". This market segment seems to indicate that no deposit is required for these bookings as they are possibly part of a complimentary or free stay offer. However, it's essential to cross-check this information with the hotel or booking platform's terms and conditions to confirm the exact deposit policy for these reservations.
