In [None]:
# Install the necessary libraries
!pip install google-genai faiss-cpu sentence-transformers pandas --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m83.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os
from google import genai
from IPython.display import clear_output # Needed for clear_output()
import pandas as pd
import numpy as np

# Prompt the user for the API key.
# getpass does not echo what is typed, so the key never lands in the cell output
# (input() would display it until clear_output() runs).
from getpass import getpass

os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ").strip()
clear_output()  # Clears the prompt from the output

# Initialize the Gemini Client (no key is passed explicitly; it is read from the
# GEMINI_API_KEY environment variable set above).
try:
    client = genai.Client()
    print("✅ Setup Complete: Gemini Client initialized and API Key loaded.")
except Exception as e:
    print(f"❌ ERROR: Could not initialize client. Please check the API key you entered. Error: {e}")
    # Stop here: every later cell needs `client`, and continuing would either fail
    # with a NameError or silently reuse a stale client from an earlier run.
    raise

# Define the models we will use
EMBEDDING_MODEL = 'text-embedding-004'  # A strong embedding model
# text-embedding-004 generates a vector that is 768 dimensions long.
# Therefore, each row (chunk) contains 768 numerical values, resulting in 768 columns.
GENERATION_MODEL = 'gemini-2.5-flash'  # A fast and capable generation model

print("\nConfiguration:")
print(f"- Embedding Model: {EMBEDDING_MODEL}")
print(f"- Generation Model: {GENERATION_MODEL}")

✅ Setup Complete: Gemini Client initialized and API Key loaded.

Configuration:
- Embedding Model: text-embedding-004
- Generation Model: gemini-2.5-flash


# **FAQ - Text splitting/chunking**

In [None]:
raw_corpus_text = """
Q1. What does Eureka Forbes offer?
Eureka Forbes provides home health and hygiene solutions including water purifiers (AquaGuard & AquaSure), vacuum cleaners, air purifiers, security systems, and home service plans.

Q2. What is the difference between AquaGuard and AquaSure?
AquaGuard – premium range with advanced purification technologies and IoT-enabled features.
AquaSure – value-for-money models offering essential purification.

Q3. How do I choose the right water purifier for my home?
It depends on your water source:
RO – Borewell/Hard water (TDS > 500 ppm)
UV – Municipal/Soft water (TDS < 200 ppm)
RO+UV/RO+UV+UF – Mixed water supply
You can also request a free water test.

Q4. How often should filters be replaced?
Typically every 6–12 months, depending on water quality and usage. The purifier alerts you when filters need replacement.

Q5. What is the TDS range suitable for drinking water?
As per BIS standards: 50–150 ppm is ideal for drinking.

Q6. My AquaGuard is showing an error alert. What should I do?
Most alerts indicate filter change or sensor issues. You can:
Restart the unit
Check the display panel
Book a service request via Eureka Forbes app, customer care, or website

Q7. How do I book a service request?
You can book through:
Eureka Forbes website
Mobile app
Customer care number: 1860 266 1177
WhatsApp service (where available)

Q8. What is the average installation time?
Installation is usually completed within 24–48 hours after delivery.

Q9. Are installation and demo free?
For most products, installation is free
Some models require paid consumables (pre-filter housing, extra pipes, booster pump, etc.)

Q10. What is the warranty on Eureka Forbes products?
Water purifiers: 1 year warranty (+ extended warranty available)
Vacuum cleaners: 1–2 years depending on model

Q11. What does the warranty cover?
It covers manufacturing defects, electrical failures, and service technician visits. It does not cover consumables such as filters.

Q12. What is AMC (Annual Maintenance Contract)?
AMC includes:
Periodic services
Free filter replacement
Free technician visits
Priority support
Ideal for maintaining RO purifiers.

Q13. What types of vacuum cleaners does Eureka Forbes offer?
Wet & Dry Vacuum Cleaners
Handheld Vacuums
Robotic Vacuum Cleaners
Upright and Stick Vacuums

Q14. How often should vacuum filters be cleaned?
For best performance, clean after every 2–3 uses and replace every 6–12 months depending on usage.

Q15. Are spare parts easily available?
Yes, all genuine spare parts and accessories are available through authorized service centres and the official website.

Q16. What does the air purifier remove?
Removes PM2.5, dust, smoke, allergens, pet dander, VOCs, and odours using multi-layer filtration including HEPA filters.

Q17. How often should the HEPA filter be replaced?
Usually every 9–12 months, depending on usage and air quality.

Q18. What payment methods are accepted?
Credit/Debit Cards, UPI, Net Banking, EMI (No-cost EMI on select models), and COD in select cities.

Q19. Can I return or replace a product?
Returns are allowed under the return policy terms if the product is defective or not as expected. Replacement is processed after technician verification.

Q20. Why is my RO water flow low?
Possible reasons:
Choked filters
Low inlet water pressure
Blocked membrane
Pump malfunction
A technician visit is recommended.

Q21. Water tastes odd after filter change. Why?
This is normal for the first 10–15 litres. Flush the purifier before drinking.

Q22. My purifier is leaking. What should I do?
Switch off the unit and inlet valve, then raise a service request. Usually caused by loose pipes or worn-out connectors.

Q23. How can I contact Eureka Forbes customer care?
Call: 1860 266 1177
Website: www.eurekaforbes.com
App: Eureka Forbes Service App
Email support varies by zone (can be added if needed).
"""

# Split the text on the "Q<number>. " markers to create one clean chunk per Q&A pair

import re  # Regular expression module

# Split on every "Q<number>. " marker that starts a new line. Because the pattern is
# a capturing group, re.split keeps the markers, so the result alternates:
# [text before first marker, marker, body, marker, body, ...]
chunks = re.split(r'(\nQ\d+\. )', raw_corpus_text)

# Reconstruct the Q&A pairs
clean_chunks = []
for i in range(1, len(chunks), 2):
    marker = chunks[i].strip()                       # e.g. "Q7."
    body = chunks[i + 1].strip().replace('\n', ' ')  # question + answer on one line
    # Re-insert the single space that strip() removed from the marker, so the chunk
    # reads "Q7. How do I ..." instead of "Q7.How do I ...".
    clean_chunks.append(f"{marker} {body}".strip())

# Final list of chunks for embedding (drop any empty strings)
final_chunks = [chunk for chunk in clean_chunks if chunk]

print(f"Total initial Q&A pairs (chunks) created: {len(final_chunks)}")
if final_chunks:
    # Show the last chunk as a sample; avoids a hard-coded index that breaks
    # when the corpus grows or shrinks.
    print(f"Example of a cleaned chunk:\n{final_chunks[-1]}")

Total initial Q&A pairs (chunks) created: 23
Example of a cleaned chunk:
Q23.How can I contact Eureka Forbes customer care? Call: 1860 266 1177 Website: www.eurekaforbes.com App: Eureka Forbes Service App Email support varies by zone (can be added if needed).


# EMBEDDING (generating vectors)

In [None]:
# Import necessary type hint
from typing import List
import numpy as np
import faiss

# Embedding helper built on the Gemini API
def embed_text(texts: List[str]) -> List[List[float]]:
    """
    Embed a batch of texts with the configured Gemini embedding model.

    Args:
        texts: The strings to embed, sent to the API in a single request.

    Returns:
        One vector (list of floats) per embedding in the API response, or an
        empty list if anything goes wrong (the error is printed, not raised).
    """
    try:
        # Single API call for the whole batch; `client` and EMBEDDING_MODEL are
        # the module-level objects created in the setup cell.
        api_result = client.models.embed_content(
            contents=texts,
            model=EMBEDDING_MODEL,
        )

        # Each entry of `embeddings` wraps the raw numbers in `.values`.
        vectors = []
        for item in api_result.embeddings:
            vectors.append(item.values)
    except Exception as err:
        print(f"An error occurred during embedding: {err}")
        return []

    return vectors

print(f"Generating embeddings for all {len(final_chunks)} chunks...")
# 1. Generate embeddings for all chunks
chunk_embeddings = embed_text(final_chunks)

# embed_text() returns [] when the API call fails. Stop here with a clear message
# instead of letting an empty array reach the indexing cell.
if not chunk_embeddings or len(chunk_embeddings) != len(final_chunks):
    raise RuntimeError(
        f"Expected {len(final_chunks)} embeddings but got {len(chunk_embeddings)}; "
        "see the error printed above and re-run this cell."
    )

# 2. Convert to a 2D float32 NumPy array (one row per chunk), ready to be added
#    to the FAISS index in the next cell.
chunk_embeddings_np = np.array(chunk_embeddings, dtype='float32')

# Verification
print("\n--- Embedding Results ---")
print(f"Number of embeddings generated: {len(chunk_embeddings)}")
print(f"Dimension of each embedding vector: {len(chunk_embeddings[0])}")
print("✅ Embedding successful!")

Generating embeddings for all 23 chunks...

--- Embedding Results ---
Number of embeddings generated: 23
Dimension of each embedding vector: 768
✅ Embedding successful!


# Indexing (FAISS Vector Store) – Facebook AI Similarity Search

In [None]:
# Code Block C (Indexing)

# Width of one embedding vector = number of columns of the (chunks x dimensions)
# matrix produced in the embedding step.
embedding_dimension = chunk_embeddings_np.shape[1]

# Build a flat (exhaustive-search) index that compares vectors by L2 / Euclidean
# distance: the smaller the distance, the closer the chunk is to the query.
# Brute force is the simplest choice for a corpus this small.
index = faiss.IndexFlatL2(embedding_dimension)

# Load every chunk vector; row i of the index corresponds to chunk i.
index.add(chunk_embeddings_np)

# Keep the chunk texts under the name the retrieval step expects.
corpus_chunks = final_chunks

# Verification
print(
    "\n--- FAISS Indexing Results ---",
    f"FAISS index created with dimension: {embedding_dimension}",
    f"Total vectors in the index: {index.ntotal}",
    "\n✅ RAG Corpus Preparation Complete: Chunks Embedded and Indexed!",
    sep="\n",
)


--- FAISS Indexing Results ---
FAISS index created with dimension: 768
Total vectors in the index: 23

✅ RAG Corpus Preparation Complete: Chunks Embedded and Indexed!


# **Building RAG**
## Setting K=6, which retrieves the top 6 most similar chunks.
If we reduce it to 3, the answer might not be meaningful. For example, if we ask two questions at once, the model answers only the first question, because retrieval is limited to the top 3 chunks.

**To reduce hallucination** - Gave the LLM a system prompt asking it to stay strictly within the provided context and to verify before answering.

Initially, the output produced by the AI agent mentioned "As per the data provided". Hence, the prompt itself now asks the agent not to use such phrasing, which solved this problem.

In [None]:
# --- Step 4: Building the RAG Function ---
from typing import Tuple, List

# Define the number of chunks to retrieve for context
TOP_K = 6

def get_rag_context(query: str, k: int = TOP_K) -> str:
    """
    Retrieve the text chunks most similar to `query` and join them into one string.

    Steps:
        1. Embed the query with embed_text().
        2. Search the FAISS index for the k nearest chunk vectors.
        3. Join the matching chunks with a "\\n---\\n" separator.

    Args:
        query: The user's question.
        k: Maximum number of chunks to retrieve (defaults to TOP_K).

    Returns:
        The concatenated context string. Empty if the query could not be
        embedded or the index returned no matches.
    """
    # 1. Embed the query (embed_text expects a list and returns [] on failure)
    query_embedding = embed_text([query])
    if not query_embedding:
        # Without a query vector there is nothing to search with; return an empty
        # context rather than passing a malformed array to FAISS.
        return ""

    # FAISS expects a 2D float32 array of shape (n_queries, dimension)
    query_embedding_np = np.array(query_embedding, dtype='float32')

    # 2. Search the index: distances and positions of the k nearest chunks
    _distances, indices = index.search(query_embedding_np, k)

    # 3. Map positions back to text. FAISS pads the result with -1 when fewer than
    #    k vectors are available; skip those, because corpus_chunks[-1] would
    #    silently return the last chunk.
    retrieved_chunks = [corpus_chunks[int(i)] for i in indices[0] if i >= 0]

    return "\n---\n".join(retrieved_chunks)

def generate_rag_response(query: str) -> str:
    """
    Performs the full RAG process (Retrieval + Generation) for one stand-alone query.

    The retrieved context is combined with the user's query and an instruction
    block, then sent to the Gemini generation model.

    Args:
        query: The customer's question.

    Returns:
        The model's answer as text. If the API call fails, an apology message
        containing the error. If the model returns no text, a fallback message
        pointing the customer to the helpline.
    """
    # 1. Retrieve the context
    retrieved_context = get_rag_context(query)

    # 2. Instruction block for the LLM. It sets the bot's persona and the
    #    guardrails: answer strictly from the context, no external knowledge,
    #    professional and helpful tone.
    system_prompt = (
        "You are an expert conversational AI bot for Eureka Forbes, designed to answer customer queries. "
        "Your response MUST be based STRICTLY on the context provided below.No need to mention as per the data provided. "
        "Do not use external knowledge. If the answer is not in the context, state that clearly but politely that you dont know about it and please call a toll free number 1860 266 1177 which is also available on WhatsApp. "
        "Keep the tone professional and helpful."
    )

    # 3. Construct the full prompt: guardrails, then the delimited context, then the query
    full_prompt = (
        f"{system_prompt}\n\n"
        f"--- CONTEXT START ---\n"
        f"{retrieved_context}\n"
        f"--- CONTEXT END ---\n\n"
        f"CUSTOMER QUERY: {query}"
    )

    # 4. Generate the response
    try:
        response = client.models.generate_content(
            model=GENERATION_MODEL,
            contents=full_prompt,
        )
        answer = response.text
    except Exception as e:
        return f"Sorry, an error occurred during generation: {e}"

    # response.text can be None/empty when the model returns no text part;
    # always hand the caller a string, as the return annotation promises.
    if not answer:
        return (
            "Sorry, I could not generate a response for that question. "
            "Please try rephrasing it, or call our toll free number 1860 266 1177, "
            "which is also available on WhatsApp."
        )
    return answer

print("✅ RAG Retrieval and Generation functions defined.")

# --- Test the RAG Function ---
# A two-part question, to check that retrieval brings back chunks for both topics.
test_query = "How often should I change my filters and why is my water pressure low?"
test_response = generate_rag_response(test_query)

print(
    "\n--- Test Query Results ---",
    f"Query: {test_query}",
    f"Response:\n{test_response}",
    sep="\n",
)

✅ RAG Retrieval and Generation functions defined.

--- Test Query Results ---
Query: How often should I change my filters and why is my water pressure low?
Response:
Filters should typically be replaced every 6–12 months, depending on water quality and usage. Your purifier will alert you when the filters need replacement.

If your RO water flow is low, possible reasons include choked filters, low inlet water pressure, a blocked membrane, or a pump malfunction. A technician visit is recommended for this issue.


In [None]:
# --- Step 5: Conversational Loop ---

# IMPORTANT: Ensure that the following global variables are defined from previous steps:
# - client (Gemini API client)
# - GENERATION_MODEL
# - EMBEDDING_MODEL
# - index (FAISS index)
# - corpus_chunks (List of text chunks)
# - embed_text (Function from Code Block B)
# - generate_rag_response (Function from Code Block D)

print("--- Eureka Forbes RAG Chatbot Initialized ---")
print("You can now ask questions about the Eureka Forbes documentation.")
print("Type 'quit' or 'exit' to end the session.")
print("-" * 50)

# Start the interactive loop. Each turn is independent: this version keeps no memory
# of earlier questions.
while True:
    try:
        user_input = input("You: ").strip()

        if user_input.lower() in ['quit', 'exit']:
            print("Chatbot session ended. Goodbye!")
            break

        # Ignore blank lines and prompt again
        if not user_input:
            continue

        print("Chatbot: Thinking...")

        # Generate the RAG response
        response = generate_rag_response(user_input)

        print(f"Chatbot: {response}\n")

    except (KeyboardInterrupt, EOFError):
        # Interrupting the kernel or closing the input stream is a normal way to
        # leave the chat; end the session cleanly instead of with a traceback.
        print("\nChatbot session ended. Goodbye!")
        break
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        break

--- Eureka Forbes RAG Chatbot Initialized ---
You can now ask questions about the Eureka Forbes documentation.
Type 'quit' or 'exit' to end the session.
--------------------------------------------------
You: What is warranty and amc coverage
Chatbot: Thinking...
Chatbot: The warranty covers manufacturing defects, electrical failures, and service technician visits, but it does not cover consumables such as filters. Water purifiers typically have a 1-year warranty with extended warranty options, while vacuum cleaners have a 1–2 year warranty depending on the model.

The Annual Maintenance Contract (AMC) includes periodic services, free filter replacement, free technician visits, and priority support, making it ideal for maintaining RO purifiers.

You: What is the warranty on the RO purifier?
Chatbot: Thinking...
Chatbot: Eureka Forbes water purifiers come with a 1-year warranty, and an extended warranty is also available.

You: Does it cover filters?
Chatbot: Thinking...
Chatbot: Filter

# **Getting conversational memory up**

In [None]:
# Initialize a new chat session when the program starts.
# The same session object is reused for every turn of the memory-enabled chatbot
# below, so earlier messages stay available to the model. Re-run this cell to
# start over with a fresh session.
chat_session = client.chats.create(model=GENERATION_MODEL)

print("✅ Chat session initialized for conversational memory.")

✅ Chat session initialized for conversational memory.


In [None]:
# --- Revised Generation Function for Chat History ---

def generate_rag_response_with_history(query: str, chat_session_obj) -> str:
    """
    Performs RAG and sends the context and query to the Gemini Chat session.

    Args:
        query: The customer's current message.
        chat_session_obj: The chat session to send the message through; it must
            provide send_message(). Conversation history is kept by this object.

    Returns:
        The model's answer as text. If the API call fails, an apology message
        containing the error. If the model returns no text, a fallback message
        pointing the customer to the helpline.
    """
    # 1. Retrieval: the context is looked up from the CURRENT query only. A short
    #    follow-up such as "Is it free of cost" is therefore matched on its own
    #    words, not on the topic of earlier turns.
    retrieved_context = get_rag_context(query)

    # 2. Instruction block (persona + guardrails). It is sent again with every
    #    message, together with the freshly retrieved context.
    system_instruction = (
        "You are an expert conversational AI bot for Eureka Forbes. "
        "Your response MUST be based ONLY on the context provided in this prompt. "
        "Do not use external knowledge. The context is enclosed in the CONTEXT START/END tags. "
        "Keep the tone professional and helpful. Do not mention 'as per the data provided' or similar phrasing. "
        "If the answer is not in the context, state that politely that you dont know about it and please call a toll free number 1860 266 1177 which is also available on WhatsApp."
    )

    # Combine the system instruction and context to guard the chat model
    guarded_context_prompt = (
        f"{system_instruction}\n\n"
        f"--- CONTEXT START ---\n"
        f"{retrieved_context}\n"
        f"--- CONTEXT END ---\n\n"
        f"CUSTOMER QUERY: {query}"
    )

    # 3. Generation: send the message through the chat object (history is preserved)
    try:
        response = chat_session_obj.send_message(
            guarded_context_prompt
        )
        answer = response.text
    except Exception as e:
        return f"Sorry, an error occurred during generation: {e}"

    # response.text can be None/empty when the model returns no text part;
    # always hand the caller a string, as the return annotation promises.
    if not answer:
        return (
            "Sorry, I could not generate a response for that question. "
            "Please try rephrasing it, or call our toll free number 1860 266 1177, "
            "which is also available on WhatsApp."
        )
    return answer

# Confirmation that this cell ran and the history-aware generation function is defined.
print("✅ Generation function updated to utilize chat history.")

✅ Generation function updated to utilize chat history.


In [None]:
# --- Revised Conversational Loop (Step 6) ---
# Same loop as Step 5, but every turn goes through the shared `chat_session`, so the
# model can see earlier messages when answering a follow-up.

print("--- Eureka Forbes RAG Chatbot (with Memory) Initialized ---")
print("You can now ask follow-up questions (e.g., 'Does it cover filters?').")
print("Type 'quit' or 'exit' to end the session.")
print("-" * 50)

# Start the interactive loop
while True:
    try:
        user_input = input("You: ").strip()

        if user_input.lower() in ['quit', 'exit']:
            print("Chatbot session ended. Goodbye!")
            break

        # Ignore blank lines and prompt again
        if not user_input:
            continue

        print("Chatbot: Thinking...")

        # Call the history-aware function and pass the chat session
        response = generate_rag_response_with_history(user_input, chat_session)

        print(f"Chatbot: {response}\n")

    except (KeyboardInterrupt, EOFError):
        # Interrupting the kernel or closing the input stream is a normal way to
        # leave the chat; end the session cleanly instead of with a traceback.
        print("\nChatbot session ended. Goodbye!")
        break
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        break

--- Eureka Forbes RAG Chatbot (with Memory) Initialized ---
You can now ask follow-up questions (e.g., 'Does it cover filters?').
Type 'quit' or 'exit' to end the session.
--------------------------------------------------
You: What is the warranty on Eureka Forbes products?
Chatbot: Thinking...
Chatbot: The warranty for Eureka Forbes products is as follows:

*   **Water purifiers:** 1 year warranty (+ extended warranty available)
*   **Vacuum cleaners:** 1–2 years depending on the model

You: Does it cover new filters?
Chatbot: Thinking...
Chatbot: The warranty covers manufacturing defects, electrical failures, and service technician visits. It does not cover consumables such as filters.

You: Is it free of cost
Chatbot: Thinking...
Chatbot: For most products, installation is free.

You: Does it come with the product or I have to buy it with the product?
Chatbot: Thinking...
Chatbot: For most products, installation is free and is usually completed within 24–48 hours after delivery.

Y