In [1]:
import json
import os
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import time
import google.generativeai as genai
from dotenv import load_dotenv

print("Libraries imported successfully.")

  from .autonotebook import tqdm as notebook_tqdm


Libraries imported successfully.


In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Directory holding the RAG artifacts. Set LAPTOP_AGENT_BACKEND_DIR (in the
# shell or in .env) to run this notebook on another machine; the fallback
# keeps the original author's location so existing setups are unaffected.
BACKEND_DIR = os.getenv(
    'LAPTOP_AGENT_BACKEND_DIR',
    '/Users/dilshantharushika/Desktop/laptop agent/backend',
)
INDEX_PATH = os.path.join(BACKEND_DIR, 'laptops.index')
METADATA_PATH = os.path.join(BACKEND_DIR, 'laptops_metadata.json')

# Model identifiers used by the retrieval and generation steps.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
GEMINI_MODEL_NAME = 'gemini-2.5-flash'

print(f"FAISS index path: {os.path.abspath(INDEX_PATH)}")
print(f"Metadata path: {os.path.abspath(METADATA_PATH)}")
print(f"Using Embedding Model: {EMBEDDING_MODEL_NAME}")
print(f"Using LLM: {GEMINI_MODEL_NAME}")

FAISS index path: /Users/dilshantharushika/Desktop/laptop agent/backend/laptops.index
Metadata path: /Users/dilshantharushika/Desktop/laptop agent/backend/laptops_metadata.json
Using Embedding Model: all-MiniLM-L6-v2
Using LLM: gemini-2.5-flash


In [3]:

print("Loading RAG artifacts")
try:
    # Embedding model used to encode queries at search time.
    embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
    # Pre-built vector index and the JSON metadata that accompanies it.
    faiss_index = faiss.read_index(INDEX_PATH)
    with open(METADATA_PATH, 'r', encoding='utf-8') as f:
        metadata_store = json.load(f)
    print("RAG artifacts loaded successfully!")
    print(f"Index contains {faiss_index.ntotal} vectors.")
    print(f"Metadata contains {len(metadata_store)} entries.")
    # Retrieval maps FAISS row ids directly onto metadata positions, so the
    # two artifacts must describe the same number of chunks.
    if faiss_index.ntotal != len(metadata_store):
        print(
            "Warning: index and metadata sizes differ; "
            "retrieved chunks may not match their vectors. Rebuild both artifacts together."
        )
except Exception as e:
    # Best-effort load: report the problem and leave every component unset so
    # later cells can detect the failure instead of using a half-loaded state.
    print(f"Error loading RAG artifacts: {e}")
    print(f"Make sure '{INDEX_PATH}' and '{METADATA_PATH}' exist.")
    embedding_model = None
    faiss_index = None
    metadata_store = None

Loading RAG artifacts
RAG artifacts loaded successfully!
Index contains 291 vectors.
Metadata contains 291 entries.


In [4]:
print("Configuring Google Generative AI client...")

# The API key is read from the environment rather than written in the notebook.
google_api_key = os.getenv("GOOGLE_API_KEY")

if google_api_key:
    try:
        genai.configure(api_key=google_api_key)
        llm_model = genai.GenerativeModel(GEMINI_MODEL_NAME)
        print("Google client configured successfully!")
    except Exception as config_error:
        # Leave the model unset so later cells can see that setup failed.
        print(f"Error configuring Google client (is your API key correct?): {config_error}")
        llm_model = None
else:
    # No key available: explain how to provide one and leave the model unset.
    for help_line in (
        "Error: GOOGLE_API_KEY not found.",
        "Please create a .env file in the same directory as this notebook and add:",
        "GOOGLE_API_KEY=your-google-api-key",
    ):
        print(help_line)
    llm_model = None

Configuring Google Generative AI client...
Google client configured successfully!


In [5]:
def query_rag_system(query, k=4):
    """
    Performs the full Retrieve, Augment, Generate (RAG) pipeline using Google Gemini.

    Encodes ``query`` with the embedding model, asks the FAISS index for the
    ``k`` nearest chunks, formats them into a context block, sends a grounded
    prompt to the Gemini model, and prints the answer, the context that was
    supplied, and timing for both phases.

    Relies on the notebook-level globals ``embedding_model``, ``faiss_index``,
    ``metadata_store`` and ``llm_model`` created by the earlier cells.

    Args:
        query: The question to answer.
        k: Number of nearest neighbours to request from the index.

    Returns:
        None. Everything is reported via ``print``; API errors are caught and
        printed rather than raised.
    """
    # Test for None explicitly: the loader cells use None as the "failed"
    # marker, and truthiness of library objects is not a reliable signal.
    components = (embedding_model, faiss_index, metadata_store, llm_model)
    if any(component is None for component in components):
        print("Error: One or more components (embedding model, index, metadata, LLM) failed to load.")
        return

    print(f" Query: {query} ")

    # --- Retrieve ---
    start_retrieve = time.perf_counter()
    query_vector = embedding_model.encode([query]).astype('float32')
    _distances, indices = faiss_index.search(query_vector, k)
    # FAISS pads the id array with -1 when it finds fewer than k neighbours.
    # A negative id would silently wrap around to the end of the list, so keep
    # only ids that address a real metadata entry.
    retrieved_chunks = [
        metadata_store[int(row_id)]
        for row_id in indices[0]
        if 0 <= row_id < len(metadata_store)
    ]
    end_retrieve = time.perf_counter()
    print(f"Retrieval took {end_retrieve - start_retrieve:.2f} seconds.")

    if not retrieved_chunks:
        # Without context the prompt would ask the model to answer from nothing.
        print("Error: Retrieval returned no usable context for this query.")
        return

    # --- Augment ---
    context_string = "".join(
        f"Context {position}:\n"
        f"  Source: {chunk['sku']}\n"
        f"  Content: {chunk['text']}\n"
        f"  Citations: {chunk['citations']}\n\n"
        for position, chunk in enumerate(retrieved_chunks, start=1)
    )

    # NOTE(review): the "(e.g.,)" example in the prompt below is empty; it looks
    # like the sample citation was lost. Confirm the intended format before
    # editing the prompt text.
    prompt = f"""
    You are an expert Q&A assistant for laptop specifications.
    Your answers must be accurate and directly based on the provided context only.
    Do not use any outside knowledge.
    When you use information, you MUST cite the 'Citations' number provided with the context (e.g.,).

    Here is the context retrieved from the database:
    --- START CONTEXT ---
    {context_string}
    --- END CONTEXT ---

    Based *only* on the context provided, please answer the following question:
    Question: {query}
    Answer:
    """

    # --- Generate ---
    print("Sending to Google Gemini API...")
    start_generate = time.perf_counter()
    try:
        generation_config = genai.types.GenerationConfig(
            temperature=0.0,
            max_output_tokens=512
        )
        response = llm_model.generate_content(
            prompt,
            generation_config=generation_config
        )
        answer = response.text.strip()
        end_generate = time.perf_counter()

        print(f"Generation took {end_generate - start_generate:.2f} seconds.")
        print("--- LLM Answer ---")
        print(answer)

        print("\n--- Context Provided to LLM ---")
        print(context_string)

    except Exception as e:
        # Keep the notebook running: report how long the call lasted and why it failed.
        end_generate = time.perf_counter()
        print(f"Generation failed after {end_generate - start_generate:.2f} seconds.")
        print(f"Error during Google Gemini API call: {e}")
        

In [6]:
# Example run: ask one specification question using the default k=4 retrieved chunks.
query_rag_system("What is the maximum memory supported by the Thinkpad E14 Gen 5 Intel?")

 Query: What is the maximum memory supported by the Thinkpad E14 Gen 5 Intel? 
Retrieval took 0.34 seconds.
Sending to Google Gemini API...
Generation took 4.83 seconds.
--- LLM Answer ---
The ThinkPad E14 Gen 5 (Intel) supports a maximum memory of up to 48GB (16GB soldered + 32GB SO-DIMM) DDR4-3200 [65, 71]. It is noted that 48GB is for technical readiness testing, and available memory to sell may vary [65, 71].

--- Context Provided to LLM ---
Context 1:
  Source: HP ProBook 450 G10 — Datasheet
  Content: Maximum memory: 32 GB DDR4-3200 MHz RAM; (Transfer rates up to 3200 MT/s.); 7 Both slots are accessible/upgradeable by IT or self-maintainers only. Supports dual channel memory. Memory slots: 2 SODIMM.
  Citations: []

Context 2:
  Source: Lenovo ThinkPad E14 Gen 5 (AMD)
  Content: Max Memory: Up to 40GB (8GB soldered + 32GB SO-DIMM) DDR4-3200. Memory Slots: One memory soldered to systemboard, one DDR4 SO-DIMM slot, dual-channel capable. Memory Type: DDR4-3200.
  Citations: []

Cont