In [10]:
import json
import os
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np
import time
import google.generativeai as genai
from dotenv import load_dotenv
import sqlite3 
from contextlib import contextmanager 
import pandas as pd 

# Reaching this line means every import in this cell resolved without raising.
print("Libraries imported successfully.")

Libraries imported successfully.


In [11]:

# --- Configuration -----------------------------------------------------------
# Directory that holds the persisted RAG artifacts. Every artifact path below is
# derived from it, so relocating the project is a single change. Setting the
# LAPTOP_AGENT_BACKEND_DIR environment variable overrides the default location
# without editing the notebook.
BACKEND_DIR = os.environ.get(
    'LAPTOP_AGENT_BACKEND_DIR',
    '/Users/dilshantharushika/Desktop/laptop agent/backend',
)
INDEX_PATH = os.path.join(BACKEND_DIR, 'laptops.index')            # FAISS vector index
METADATA_PATH = os.path.join(BACKEND_DIR, 'laptops_metadata.json')  # chunk metadata (JSON)
DB_PATH = os.path.join(BACKEND_DIR, 'laptops_dynamic.db')          # SQLite DB with dynamic data

EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
GEMINI_MODEL_NAME = 'gemini-2.5-flash'

print(f"FAISS index path: {os.path.abspath(INDEX_PATH)}")
print(f"Metadata path: {os.path.abspath(METADATA_PATH)}")
print(f"Dynamic DB path: {os.path.abspath(DB_PATH)}")
print(f"Using Embedding Model: {EMBEDDING_MODEL_NAME}")
print(f"Using LLM: {GEMINI_MODEL_NAME}")

# Warn early (without raising) about missing artifacts so the problem is visible
# in this cell's output rather than only when a later cell fails.
if not os.path.exists(INDEX_PATH):
    print(f"WARNING: FAISS Index not found at {INDEX_PATH}")
if not os.path.exists(METADATA_PATH):
    print(f"WARNING: Metadata file not found at {METADATA_PATH}")
if not os.path.exists(DB_PATH):
    print(f"WARNING: SQLite DB not found at {DB_PATH}. Run setup_dynamic_db notebook/script.")

FAISS index path: /Users/dilshantharushika/Desktop/laptop agent/backend/laptops.index
Metadata path: /Users/dilshantharushika/Desktop/laptop agent/backend/laptops_metadata.json
Dynamic DB path: /Users/dilshantharushika/Desktop/laptop agent/backend/laptops_dynamic.db
Using Embedding Model: all-MiniLM-L6-v2
Using LLM: gemini-2.5-flash


In [12]:

print("Loading static RAG artifacts (FAISS, Metadata, Embedding Model)...")
# Start from None so later cells can detect a failed or partial load.
embedding_model = None
faiss_index = None
metadata_store = None

try:
    # Report exactly which artifact is absent instead of a generic message.
    missing_artifacts = [path for path in (INDEX_PATH, METADATA_PATH) if not os.path.exists(path)]
    if missing_artifacts:
        raise FileNotFoundError(f"Index or Metadata file missing: {', '.join(missing_artifacts)}")

    embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
    faiss_index = faiss.read_index(INDEX_PATH)
    with open(METADATA_PATH, 'r', encoding='utf-8') as metadata_file:
        metadata_store = json.load(metadata_file)

    print(" RAG artifacts loaded successfully! ")
    print(f"Index contains {faiss_index.ntotal} vectors.")
    print(f"Metadata contains {len(metadata_store)} entries.")

    # Retrieval looks metadata up by FAISS row position, so the two artifacts
    # must describe the same number of chunks; a mismatch means they were built
    # from different runs and lookups may fail or return the wrong chunk.
    if faiss_index.ntotal != len(metadata_store):
        print(
            f"WARNING: Index/metadata size mismatch ({faiss_index.ntotal} vectors vs "
            f"{len(metadata_store)} metadata entries). Rebuild both artifacts together."
        )
except Exception as e:
    # Keep the notebook running; downstream cells check for None components.
    print(f"Error loading static RAG artifacts: {e}")
 

Loading static RAG artifacts (FAISS, Metadata, Embedding Model)...
 RAG artifacts loaded successfully! 
Index contains 291 vectors.
Metadata contains 291 entries.


In [13]:

print("Loading .env file.")
load_dotenv()

print("Configuring Google Generative AI client...")
# llm_model stays None unless the client is configured successfully below.
llm_model = None
google_api_key = os.getenv("GOOGLE_API_KEY")

if google_api_key:
    try:
        genai.configure(api_key=google_api_key)
        llm_model = genai.GenerativeModel(GEMINI_MODEL_NAME)
    except Exception as config_error:
        print(f"Error configuring Google client (is API key valid/enabled?): {config_error}")
    else:
        print("Google client configured successfully!")
else:
    # No key available: explain how to provide one instead of failing later.
    print("Error: GOOGLE_API_KEY not found in environment.")
    print("Please create a .env file in this directory with GOOGLE_API_KEY=your-key")



Loading .env file.
Configuring Google Generative AI client...
Google client configured successfully!


In [14]:
@contextmanager
def get_db_connection():
    """Provides a managed database connection to the dynamic DB.

    Yields:
        sqlite3.Connection | None: A connection to ``DB_PATH`` whose rows are
        ``sqlite3.Row`` objects (column access by name), or ``None`` when the
        database file is missing or the connection cannot be opened. The
        failure reason is printed; callers must check for ``None``.

    Exceptions raised inside the caller's ``with`` body propagate unchanged,
    and the connection is closed on exit in every case.
    """
    conn = None
    # Phase 1: open the connection. Only failures in this phase are reported as
    # connection errors and turned into a yielded None.
    try:
        if not os.path.exists(DB_PATH):
            raise FileNotFoundError(f"Database file not found at {DB_PATH}. Run the setup_dynamic_db notebook/script first.")
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
    except Exception as e:
        print(f"Database connection error: {e}")
        if conn is not None:
            conn.close()
            conn = None

    # Phase 2: hand the connection to the caller. The yield must NOT sit inside
    # an `except` that yields again: an exception thrown in from the `with`
    # body would trigger a second yield and contextlib would replace the real
    # error with "RuntimeError: generator didn't stop after throw()".
    try:
        yield conn
    finally:
        if conn is not None:
            conn.close()


def get_dynamic_data_for_sku(sku):
    """Fetches latest price, rating, availability etc. for a given SKU from SQLite.

    Args:
        sku: Value matched against ``Laptop.sku`` and ``PriceHistory.laptop_sku``.

    Returns:
        dict with the keys ``latest_price``, ``avg_rating``, ``availability``,
        ``shipping_eta`` and ``vendor``. Each value is a display string; any
        field that is missing or NULL in the database stays ``"N/A"``.

    Database errors are printed rather than raised, and whatever was collected
    before the error is returned. A NULL column only affects its own field.
    """
    dynamic_info = {"latest_price": "N/A", "avg_rating": "N/A", "availability": "N/A", "shipping_eta": "N/A", "vendor": "N/A"}
    try:
        with get_db_connection() as conn:
            if conn is None:
                print(f"Skipping dynamic data for {sku} due to connection error.")
                return dynamic_info
            cursor = conn.cursor()

            # Most recent price entry for this SKU.
            cursor.execute("""
                SELECT price, date, vendor_name, promo_badges
                FROM PriceHistory
                WHERE laptop_sku = ?
                ORDER BY date DESC
                LIMIT 1
            """, (sku,))
            latest_price_row = cursor.fetchone()

            # Rating / availability details for this SKU.
            cursor.execute("""
                SELECT currency, average_rating, review_count, availability, shipping_eta
                FROM Laptop
                WHERE sku = ?
            """, (sku,))
            laptop_row = cursor.fetchone()

            currency = "Unknown Currency"
            if laptop_row:
                rating = laptop_row['average_rating']
                review_count = laptop_row['review_count']
                # NULL columns arrive as None; formatting None with ':.1f' would
                # raise and discard every other field, so keep "N/A" instead.
                if rating is not None:
                    if review_count is not None:
                        dynamic_info["avg_rating"] = f"{rating:.1f}/5.0 ({review_count} reviews)"
                    else:
                        dynamic_info["avg_rating"] = f"{rating:.1f}/5.0"
                for field in ("availability", "shipping_eta"):
                    if laptop_row[field] is not None:
                        dynamic_info[field] = laptop_row[field]
                if laptop_row['currency'] is not None:
                    currency = laptop_row['currency']
            else:
                print(f"Warning: Laptop details not found in DB for SKU: {sku}")

            if latest_price_row:
                price = latest_price_row['price']
                if price is not None:
                    dynamic_info["latest_price"] = f"{currency} {price:.2f}"
                    promo_badges = latest_price_row['promo_badges']
                    # The literal text "none" (any case) is treated as "no promotion".
                    if promo_badges and promo_badges.lower() != "none":
                        dynamic_info["latest_price"] += f" ({promo_badges})"
                else:
                    print(f"Warning: Latest price entry has no price value for SKU: {sku}")
                dynamic_info["vendor"] = latest_price_row['vendor_name'] if latest_price_row['vendor_name'] else "N/A"
            else:
                print(f"Warning: No price history found in DB for SKU: {sku}")

    except sqlite3.Error as e:
        print(f"SQLite error fetching dynamic data for SKU '{sku}': {e}")
    except Exception as e:
        print(f"Unexpected error fetching dynamic data for SKU '{sku}': {e}")

    return dynamic_info

# Printed once the definitions in this cell have executed; no database access happens here.
print("Database helper functions defined.")


Database helper functions defined.


In [15]:
def _format_static_context(retrieved_chunks):
    """Render retrieved spec chunks as the static-specifications section of the prompt."""
    parts = ["\n--- STATIC SPECIFICATIONS CONTEXT ---\n"]
    if not retrieved_chunks:
        parts.append("No relevant specifications found.\n")
    for position, chunk in enumerate(retrieved_chunks, start=1):
        parts.append(
            f"Context {position} (Source: {chunk.get('sku', 'Unknown')}, "
            f"Section: {chunk.get('section_title', 'N/A')}):\n"
        )
        parts.append(f"  Content: {chunk.get('text', 'N/A')}\n")
        if chunk.get('citations'):
            parts.append(f"  Citations: {chunk['citations']}\n\n")
        else:
            parts.append("\n")
    return "".join(parts)


def _format_dynamic_context(dynamic_context_dict):
    """Render per-SKU price/availability/rating data as the dynamic-data section of the prompt."""
    parts = ["\n--- CURRENT DYNAMIC DATA ---\n"]
    if not dynamic_context_dict:
        parts.append("No dynamic data retrieved for identified models.\n")
    for sku, data in dynamic_context_dict.items():
        parts.append(f"For '{sku}':\n")
        parts.append(f"  - Latest Price: {data.get('latest_price', 'N/A')}\n")
        parts.append(f"  - Availability: {data.get('availability', 'N/A')}\n")
        # Shipping ETA and vendor are not included in the prompt.
        parts.append(f"  - Average Rating: {data.get('avg_rating', 'N/A')}\n\n")
    return "".join(parts)


def query_rag_system_with_dynamic(query, k=4):
    """
    Performs RAG using FAISS (static specs) and SQLite (dynamic data) with Google Gemini.

    Args:
        query: Natural-language question or recommendation request.
        k: Number of nearest chunks requested from the FAISS index.

    Returns:
        dict with:
          - "answer": the LLM's answer text, or a string starting with "Error"
            when a stage failed (failures are reported, not raised);
          - "context_summary": {} on early failure, otherwise
            {"static_chunks_retrieved": int, "dynamic_skus_queried": list of SKUs}.

    Relies on the notebook-level objects embedding_model, faiss_index,
    metadata_store and llm_model having been loaded by earlier cells.
    """

    if not all([embedding_model, faiss_index, metadata_store, llm_model]):
        print("Error: One or more components (embedding model, index, metadata, LLM) failed to load.")
        print("Please check previous cell outputs for errors (e.g., file paths, API keys).")
        return {"answer": "Error: System components not loaded.", "context_summary": {}}

    print(f"\n Processing Query: '{query}' ")

    # --- Step 1: semantic retrieval of static spec chunks -------------------
    print("Step 1: Retrieving static specs from FAISS...")
    start_retrieve_static = time.perf_counter()
    try:
        query_vector = embedding_model.encode([query]).astype('float32')
        _, indices = faiss_index.search(query_vector, k)
        # FAISS pads the result with -1 when it finds fewer than k neighbours.
        # Without this filter, -1 would silently select the LAST metadata entry
        # and feed an unrelated chunk to the LLM.
        retrieved_chunks = [metadata_store[i] for i in indices[0] if i >= 0]
    except Exception as e:
        print(f"  Error during FAISS search: {e}")
        return {"answer": f"Error during search: {e}", "context_summary": {}}
    elapsed_static = time.perf_counter() - start_retrieve_static
    print(f"  > Done ({len(retrieved_chunks)} chunks) in {elapsed_static:.3f} seconds.")

    # --- Step 1b: dynamic data for every SKU seen in the retrieved chunks ---
    print("Step 1b: Retrieving dynamic data from SQLite...")
    start_retrieve_dynamic = time.perf_counter()

    mentioned_skus = sorted({chunk['sku'] for chunk in retrieved_chunks if chunk.get('sku')})
    print(f"  Identified SKUs in static context: {mentioned_skus}")

    if not mentioned_skus:
        print("  > No specific laptop model identified in static context.")
    dynamic_context_dict = {sku: get_dynamic_data_for_sku(sku) for sku in mentioned_skus}
    elapsed_dynamic = time.perf_counter() - start_retrieve_dynamic
    print(f"  > Done in {elapsed_dynamic:.3f} seconds.")

    # --- Step 2: build the grounded prompt -----------------------------------
    print("Step 2: Augmenting context for LLM...")
    combined_context = _format_static_context(retrieved_chunks) + _format_dynamic_context(dynamic_context_dict)

    prompt = f"""
    You are an expert Q&A assistant and recommender for laptop specifications.
    Your answers must be accurate and concise, directly based on the provided context ONLY (both static specs and dynamic data).
    Do not use any outside knowledge or information not present in the context.
    Prioritize dynamic data like price, availability, and rating if the query specifically asks for it or implies a purchase decision (e.g., "recommend", "cheapest", "available").
    When you use information from the 'STATIC SPECIFICATIONS CONTEXT', you MUST cite the 'Citations' number provided (e.g., [cite: 123]). Do not make up citations. Cite specific citations when possible.
    When you use information from 'CURRENT DYNAMIC DATA', clearly state it (e.g., "The current price is...", "It is currently In Stock.", "The average rating is..."). Do not add citations for dynamic data.

    Here is the context retrieved from internal databases:
    {combined_context}
    --- END CONTEXT ---

    Based *only* on the context provided above, please answer the following question or fulfill the recommendation request:
    Question: {query}
    Answer:
    """

    # --- Step 3: generation ---------------------------------------------------
    print("Step 3: Generating answer using Google Gemini...")
    llm_answer = "Error: LLM generation failed."
    start_generate = time.perf_counter()
    try:
        # temperature 0.0 keeps answers as deterministic as the API allows.
        generation_config = genai.types.GenerationConfig(
            temperature=0.0,
            max_output_tokens=768
        )
        response = llm_model.generate_content(
            prompt,
            generation_config=generation_config,
        )

        if response.parts:
            llm_answer = response.text.strip()
            print(f"  > Done in {time.perf_counter() - start_generate:.3f} seconds.")
        else:
            # No content came back: report the block reason when the API gives
            # one. getattr guards against a missing or None prompt_feedback.
            block_reason = getattr(getattr(response, 'prompt_feedback', None), 'block_reason', None)
            if block_reason:
                llm_answer = f"Error: Content generation blocked by safety settings. Reason: {block_reason}"
                print(f"  > Generation Blocked: {block_reason}")
            else:
                llm_answer = "Error: LLM response was empty or blocked for an unknown reason."
                print("  > Generation Error: Empty or unknown block.")

    except Exception as e:
        print(f"  > Generation failed after {time.perf_counter() - start_generate:.3f} seconds.")
        print(f"  Error during Google Gemini API call: {e}")
        llm_answer = f"Error during LLM call: {e}"

    print("\n--- LLM Answer ---")
    print(llm_answer)

    return {"answer": llm_answer, "context_summary": {"static_chunks_retrieved": len(retrieved_chunks), "dynamic_skus_queried": mentioned_skus}}

# Printed once this cell has executed; the RAG function itself is not invoked here.
print("Combined RAG function `query_rag_system_with_dynamic` defined.")

Combined RAG function `query_rag_system_with_dynamic` defined.


In [16]:

# Example run: a price/availability question that needs both the static specs
# and the dynamic data. The returned dict (answer + context summary) is kept in
# results1; the function also prints its progress and the answer itself.
results1 = query_rag_system_with_dynamic("What is the current price and availability of the ThinkPad E14 Gen 5 Intel?")
# Visual separator between successive example runs in the cell output.
print("-" * 50) 


 Processing Query: 'What is the current price and availability of the ThinkPad E14 Gen 5 Intel?' 
Step 1: Retrieving static specs from FAISS...
  > Done (4 chunks) in 0.141 seconds.
Step 1b: Retrieving dynamic data from SQLite...
  Identified SKUs in static context: ['HP ProBook 440 14 inch G11 Notebook PC', 'Lenovo ThinkPad E14 Gen 5 (AMD)', 'ThinkPad E14 Gen 5 (Intel)']
  > Done in 0.002 seconds.
Step 2: Augmenting context for LLM...
Step 3: Generating answer using Google Gemini...
  > Done in 8.520 seconds.

--- LLM Answer ---
The current price for the ThinkPad E14 Gen 5 (Intel) is LKR 369896.71, and it is currently In Stock.
--------------------------------------------------
