<a href="https://colab.research.google.com/github/kabezagit/genai-clip-zero-shot/blob/master/Copy_of_Product_Catalog_Q%26A_Code_using_langchain_and_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain-community langchain-openai langchain-chroma

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.27-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-chroma
  Downloading langchain_chroma-0.2.4-py3-none-any.whl.metadata (1.1 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting chromadb>=1.0.9 (from langchain-chroma)
  Downloading chromadb-1.0.15-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting pybase64>=1.4.1 (from chromadb>=1.0.9->langchain-chroma)
  Downloading pybase64-1.4.1-cp311-cp311-manylinux_2_5_x86_

In [None]:
import os
import time
from datetime import datetime

from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
from google.colab import userdata


# --- 0. Set up Environment Variables ---
# An OpenAI API key is required; create one at
# https://platform.openai.com/account/api-keys
# Outside Colab the key can be placed in the environment directly, e.g.:
# os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"

# Load the API key from Colab secrets. Any failure of the lookup (or of the
# assignment, e.g. a non-string value) is reported as a missing secret.
try:
    os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
except Exception as e:
    raise ValueError("OPENAI_API_KEY not found in Colab secrets. Please add it to Colab secrets.") from e

# The assignment above either succeeded or raised, so testing for the key's
# mere presence could never fail. Check instead that the stored value is not
# blank, so a bad secret is reported here rather than on the first API call.
if not os.environ["OPENAI_API_KEY"].strip():
    raise ValueError("OPENAI_API_KEY is empty. Please set a valid key in Colab secrets.")

# --- 1. Define LLM and Embeddings ---
# temperature=0 keeps the assistant's answers as repeatable as possible.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
# Embedding model used by the vector store; library default model is used.
embeddings = OpenAIEmbeddings()

# --- 2. Simulate a Dynamic Product Catalog (Our Knowledge Base) ---
# In-memory stand-in for a product database or API: each catalog key maps to
# a record holding the fields that later get rendered into an indexable
# document (name, id, price, stock, description, category, last_updated).

product_data = {
    "product_a": dict(
        name="Eco-Friendly Stainless Steel Water Bottle",
        id="PROD-001",
        price="$25.00",
        stock=150,
        description="Durable, insulated stainless steel water bottle. Keeps drinks cold for 24 hours and hot for 12 hours. 750ml capacity. BPA-free.",
        category="Outdoor Gear",
        last_updated=datetime(2025, 7, 1, 10, 0, 0),
    ),
    "product_b": dict(
        name="Smart Home LED Lighting Kit",
        id="PROD-002",
        price="$79.99",
        stock=80,
        description="Control your home lighting from your smartphone. Features adjustable brightness, color temperature, and scheduling. Compatible with Alexa and Google Assistant.",
        category="Smart Home",
        last_updated=datetime(2025, 7, 2, 14, 30, 0),
    ),
    "product_c": dict(
        name="Ergonomic Office Chair (Classic Edition)",
        id="PROD-003",
        price="$299.00",
        stock=30,
        description="High-back ergonomic chair with lumbar support, adjustable armrests, and breathable mesh fabric. Designed for long hours of comfortable work.",
        category="Office Furniture",
        last_updated=datetime(2025, 7, 3, 9, 15, 0),
    ),
}

def create_document_from_product(product_id, data):
    """Render one catalog record as a LangChain ``Document``.

    Args:
        product_id: Catalog key of the product; stored in the metadata.
        data: Record with the keys ``name``, ``id``, ``price``, ``stock``,
            ``category``, ``description`` and ``last_updated`` (an object
            with ``strftime``, e.g. a ``datetime``).

    Returns:
        A ``Document`` whose text lists every field on its own line and whose
        metadata carries ``product_id``, ``name`` and ``category``.
    """
    # One "Label: value" line per field; joined below without a trailing newline.
    field_lines = [
        f"Product Name: {data['name']}",
        f"Product ID: {data['id']}",
        f"Price: {data['price']}",
        f"Stock: {data['stock']} units",
        f"Category: {data['category']}",
        f"Description: {data['description']}",
        f"Last Updated: {data['last_updated'].strftime('%Y-%m-%d %H:%M:%S')}",
    ]
    doc_text = "\n".join(field_lines)
    doc_metadata = {
        "product_id": product_id,
        "name": data['name'],
        "category": data['category'],
    }
    return Document(page_content=doc_text, metadata=doc_metadata)

# --- 3. Initialize Vector Store ---
# No persist_directory is passed to Chroma below, so the index is held in
# memory only and does not survive a restart of the Python process.
# For durable or shared storage, pass a persist_directory or use a hosted vector store.


def initialize_vector_store(data):
    """Index the product catalog into Chroma and return the vector store.

    Every chunk is stored under a deterministic id of the form
    ``"<product key>-<chunk index>"``. Calling this function again therefore
    overwrites the entries of products that changed instead of adding a
    second copy next to the outdated one, and entries whose id is no longer
    produced by the current catalog are removed.

    NOTE(review): ``Chroma.from_documents`` is called without a
    ``collection_name`` or ``persist_directory``, so it uses the library's
    default collection. The notebook's recorded output (the "initial" Q&A
    already quoting the post-update price) suggests that collection is shared
    between calls within one process -- confirm against the Chroma docs.

    Args:
        data: Mapping of product key -> product record, in the shape accepted
            by ``create_document_from_product``.

    Returns:
        The ``Chroma`` vector store containing the chunks of ``data``.
    """
    documents = [create_document_from_product(pid, p_info) for pid, p_info in data.items()]
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(documents)

    # Number the chunks per product so the ids are stable from run to run.
    chunks_seen = {}
    chunk_ids = []
    for chunk in splits:
        pid = chunk.metadata["product_id"]
        chunk_index = chunks_seen.get(pid, 0)
        chunks_seen[pid] = chunk_index + 1
        chunk_ids.append(f"{pid}-{chunk_index}")

    print(f"Indexing {len(splits)} chunks into ChromaDB...")
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings, ids=chunk_ids)

    # Drop whatever earlier calls left behind that the current catalog no
    # longer produces (removed products, surplus chunks, random-id entries).
    current_ids = set(chunk_ids)
    stale_ids = [doc_id for doc_id in vectorstore.get()["ids"] if doc_id not in current_ids]
    if stale_ids:
        vectorstore.delete(ids=stale_ids)

    print("ChromaDB initialized.")
    return vectorstore

vectorstore = initialize_vector_store(product_data)
retriever = vectorstore.as_retriever()


# --- 4. Define the RAG Chain ---
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an e-commerce assistant. Use the following product information to answer the user's questions truthfully and concisely. If the information is not available, state that you don't know."),
        ("human", "Context: {context}\nQuestion: {question}"),
    ]
)


def retrieve_context(question):
    """Return the text of the documents retrieved for *question*.

    ``retriever`` is resolved as a module-level name each time this function
    runs, not when the chain is built. Rebinding ``retriever`` after the
    catalog is re-indexed is therefore picked up by ``rag_chain`` without
    rebuilding the chain.

    Args:
        question: The user's question, used as the retrieval query.

    Returns:
        The ``page_content`` of every retrieved document, separated by blank
        lines, so the prompt receives plain text rather than the ``repr`` of
        a list of ``Document`` objects.
    """
    docs = retriever.invoke(question)
    return "\n\n".join(doc.page_content for doc in docs)


# The dict fans the incoming question out to both prompt variables; a plain
# function used as a dict value is wrapped into a runnable by LangChain.
rag_chain = (
    {"context": retrieve_context, "question": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

# --- 5. Simulate Real-time Updates and Queries ---

print("\n--- Initial Product Catalog Q&A ---")
# Questions asked against the catalog as first indexed.
initial_queries = [
    "What is the price and stock of the stainless steel water bottle?",
    "Tell me about the ergonomic office chair.",
    "Do you have any new products?",  # nothing in the initial catalog is marked as new
]

for query in initial_queries:
    # Echo the question before calling the chain so it is visible even if
    # the call fails.
    print(f"\nUser: {query}")
    response = rag_chain.invoke(query)
    # Answer followed by a 30-dash separator line.
    print(f"Assistant: {response}", "-" * 30, sep="\n")

print("\n--- Simulating Real-time Product Update ---")
time.sleep(2)  # brief pause standing in for time passing between catalog changes

# Revise the existing water-bottle record in place: lower stock, sale price,
# and a fresh modification timestamp.
product_data["product_a"].update(
    {
        "stock": 120,
        "price": "$22.50 (On Sale!)",
        "last_updated": datetime.now(),
    }
)

# Register a product that was not part of the original catalog.
new_product_id = "product_d"
product_data[new_product_id] = dict(
    name="Wireless Noise-Cancelling Headphones",
    id="PROD-004",
    price="$149.99",
    stock=200,
    description="Premium wireless headphones with active noise cancellation, 30-hour battery life, and comfortable over-ear design. Perfect for travel and daily commute.",
    category="Electronics",
    last_updated=datetime.now(),
)
print(f"Product '{product_data['product_a']['name']}' updated. New product '{product_data[new_product_id]['name']}' added.")


# --- Re-index the vector store with updated data ---
# A production system would re-index (fully or incrementally) whenever the
# underlying data changes. This demo simply runs the whole catalog through
# initialize_vector_store again; for partial updates see
# vectorstore.add_documents / vectorstore.update_documents.
print("\n--- Re-indexing knowledge base with updated product data ---")
vectorstore = initialize_vector_store(product_data)
# Rebind the module-level retriever to the store returned by the re-index.
retriever = vectorstore.as_retriever()


print("\n--- Post-Update Product Catalog Q&A ---")
# Questions covering the changed product, the added product, an untouched
# product, and the general "anything new?" query from the first round.
updated_queries = [
    "What is the current price and stock of the stainless steel water bottle?",
    "Tell me about the new wireless headphones.",
    "What's the stock level for PROD-003?",
    "Do you have any new products?",
]

for query in updated_queries:
    # Echo the question before calling the chain so it is visible even if
    # the call fails.
    print(f"\nUser: {query}")
    response = rag_chain.invoke(query)
    # Answer followed by a 30-dash separator line.
    print(f"Assistant: {response}", "-" * 30, sep="\n")

Indexing 3 chunks into ChromaDB...
ChromaDB initialized.

--- Initial Product Catalog Q&A ---

User: What is the price and stock of the stainless steel water bottle?
Assistant: The price of the Eco-Friendly Stainless Steel Water Bottle is $22.50 (on sale), and there are 120 units in stock.
------------------------------

User: Tell me about the ergonomic office chair.
Assistant: The Ergonomic Office Chair (Classic Edition) is a high-back ergonomic chair designed for long hours of comfortable work. It features lumbar support, adjustable armrests, and breathable mesh fabric. The chair is priced at $299.00 and currently has 30 units in stock. It falls under the category of Office Furniture.
------------------------------

User: Do you have any new products?
Assistant: I don't have information on any new products beyond the ones listed in the provided documents.
------------------------------

--- Simulating Real-time Product Update ---
Product 'Eco-Friendly Stainless Steel Water Bottle' u