# Exercise 16: LangChain

**Goals**
- Learn how to build a **vector database** using LangChain and Chroma.  
- Create a simple **Retrieval-Augmented Generation (RAG) chatbot** that can answer customer inquiries using product data.  
- Understand how AI can be applied to **industrial use cases** such as online retail and customer support.  


In [2]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain.tools.retriever import create_retriever_tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor, create_openai_tools_agent
import pandas as pd

In [3]:
# Configuration shared by the helper functions in this notebook.
CSV_PATH = "data/data_ex16.csv"           # shoes dataset CSV; columns: id,name,category,price_usd,stock,description
PERSIST_DIR = "data/chroma_ex16_db"      # local directory passed to Chroma as persist_directory
EMB_MODEL = "text-embedding-3-small" # OpenAI embedding model used to index the documents
CHAT_MODEL = "gpt-5-nano"            # OpenAI chat model that drives the agent (alternative: "gpt-4.1")

In [4]:
# ----------------- Load CSV -> Documents -----------------
def csv_to_documents(csv_path: str) -> list[Document]:
    """Turn every row of the product CSV into one text ``Document``.

    Args:
        csv_path: Path of the CSV file. Each row must provide the columns
            ``name``, ``category``, ``price_usd``, ``stock`` and
            ``description``.

    Returns:
        One ``Document`` per row, in file order. Its ``page_content`` lists
        the five fields as labelled lines separated by newlines.
    """
    frame = pd.read_csv(csv_path)
    return [
        Document(
            page_content="\n".join(
                (
                    f"Product: {row['name']}",
                    f"Category: {row['category']}",
                    f"Price (USD): {row['price_usd']}",
                    f"Stock: {row['stock']}",
                    f"Description: {row['description']}",
                )
            )
        )
        for _, row in frame.iterrows()
    ]

In [5]:
# ----------------- Build Chroma vector store -----------------
def build_chroma(docs: list[Document]) -> Chroma:
    """Build the Chroma vector store from scratch out of ``docs``.

    ``Chroma.from_documents`` adds to whatever collection is already stored
    in ``PERSIST_DIR``. Calling it on every run (for example when a notebook
    cell is re-executed) would index each product again, and the retriever
    would then return several copies of the same document. To really build
    the DB fresh, any previously persisted collection is dropped first.

    Args:
        docs: Documents to embed and index.

    Returns:
        A Chroma store persisted under ``PERSIST_DIR`` that holds only the
        embeddings of ``docs``.
    """
    embeddings = OpenAIEmbeddings(model=EMB_MODEL)

    # Opening the store creates the default collection when it is missing,
    # so delete_collection() is also safe on the very first run.
    Chroma(
        persist_directory=PERSIST_DIR,
        embedding_function=embeddings,
    ).delete_collection()

    return Chroma.from_documents(
        documents=docs,
        embedding=embeddings,
        persist_directory=PERSIST_DIR,
    )

In [6]:
# ----------------- Create a retriever tool (for agent use) -----------------
def build_retriever_tool(vectordb: Chroma):
    """Expose the vector store to an agent as a retriever tool.

    Args:
        vectordb: Chroma store to search.

    Returns:
        The tool produced by ``create_retriever_tool``, registered under the
        name ``shoes_knowledge``. Its retriever is configured with
        ``search_kwargs={"k": 4}``.
    """
    shoe_retriever = vectordb.as_retriever(search_kwargs=dict(k=4))
    return create_retriever_tool(
        retriever=shoe_retriever,
        name="shoes_knowledge",
        description="Search for shoes by name, category, price, stock, or description.",
    )

In [8]:
# ----------------- Minimal RAG agent (optional demo) -----------------
def build_agent(tool):
    """Assemble an agent executor that answers shop questions with ``tool``.

    Args:
        tool: The single tool the agent is allowed to call.

    Returns:
        An ``AgentExecutor`` (non-verbose) wrapping an OpenAI tools agent.
        The prompt has the input variables ``input``, ``chat_history`` and
        ``agent_scratchpad``.
    """
    chat_model = ChatOpenAI(model=CHAT_MODEL)

    system_message = (
        "You are a helpful assistant for a shoes-only online shop. "
        "Answer concisely. If the answer is not in the data, say you don't know."
    )
    # Order matters: system instructions, prior turns, the new question, and
    # finally the placeholder for the agent's scratchpad messages.
    messages = [
        ("system", system_message),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    tools_agent = create_openai_tools_agent(chat_model, tools=[tool], prompt=chat_prompt)
    return AgentExecutor(agent=tools_agent, tools=[tool], verbose=False)

In [9]:
# ----------------- Main -----------------
def main():
    """Run the demo end to end: index the CSV, build the agent, ask one question.

    Prints the agent's answer to stdout.
    """
    # Index the product catalogue: CSV rows -> Documents -> Chroma store.
    vector_store = build_chroma(csv_to_documents(CSV_PATH))

    # Wrap the store as a retriever tool and hand it to the agent.
    shoe_agent = build_agent(build_retriever_tool(vector_store))

    # One example turn with an empty conversation history.
    question = "I'm looking for running shoes for daily training. What options and prices do you have?"
    payload = {"input": question, "chat_history": []}
    result = shoe_agent.invoke(payload)

    print("\n--- Agent answer ---")
    print(result["output"])

# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


--- Agent answer ---
Option available:
- Running Shoes Alpha — $119.99. Lightweight daily trainer. Category: running. In stock: 24.

What size and color would you like? I can also check for similar running models if you want.
