# Exercise 16: LangChain

**Goals**
- Learn how to build a **vector database** using LangChain and Chroma.  
- Create a simple **Retrieval-Augmented Generation (RAG) chatbot** that can answer customer inquiries using product data.  
- Understand how AI can be applied to **industrial use cases** such as online retail and customer support.  


In [11]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain.tools.retriever import create_retriever_tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor, create_openai_tools_agent
import pandas as pd

In [12]:
# Product catalogue CSV; main() passes this path to csv_to_documents().
CSV_PATH = "data/data_ex16.csv"
# Directory given to Chroma as persist_directory in build_chroma().
PERSIST_DIR = "data/chroma_ex16_db"
# Model name passed to OpenAIEmbeddings in build_chroma().
EMB_MODEL = "text-embedding-3-small"
# Model name passed to ChatOpenAI in build_agent().
CHAT_MODEL = "gpt-5-nano"

In [14]:
# ----------------- Check CSV -----------------
# Quick sanity check of the raw data: preview the first rows and the column
# dtypes. Reads from CSV_PATH (instead of repeating the literal path) so this
# cell always inspects the same file that main() indexes.
df = pd.read_csv(CSV_PATH)
print(df.head())
print(df.dtypes)

   id                       name category  price_usd  stock  \
0   1        Running Shoes Alpha  running     119.99     24   
1   2       Running Shoes SpeedX  running     149.00     10   
2   3            Trail Shoes Pro    trail     139.50     12   
3   4          Trail Shoes Ultra    trail     159.00      8   
4   5  Basketball Shoes Air Dunk   basket     129.90     15   

                                    description  
0                     Lightweight daily trainer  
1                     Carbon-plated racing shoe  
2                  Aggressive grip for off-road  
3  High cushion for long-distance trail running  
4            High ankle support for quick moves  
id               int64
name            object
category        object
price_usd      float64
stock            int64
description     object
dtype: object


In [15]:
# ----------------- Load CSV -> Documents -----------------
def csv_to_documents(csv_path: str) -> "list[Document]":
    """Read the product CSV and turn every row into one LangChain Document.

    Each document's text lists the product name, category, price, stock and
    description on separate lines.

    Args:
        csv_path: Path to a CSV file that has the columns ``name``,
            ``category``, ``price_usd``, ``stock`` and ``description``.

    Returns:
        One Document per CSV row, in file order. An empty list if the CSV
        has no data rows.

    Raises:
        KeyError: If a row is processed and one of the expected columns is
            missing.
    """
    df = pd.read_csv(csv_path)

    # Iterate plain per-row dicts instead of indexing with df.iloc[i]: a row
    # Series has a single dtype and may coerce values, whereas records keep
    # each column's own type (so an int stock stays "24", never "24.0").
    return [
        Document(
            page_content=(
                f"Product: {row['name']}\n"
                f"Category: {row['category']}\n"
                f"Price (USD): {row['price_usd']}\n"
                f"Stock: {row['stock']}\n"
                f"Description: {row['description']}"
            )
        )
        for row in df.to_dict(orient="records")
    ]

In [16]:
# ----------------- Build / Load Chroma -----------------
def build_chroma(docs):
    """Embed ``docs`` and store them in the persistent Chroma database.

    The store lives in ``PERSIST_DIR`` and survives between runs. Every
    document gets a deterministic id (SHA-256 of its text), so indexing the
    same catalogue again overwrites the existing entries instead of adding
    duplicates that would crowd the retriever's top-k results.

    Args:
        docs: Documents to index. Documents whose ``page_content`` is
            identical are stored once (the first occurrence wins).

    Returns:
        The Chroma vector store backed by ``PERSIST_DIR``.
    """
    import hashlib  # local import: only needed for the id scheme below

    embeddings = OpenAIEmbeddings(model=EMB_MODEL)

    # Key each document by a hash of its text. A dict keeps insertion order
    # and drops exact repeats, which would otherwise produce duplicate ids
    # within a single insert.
    docs_by_id = {}
    for doc in docs:
        doc_id = hashlib.sha256(doc.page_content.encode("utf-8")).hexdigest()
        docs_by_id.setdefault(doc_id, doc)

    # Without explicit ids the store assigns new ones on every call (see the
    # LangChain Chroma reference), so each re-run of this cell against the
    # persisted directory would store the whole catalogue again.
    vectordb = Chroma.from_documents(
        documents=list(docs_by_id.values()),
        embedding=embeddings,
        ids=list(docs_by_id),
        persist_directory=PERSIST_DIR,
    )
    return vectordb

In [17]:
# ----------------- Create a retriever tool for agent -----------------
def build_retriever_tool(vectordb, k: int = 4):
    """Wrap the vector store in a retriever tool the agent can call.

    Args:
        vectordb: Vector store exposing ``as_retriever`` (here the Chroma
            store returned by ``build_chroma``).
        k: Number of documents the retriever is asked to return per query.
            Defaults to 4, the value this exercise originally hard-coded.

    Returns:
        The tool named ``shoes_knowledge`` created by
        ``create_retriever_tool``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    retriever = vectordb.as_retriever(search_kwargs={"k": k})
    return create_retriever_tool(
        retriever=retriever,
        name="shoes_knowledge",
        description="Search for shoes by name, category, price, stock, or description.",
    )

In [7]:
# ----------------- Minimal RAG agent (optional demo) -----------------
def build_agent(tool):
    """Create an AgentExecutor whose only tool is ``tool``.

    The prompt consists of a system message, the ``chat_history``
    placeholder, the human ``{input}`` message and the ``agent_scratchpad``
    placeholder, in that order. The chat model is ``CHAT_MODEL``.

    Args:
        tool: The tool the agent may call (here the retriever tool).

    Returns:
        A non-verbose AgentExecutor wrapping an OpenAI-tools agent.
    """
    system_text = (
        "You are a helpful assistant for a shoes-only online shop. "
        "Answer concisely. If the answer is not in the data, say you don't know."
    )
    messages = [
        ("system", system_text),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder("agent_scratchpad"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    chat_model = ChatOpenAI(model=CHAT_MODEL)
    tools_agent = create_openai_tools_agent(chat_model, tools=[tool], prompt=chat_prompt)

    return AgentExecutor(agent=tools_agent, tools=[tool], verbose=False)

In [18]:
# ----------------- Main -----------------
def main():
    """Run the demo end to end: index the catalogue, build the agent, ask one question."""
    # CSV rows -> documents -> embedded and stored in the Chroma vector DB
    vectordb = build_chroma(csv_to_documents(CSV_PATH))

    # Expose the store as a retriever tool and hand it to the agent
    agent = build_agent(build_retriever_tool(vectordb))

    # Single-turn question, so the conversation history starts out empty
    question = "I'm looking for running shoes for daily training. What options and prices do you have?"
    payload = {"input": question, "chat_history": []}
    result = agent.invoke(payload)

    print("\n--- Agent answer ---")
    print(result["output"])

# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


--- Agent answer ---
We have one daily-training running shoe:

- Running Shoes Alpha — $119.99 — Lightweight daily trainer. In stock: 24 pairs.

Would you like to check available sizes/colors or add a pair to your cart?
