# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

In [1]:
pip install --upgrade langchain

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from dotenv import load_dotenv
import google.generativeai as genai
load_dotenv(dotenv_path="/Users/pulkitaggarwal/LangChain1/Gemini_Key.env")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-1.5-flash')
response = model.generate_content("Hello Gemini")
print(response.text)

Hello!  How can I help you today?



In [3]:
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI  # Gemini equivalent of ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
# No need to import OpenAI anymore


In [4]:
file = "/Users/pulkitaggarwal/LangChain1/OutdoorClothingCatalog_1000.csv"
loader = CSVLoader(file_path=file)

In [5]:
from langchain.indexes import VectorstoreIndexCreator

In [5]:
pip install docarray

Collecting docarray
  Downloading docarray-0.41.0-py3-none-any.whl.metadata (36 kB)
Collecting rich>=13.1.0 (from docarray)
  Downloading rich-14.0.0-py3-none-any.whl.metadata (18 kB)
Collecting types-requests>=2.28.11.6 (from docarray)
  Downloading types_requests-2.32.0.20250602-py3-none-any.whl.metadata (2.1 kB)
Collecting markdown-it-py>=2.2.0 (from rich>=13.1.0->docarray)
  Downloading markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)
Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich>=13.1.0->docarray)
  Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)
Downloading docarray-0.41.0-py3-none-any.whl (302 kB)
Downloading rich-14.0.0-py3-none-any.whl (243 kB)
Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)
Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)
Downloading types_requests-2.32.0.20250602-py3-none-any.whl (20 kB)
Installing collected packages: types-requests, mdurl, markdown-it-py, rich, docarray
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [7]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [13]:
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-1.5-flash")
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

ValidationError: 1 validation error for VectorstoreIndexCreator
embedding
  Field required [type=missing, input_value={'vectorstore_cls': <clas...ocArrayInMemorySearch'>}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing

In [14]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001"  # or "models/gemini-embedding-exp-03-07"
)

index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings
).from_loaders([loader])




In [15]:
query ="Please list all your shirts with sun protection \
in a table in markdown and summarize each one."

In [18]:
llm_replacement_model = ChatGoogleGenerativeAI(temperature=0, 
                               model='gemini-1.5-flash')

response = index.query(query, 
                       llm = llm_replacement_model)

In [19]:
display(Markdown(response))

| Shirt Name             | Description Summary                                                                                                                                   | Sun Protection | Material                     | Care Instructions          |
|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|------------------------------|-----------------------------|
| Women's Tropical Tee   | Sleeveless button-up shirt with SunSmart™ UPF 50+ protection. Slightly fitted, falls at hip. Wrinkle resistant, with pockets and venting.                 | UPF 50+          | 71% nylon, 29% polyester     | Machine wash and dry        |
| Sun Shield Shirt       | High-performance sun shirt with UPF 50+ protection. Slightly fitted, falls at hip. Moisture-wicking, abrasion resistant. Recommended by Skin Cancer Foundation. | UPF 50+          | 78% nylon, 22% Lycra Xtra Life | Handwash, line dry          |
| Sunrise Tee            | Lightweight, UV-protective button-down shirt. Moisture-wicking, wrinkle-free, UPF 50+. Originally designed for fishing.                               | UPF 50+          | 71% nylon, 29% polyester     | Machine wash and dry        |
| Tropical Breeze Shirt | Lightweight, breathable long-sleeve men's shirt with SunSmart™ UPF 50+ protection. Wrinkle-resistant, moisture-wicking. Originally designed for fishing.     | UPF 50+          | 71% nylon, 29% polyester     | Machine wash and dry        |

### STEP BY STEP

In [20]:
from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path=file)

In [21]:
docs = loader.load()

In [22]:
docs[0]

Document(metadata={'source': '/Users/pulkitaggarwal/LangChain1/OutdoorClothingCatalog_1000.csv', 'row': 0}, page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.")

In [23]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001"  # or "models/gemini-embedding-exp-03-07"
)

In [24]:
embed = embeddings.embed_query("Hi my name is Harrison")

In [25]:
print(len(embed))

768


In [26]:
print(embed[:5])

[0.010543029755353928, -0.025523174554109573, -0.03037361055612564, -0.033120352774858475, 0.029262393712997437]


In [27]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)

In [28]:
query = "Please suggest a shirt with sunblocking"

In [29]:
docs = db.similarity_search(query)

In [30]:
len(docs)

4

In [31]:
docs[0]

Document(metadata={'source': '/Users/pulkitaggarwal/LangChain1/OutdoorClothingCatalog_1000.csv', 'row': 255}, page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.')

In [32]:
retriever = db.as_retriever()

In [35]:
llm = ChatGoogleGenerativeAI(temperature = 0.0, model="gemini-1.5-flash")

In [36]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])


In [37]:
response = llm.call_as_llm(f"{qdocs} Question: Please list all your \
shirts with sun protection in a table in markdown and summarize each one.") 


  response = llm.call_as_llm(f"{qdocs} Question: Please list all your \


In [38]:
display(Markdown(response))

| Shirt Name                     | Summary                                                                                                                                                                                                                                | Material                     | UPF Rating | Wash Instructions |
|---------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------|-------------|--------------------|
| Sun Shield Shirt by             | High-performance sun shirt with UPF 50+ protection, moisture-wicking, abrasion-resistant, slightly fitted.                                                                                                                                      | 78% nylon, 22% Lycra Xtra Life fiber | 50+         | Handwash, line dry |
| Tropical Breeze Shirt           | Lightweight, breathable long-sleeve men's shirt with UPF 50+ SunSmart protection, wrinkle-resistant, moisture-wicking, traditional fit, cape venting.                                                                                             | 71% nylon, 29% polyester    | 50+         | Machine wash & dry |
| Men's Plaid Tropic Shirt, Short-Sleeve | Short-sleeve shirt with UPF 50+ SunSmart protection, wrinkle-free, moisture-wicking, cape venting, bellows pockets. Originally designed for fishing, great for travel.                                                                               | 52% polyester, 48% nylon     | 50+         | Machine wash & dry |
| Sunrise Tee                     | Women's UV-protective button-down shirt, lightweight, moisture-wicking, wrinkle-free, UPF 50+ SunSmart protection, slightly fitted, cape venting, pockets, tool tabs, eyewear loop. Originally designed for fishing, great for travel. | 71% nylon, 29% polyester    | 50+         | Machine wash & dry |

In [39]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [40]:
query =  "Please list all your shirts with sun protection in a table \
in markdown and summarize each one."

In [41]:
response = qa_stuff.run(query)

  response = qa_stuff.run(query)




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [42]:
display(Markdown(response))

| ID | Name | Summary |
|---|---|---|
| 709 | Sunrise Tee | Women's UV-protective button-down shirt. Lightweight, moisture-wicking, wrinkle-free fabric with UPF 50+ sun protection. Features include pockets, tool tabs, eyewear loop, and cape venting. |
| 679 | Women's Tropical Tee, Sleeveless | Women's sleeveless button-up shirt with UPF 50+ sun protection. Slightly fitted, wrinkle-resistant, with pockets, tool tabs, and eyewear loop. |
| 255 | Sun Shield Shirt | High-performance sun shirt with UPF 50+ sun protection. Moisture-wicking, abrasion-resistant, and fits over a swimsuit. Handwash and line dry. Recommended by The Skin Cancer Foundation. |
| 374 | Men's Plaid Tropic Shirt, Short-Sleeve | Men's short-sleeve shirt with UPF 50+ sun protection. Lightweight, wrinkle-free, moisture-wicking fabric. Features cape venting and bellows pockets. |

In [43]:
response = index.query(query, llm=llm)

In [44]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])