# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

In [1]:
from google.colab import userdata
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')

In [2]:
!pip install -qU langchain langchain_community langchain-google-genai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m292.2/292.2 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI
Model_name = "gemini-1.5-flash"

llm_model = ChatGoogleGenerativeAI(
    model = Model_name,
    api_key= GOOGLE_API_KEY,
    temperature=0.5,
)

In [4]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [5]:
# Get Sample file
!curl -L -o OutdoorClothingCatalog_1000.csv https://raw.githubusercontent.com/gopitk/dlai-sk/refs/heads/main/OutdoorClothingCatalog_1000.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  734k  100  734k    0     0  1448k      0 --:--:-- --:--:-- --:--:-- 1448k


In [6]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)

In [7]:
from langchain.indexes import VectorstoreIndexCreator

In [8]:
!pip install -qU docarray

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/270.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m266.2/270.2 kB[0m [31m13.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.2/270.2 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [12]:
# Import the GoogleGenerativeAIEmbeddings class from the langchain_google_genai module
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",
                                          google_api_key=GOOGLE_API_KEY)

In [17]:
# Pass the embeddings model to VectorstoreIndexCreator
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings
).from_loaders([loader])


In [14]:
query ="Please list all your shirts with sun protection \
in a table in markdown and summarize each one."

In [15]:
response = index.query(query, llm = llm_model)

In [16]:
display(Markdown(response))

| Shirt Name | Description |
|---|---|
| Women's Tropical Tee, Sleeveless | A sleeveless button-up shirt with a flattering fit and SunSmart™ protection to block the sun's harmful UV rays. Features wrinkle resistance, low-profile pockets, and side shaping. |
| Sun Shield Shirt | A high-performance sun shirt guaranteed to protect from harmful UV rays. Features moisture-wicking, quick-drying, and abrasion-resistant fabric. Recommended by The Skin Cancer Foundation as an effective UV protectant. |
| Sunrise Tee | A lightweight, UV-protective button-down shirt for hot weather. Features moisture-wicking, wrinkle-free fabric and UPF 50+ sun protection. Originally designed for fishing, but also great for travel. |
| Tropical Breeze Shirt | A lightweight, breathable long-sleeve men’s UPF shirt offering superior SunSmart™ protection from the sun's harmful rays. Features wrinkle-resistant, moisture-wicking fabric and UPF 50+ sun protection. Originally designed for fishing, but also great for extended travel. | 


## Step By Step

In [27]:
# from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path=file)

In [28]:
docs = loader.load()

In [29]:
docs[0]

Document(metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0}, page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.")

In [30]:
# from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001",
                                          google_api_key=GOOGLE_API_KEY)

In [31]:
embed = embeddings.embed_query("Hi my name is Muhammad Khubaib")

In [32]:
print(len(embed))

768


In [33]:
print(embed[:5])

[0.012749855406582355, -0.04938590154051781, -0.02465217560529709, -0.05600918084383011, 0.041469085961580276]


In [34]:
db = DocArrayInMemorySearch.from_documents(
    docs,
    embeddings
)

In [35]:
query = "Please suggest a shirt with sunblocking"

In [36]:
docs = db.similarity_search(query)

In [37]:
len(docs)

4

In [38]:
docs[0]

Document(metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 255}, page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.')

In [39]:
retriever = db.as_retriever()

In [40]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])

In [41]:
response = llm_model.call_as_llm(f"{qdocs} Question: Please list all your \
shirts with sun protection in a table in markdown and summarize each one.")

  response = llm_model.call_as_llm(f"{qdocs} Question: Please list all your \


In [42]:
display(Markdown(response))

## Sun Protection Shirts

| Shirt Name | Description | Sun Protection | Fabric | Size & Fit |
|---|---|---|---|---|
| Sun Shield Shirt | High-performance sun shirt with UPF 50+ rating. Wicks moisture, abrasion resistant. | UPF 50+ | 78% nylon, 22% Lycra Xtra Life fiber | Slightly Fitted |
| Tropical Breeze Shirt | Lightweight, breathable long-sleeve shirt with UPF 50+ rating. Wrinkle-resistant, moisture-wicking. | UPF 50+ | 71% nylon, 29% polyester, polyester-mesh inserts | Traditional Fit |
| Men's Plaid Tropic Shirt, Short-Sleeve | Ultracomfortable sun protection with UPF 50+ rating. Wrinkle-free, moisture-wicking. | UPF 50+ | 52% polyester, 48% nylon | N/A |
| Sunrise Tee | Lightweight, high-performance button down shirt with UPF 50+ rating. Wicks away moisture, dries quickly. | UPF 50+ | 71% nylon, 29% polyester | Slightly Fitted |

**Summary:**

This table lists four shirts offering sun protection with UPF 50+ rating. They all feature lightweight, breathable fabrics that wick moisture and dry quickly. The Sun Shield Shirt is a fitted, short-sleeve option, while the Tropical Breeze Shirt is a long-sleeve, traditional fit. The Men's Plaid Tropic Shirt is a short-sleeve, casual option, and the Sunrise Tee is a button-down, slightly fitted shirt. All shirts offer the highest rated sun protection available, blocking 98% of the sun's harmful UV rays. 


In [43]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm_model,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

In [44]:
query =  "Please list all your shirts with sun protection in a table \
in markdown and summarize each one."

In [58]:
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [59]:
response
# display(Markdown(response))

"| Shirt Name | Description |\n|---|---|\n| Sunrise Tee | Lightweight, high-performance fabric wicks away moisture and dries quickly. Built-in SunSmart™ UPF 50+ rated – the highest rated sun protection possible. |\n| Women's Tropical Tee, Sleeveless | Five-star sleeveless button-up shirt with SunSmart™ protection to block the sun’s harmful UV rays. Built-in SunSmart™ UPF 50+ rated – the highest rated sun protection possible. |\n| Sun Shield Shirt by | High-performance sun shirt guaranteed to protect from harmful UV rays. UPF 50+ rated – the highest rated sun protection possible. Wicks moisture for quick-drying comfort. Abrasion resistant for season after season of wear. |\n| Men's Plaid Tropic Shirt, Short-Sleeve | Ultracomfortable sun protection is rated to UPF 50+, helping you stay cool and dry. Originally designed for fishing, this lightest hot-weather shirt offers UPF 50+ coverage and is great for extended travel. SunSmart technology blocks 98% of the sun's harmful UV rays, while t

In [56]:
response = index.query(query, llm=llm_model)

In [57]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])