In [1]:
import os
os.environ["NVIDIA_API_KEY"] = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

In [2]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
llm_nvidia = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1")

In [3]:
from langchain_community.chat_models import ChatOllama

llm_ollama = ChatOllama(model="llama3")

In [4]:
llm = llm_nvidia

# Question and Answer

![LLMS on Documents](data/images/llms-on-documents.png)

![Embeddings](data/images/embeddings.png)

![Vector Database](data/images/vector-database.png)

![Vector Database to LLM](data/images/vector-database-to-llm.png)

In [5]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

### VectorstoreIndexCreator

In [6]:
file = 'data/files/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)

In [7]:
from langchain.indexes import VectorstoreIndexCreator

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

  from tqdm.autonotebook import tqdm, trange


In [9]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding= embedding
).from_loaders([loader])



In [10]:
query ="Please list all your shirts with sun protection \
in a table in markdown and summarize each one."

In [11]:
response = index.query(query, 
                       llm = llm)

In [12]:
display(Markdown(response))

 | Product ID | Name | Description | Fabric & Care | Sun Protection |
| --- | --- | --- | --- | --- |
| 679 | Women's Tropical Tee, Sleeveless | Five-star sleeveless button-up shirt with SunSmart™ protection to block UV rays. Slightly fitted, wrinkle-resistant, and has two front pockets. Fabric: 71% nylon, 29% polyester. Machine washable. | Built-in SunSmart™ UPF 50+ rated sun protection, blocking 98% of harmful rays | Yes |
| 255 | Sun Shield Shirt by | High-performance sun shirt that is guaranteed to protect from harmful UV rays. Slightly fitted, moisture-wicking, quick-drying, and abrasion-resistant. Fabric: 78% nylon, 22% Lycra Xtra Life fiber. Handwash and line dry recommended. | UPF 50+ rated sun protection, blocking 98% of harmful rays, and recommended by The Skin Cancer Foundation as an effective UV protectant. | Yes |
| 618 | Men's Tropical Plaid Short-Sleeve Shirt | Lightweight hot-weather shirt with front and back cape venting for cool breezes. Relaxed traditional fit and two front bellows pockets. Fabric: 100% polyester. Machine washable. | UPF 50+ rated sun protection, blocking 98% of harmful rays, and wrinkle-resistant. | Yes |
| 709 | Sunrise Tee | Lightweight performance synthetic shirt for women. Slightly fitted, wrinkle-free, and has a cape lining for sun protection. Fast-drying and moisture-wicking. Fabric: 71% nylon, 29% polyester. Machine washable. | Built-in sun protection with a UPF 50+ rating. | Yes |

In summary, all four shirts offer UPF 50+ sun protection that blocks 98% of harmful UV rays, is lightweight and designed for hot weather, wrinkle-resistant, and machine washable. The Women's Tropical Tee and Sunrise Tee are specifically designed for women, while the Tropical Plaid Shirt is for men. The Sun Shield Shirt offers additional features such as moisture-wicking and quick-drying capabilities.

### Step by Step

Question answering over documents consists of four steps:
1) Create an index
2) Create a Retriever from that index
3) Create a question-answering chain
4) Ask questions!


To use retrieval in LangChain, you can follow these steps:
1) Load documents: Use document loaders to load documents from various sources, such as files, websites, or databases.
1) Transform documents: Apply document transformers to preprocess and transform the loaded documents, such as splitting large documents into smaller chunks or applying specific logic optimized for different document types.
1) Create embeddings: Generate embeddings for the documents using text embedding models. Embeddings capture the semantic meaning of text and enable efficient searching and similarity calculations.
1) Store documents and embeddings: Use vector stores to store the documents and their corresponding embeddings. Vector stores provide efficient storage and retrieval capabilities for large collections of embeddings.
1) Retrieve relevant documents: Use retrievers to query the vector store and retrieve relevant documents based on user queries or search criteria. Retriever algorithms, such as similarity search or Maximum Marginal Relevance (MMR) search, can be used to find the most relevant documents.

#### Fully Manual Way of Doing Things

In [13]:
from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path=file)

In [14]:
docs = loader.load()

In [15]:
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [16]:
embed = embedding.embed_query("Subash")

In [17]:
embed[:5]

[0.014350582845509052,
 -0.049315445125103,
 0.013720898889005184,
 -0.05654430389404297,
 0.08923717588186264]

In [18]:
db = DocArrayInMemorySearch.from_documents(
    docs,
    embedding
)

In [19]:
query = "Please suggest a shirt with sunblocking"

In [20]:
response_docs = db.similarity_search(query)

In [21]:
response_docs[0]

Document(page_content=": 709\nname: Sunrise Tee\ndescription: Stay cool, comfortable and dry on the hottest days in our women's UV-protective button down shirt. The lightweight, high-performance fabric wicks away moisture and dries quickly.\n\nSize & Fit\nSlightly Fitted: Softly shapes the body. Falls at hip.\n\nWhy We Love It\nOur lightest hot-weather shirt lets you beat the heat. Originally designed for fishing, it's also a great choice for travel thanks to its wrinkle-free fabric and built-in sun protection with a rating of UPF 50+.\n\nFabric & Care\nLightweight performance synthetic wicks moisture, resists wrinkles and dries fast. Shell: 71% nylon, 29% polyester. Cape lining: 100% polyester. Machine wash and dry.\n\nAdditional Features\nBuilt-in SunSmart™ UPF 50+ rated – the highest rated sun protection possible. The high-performance fabric keeps you cool and comfortable by wicking perspiration away. Smoother buttons, low-profile pockets and side shaping for a flattering fit. Front

If we were doing this by hand, we would combine the documents into a single piece of text.

In [22]:
qdocs = "".join([docs[i].page_content for i in range(len(response_docs))])

In [23]:
response = llm.call_as_llm(f"{qdocs} Question: Please list all your \
shirts with sun protection in a table in markdown and summarize each one.")

  warn_deprecated(


In [24]:
display(Markdown(response))

 | Name | Description | Size & Fit | Specs | Construction & Additional Features |
|---|---|---|---|---|
| Women's Campside Oxfords | Ultracomfortable lace-to-toe Oxford with soft canvas material and EVA foam midsole. Order regular shoe size. | Approx. weight: 1 lb.1 oz. per pair. | Features Cleansport NXT® antimicrobial odor control. | Imported. |
| Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece | Bright colored, ruffles and whimsical prints two-piece swimsuit for toddlers with UPF 50+ rated fabric. | N/A | Made from four-way-stretch and chlorine-resistant fabric. | Fully lined bottom and crossover no-slip straps. |
| Refresh Swimwear, V-Neck Tankini Contrasts | Watersport-ready tankini top with V-neck silhouette, recycled nylon and Lycra® spandex for perfect stretch. | Fitted: Sits close to the body | UPF 50+ rated - the highest rated sun protection possible. | Lightweight racerback straps, handwash, line dry. |

1. Women's Campside Oxfords: These ultracomfortable lace-to-toe Oxfords feature a soft canvas material and EVA foam midsole, providing a broken-in feel and look. With Cleansport NXT® antimicrobial odor control, these shoes are imported and order in regular shoe size.
2. Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece: A bright colored two-piece swimsuit designed for toddler girls, with UPF 50+ rated fabric that provides the highest rated sun protection possible, blocking 98% of the sun's harmful rays. It is made from four-way-stretch and chlorine-resistant fabric, with fully lined bottom and crossover no-slip straps for a secure fit and maximum coverage.
3. Refresh Swimwear, V-Neck Tankini Contrasts: Watersport-ready tankini top made of premium Italian-blend recycled nylon with Lycra® spandex, providing perfect stretch. The top has lightweight racerback straps and a flattering V-neck silhouette, with UPF 50+ rated - the highest rated sun protection possible. It is handwash and line dry.

#### Using RetrievalQA

For Question Answer over our own documents, we need to create a  **Retriever** from this **Vector Store**.

**Retriever** is a generic interface that can be underpinned by any method that takes in a query and returns documents.

In [25]:
retriever = db.as_retriever()

All these steps can be encapsulated with the LangChain chain. Here we can create a retrieval QA chain. This does retrieval and does Question Answering over the retrieved documents.

![Stuff Method](data/images/stuff-method.png)

![Additional Chain Types](data/images/additional-chain-types.png)

In [26]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

In [27]:
query = "Please list all your shirts with sun protection in a table in markdown and summarize each one."

In [28]:
response = qa_stuff.run(query)

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [29]:
display(Markdown(response))

 Here is a table summarizing the shirts with sun protection:

| Name | Description | Size & Fit | Fabric & Care | Additional Features | Sun Protection |
| --- | --- | --- | --- | --- | --- |
| Sunrise Tee | Lightweight, high-performance shirt for hot weather with UV-protective fabric | Slightly fitted, falls at hip | 71% nylon, 29% polyester | Built-in sun protection (UPF 50+), moisture-wicking, wrinkle-resistant | Blocks 98% of sun's harmful rays |
| Women's Tropical Tee, Sleeveless | Sleeveless, button-up shirt with sun protection and flattering fit | Slightly fitted, falls at hip | 71% nylon, 29% polyester | Updated design with smoother buttons, wrinkle-resistant | Blocks 98% of sun's harmful rays |
| Sun Shield Shirt | High-performance sun shirt with UV protection | Slightly fitted, falls at hip | 78% nylon, 22% Lycra Xtra Life fiber | Moisture-wicking, quick-drying, abrasion-resistant | Blocks 98% of sun's harmful rays |
| Men's Tropical Plaid Short-Sleeve Shirt | Lightweight, hot-weather shirt with UV protection and traditional fit | Relaxed through chest, sleeve, and waist | 100% polyester | Front and back cape venting, two front bellows pockets | Blocks 98% of sun's harmful rays |

All of these shirts offer sun protection by blocking 98% of the sun's harmful rays through the use of high-performance fabrics. They are all designed for hot weather and have moisture-wicking and wrinkle-resistant properties. The Sunrise Tee and Women's Tropical Tee are slightly fitted and have low-profile pockets, while the Sun Shield Shirt is abrasion-resistant and fits comfortably over a swimsuit. The Men's Tropical Plaid Short-Sleeve Shirt has a traditional, relaxed fit and front and back cape venting.

#### Using VectorStoreIndexCreator

VectorstoreIndexCreator is just a wrapper around all this logic.

A lot of the magic is being hidden in this: VectorstoreIndexCreator.
Three main steps are going on after the documents are loaded:
1) Splitting documents into chunks
2) Creating embeddings for each document
3) Storing documents and embeddings in a vectorstore

In [30]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding,
).from_loaders([loader])
response = index.query(query, llm=llm)

In [31]:
display(Markdown(response))

 | Product ID | Name | Description | Fabric & Care | Sun Protection |
| --- | --- | --- | --- | --- |
| 679 | Women's Tropical Tee, Sleeveless | Five-star sleeveless button-up shirt with SunSmart™ protection to block UV rays. Slightly fitted, wrinkle-resistant, and has two front pockets. Fabric: 71% nylon, 29% polyester. Machine washable. | Built-in SunSmart™ UPF 50+ rated sun protection, blocking 98% of harmful rays | Yes |
| 255 | Sun Shield Shirt by | High-performance sun shirt that is guaranteed to protect from harmful UV rays. Slightly fitted, moisture-wicking, quick-drying, and abrasion-resistant. Fabric: 78% nylon, 22% Lycra Xtra Life fiber. Handwash and line dry recommended. | UPF 50+ rated sun protection, blocking 98% of harmful rays, and recommended by The Skin Cancer Foundation as an effective UV protectant. | Yes |
| 618 | Men's Tropical Plaid Short-Sleeve Shirt | Lightweight hot-weather shirt with front and back cape venting for cool breezes. Relaxed traditional fit and two front bellows pockets. Fabric: 100% polyester. Machine washable. | UPF 50+ rated sun protection, blocking 98% of harmful rays, and wrinkle-resistant. | Yes |
| 709 | Sunrise Tee | Lightweight performance synthetic shirt for women. Slightly fitted, wrinkle-free, and has a cape lining for sun protection. Fast-drying and moisture-wicking. Fabric: 71% nylon, 29% polyester. Machine washable. | Built-in sun protection with a UPF 50+ rating. | Yes |

In summary, all four shirts offer UPF 50+ sun protection that blocks 98% of harmful UV rays, is lightweight and designed for hot weather, wrinkle-resistant, and machine washable. The Women's Tropical Tee and Sunrise Tee are specifically designed for women, while the Tropical Plaid Shirt is for men. The Sun Shield Shirt offers additional features such as moisture-wicking and quick-drying capabilities.