# LangChain: Q&A over Documents
An example might be a tool that would allow you to query a product catalog for items of interest.

In [42]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [43]:
# Choose the chat model by date to account for deprecation of the dated snapshot.
import datetime

# Last day on which the "gpt-3.5-turbo-0301" snapshot is still selected
target_date = datetime.date(2024, 6, 12)

# Today's date according to the local clock
current_date = datetime.date.today()

# Strictly after the cutoff -> "gpt-3.5-turbo"; on or before it -> the snapshot
llm_model = (
    "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"
)

In [44]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [45]:
# Path of the CSV catalog that the rest of the notebook queries
file = 'OutdoorClothingCatalog_1000.csv'
# Loader for that CSV, reading it with UTF-8 encoding
loader = CSVLoader(file_path=file, encoding="utf8")

In [46]:
from langchain.indexes import VectorstoreIndexCreator

In [47]:
#pip install docarray

In [48]:
# Build a queryable index from the CSV loader, using DocArrayInMemorySearch
# as the vector store class.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [49]:
# Question put to the index; adjacent literals are joined into one sentence
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [50]:
response = index.query(query)

In [51]:
display(Markdown(response))



| Name | Description |
| --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | UPF 50+ rated, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets |
| Men's Plaid Tropic Shirt, Short-Sleeve | UPF 50+ rated, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets |
| Men's TropicVibe Shirt, Short-Sleeve | UPF 50+ rated, 71% Nylon, 29% Polyester, 100% Polyester knit mesh, wrinkle resistant, front and back cape venting, two front bellows pockets |
| Sun Shield Shirt by | UPF 50+ rated, 78% nylon, 22% Lycra Xtra Life fiber, wicks moisture, fits comfortably over swimsuit, abrasion resistant |

All four shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. The Men's Tropical Plaid Short-Sleeve Shirt is made of 100% polyester and is wrinkle-resistant. The Men's Plaid Trop

# Step By Step

In [52]:
from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path=file, encoding="utf8")

In [53]:
docs = loader.load()

In [54]:
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [55]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [56]:
embed = embeddings.embed_query("Hi my name is Harrison")

In [57]:
print(embed)

[-0.021913960932078383, 0.006774206755842609, -0.018190348816400977, -0.039148249368104494, -0.014089343366938917, 0.016831733559834288, 0.0023115328888624583, -0.008145401386628998, 0.012843946203639883, 0.00010653493691555377, 0.0037487717954393937, 0.008296359465200933, -0.014403837966979896, -0.0025505486824481173, 0.004613631014229494, -0.01645434115737222, 0.02878251718552039, -0.019674761540455023, 0.03321059474659795, -0.02114659582161196, -0.011850144906638148, -0.01976281995396069, 0.01251058207660806, -0.003918598935340877, -0.015133464954787219, -0.00767995010499997, 0.017951334186968558, 5.945907565739409e-05, 0.024882788242831654, -0.0274742194258325, -0.0007736559035218474, -0.001819349389278655, 0.007503833743649932, -0.04309829767031721, -0.016441760851829933, -0.0250714835127401, -0.0016149281553657312, -0.01954896407296767, 0.014189982085986874, -0.01923446947292669, 0.024253799042749696, -0.020228272632573603, -0.005648317372921079, 0.006123203418045529, -0.01635370

In [58]:
print(len(embed))

1536


In [59]:
print(embed[:5])

[-0.021913960932078383, 0.006774206755842609, -0.018190348816400977, -0.039148249368104494, -0.014089343366938917]


In [60]:
# Create the vector store from the loaded documents, using `embeddings`
# as the embedding model.
db = DocArrayInMemorySearch.from_documents(
    docs,
    embeddings
)

In [61]:
query = "Please suggest a shirt with sunblocking"

In [62]:
len(docs)

1000

In [63]:
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [64]:
retriever = db.as_retriever()

In [65]:
llm = ChatOpenAI(temperature=0.0, model=llm_model)

In [66]:
# Concatenate the text of every loaded document into one string (no separator),
# iterating the documents directly rather than indexing by position.
# NOTE(review): `docs` holds 1000 rows, so this string is very large — confirm
# it fits the model's context window before sending it as a prompt.
qdocs = "".join(doc.page_content for doc in docs)

In [None]:
# Send the concatenated documents plus the question to the chat model directly
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)

In [None]:
display(Markdown(response))

In [None]:
# Build a question-answering chain on top of the retriever. The "stuff" chain
# type places the retrieved documents into a single prompt for the LLM.
# Fixes the misspelled class (`RetrievalQA`) and the misspelled `retriever`
# keyword/variable, which previously raised NameError.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)

In [None]:
# Question passed to the QA chain; adjacent literals are joined into one sentence
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
response = qa_stuff.run(query)

In [None]:
display(Markdown(response))

In [68]:
# This one line does the same as the multiple steps above; it is shown as an example of a simpler way to do this
response = index.query(query, llm=llm)

In [None]:
# Rebuild the index, this time passing the embeddings object explicitly.
# The class name matches the import (`VectorstoreIndexCreator`); the previous
# spelling `VectorStoreIndexCreator` was undefined and raised NameError.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])