# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

In [18]:
#pip install --upgrade langchain

In [19]:
import os
import openai

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ['OPENAI_API_BASE']
openai.api_type = os.environ['OPENAI_API_TYPE']
openai.api_version = os.environ['OPENAI_API_VERSION']

model = os.environ['CHAT_MODEL_NAME']

In [20]:
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [21]:
import pandas as pd
file = 'fine_food_reviews_1k.csv'
data = pd.read_csv(file,index_col=0)
data.head()

Unnamed: 0,Time,ProductId,UserId,Score,Summary,Text
0,1351123200,B003XPF9BO,A3R7JR3FMEBXQB,5,where does one start...and stop... with a tre...,Wanted to save some to bring to my Chicago fam...
1,1351123200,B003JK537S,A3JBPC3WFUT5ZP,1,Arrived in pieces,"Not pleased at all. When I opened the box, mos..."
2,1351123200,B000JMBE7M,AQX1N6A51QOKG,4,"It isn't blanc mange, but isn't bad . . .",I'm not sure that custard is really custard wi...
3,1351123200,B004AHGBX4,A2UY46X0OSNVUQ,3,These also have SALT and it's not sea salt.,I like the fact that you can see what you're g...
4,1351123200,B001BORBHO,A1AFOYZ9HSM2CZ,5,Happy with the product,My dog was suffering with itchy skin. He had ...


In [22]:
loader = CSVLoader(file_path=file)
loader

<langchain.document_loaders.csv_loader.CSVLoader at 0x7fd29c1f4cd0>

In [15]:
#!pip install "langchain[docarray]"

In [23]:
from langchain.indexes import VectorstoreIndexCreator

index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])


InvalidRequestError: Too many inputs. The max number of inputs is 1.  We hope to increase the number of inputs per request soon. Please contact us through an Azure support request at: https://go.microsoft.com/fwlink/?linkid=2213926 for further questions.

In [24]:
query ="Please list all your shirts with sun protection \
in a table in markdown and summarize each one."

response = index.query(query)
print(response)

NameError: name 'index' is not defined

In [None]:
display(Markdown(response))

In [12]:
loader = CSVLoader(file_path=file)

In [13]:
docs = loader.load()
len(docs)

1000

In [14]:
docs[0]

Document(page_content=': 0\nTime: 1351123200\nProductId: B003XPF9BO\nUserId: A3R7JR3FMEBXQB\nScore: 5\nSummary: where does one  start...and stop... with a treat like this\nText: Wanted to save some to bring to my Chicago family but my North Carolina family ate all 4 boxes before I could pack. These are excellent...could serve to anyone', metadata={'source': 'fine_food_reviews_1k.csv', 'row': 0})

In [15]:
from langchain.embeddings import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [17]:
embed = embeddings.embed_query("Hi my name is Harrison")
print(len(embed))


1536


In [18]:
print(embed[:5])

[-0.02186359278857708, 0.006734037306159735, -0.01820078119635582, -0.03919587284326553, -0.014047075994312763]


In [19]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embeddings
)

InvalidRequestError: Too many inputs. The max number of inputs is 1.  We hope to increase the number of inputs per request soon. Please contact us through an Azure support request at: https://go.microsoft.com/fwlink/?linkid=2213926 for further questions.

In [None]:
query = "Please suggest a shirt with sunblocking"

In [None]:
docs = db.similarity_search(query)

In [None]:
len(docs)

In [None]:
docs[0]

In [None]:
retriever = db.as_retriever()

In [None]:
llm = AzureChatOpenAI(temperature=0.0,
    openai_api_base=openai.api_base,
    openai_api_version=openai.api_version,
    deployment_name=model,
    openai_api_key=openai.api_key,
    openai_api_type = openai.api_type,
)

In [None]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])


In [None]:
response = llm.call_as_llm(f"{qdocs} Question: Please list all your \
shirts with sun protection in a table in markdown and summarize each one.") 


In [None]:
display(Markdown(response))

In [None]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [None]:
query =  "Please list all your shirts with sun protection in a table \
in markdown and summarize each one."

In [None]:
response = qa_stuff.run(query)

In [None]:
display(Markdown(response))

In [None]:
response = index.query(query, llm=llm)

In [None]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])