# LangChain: Q&A over Documents

In [34]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOllama
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain_community.llms import Ollama
from langchain.embeddings import HuggingFaceEmbeddings

In [35]:
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)

In [36]:
from langchain.indexes import VectorstoreIndexCreator

In [None]:
%pip install docarray

In [None]:
%pip install sentence-transformers

In [37]:
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"  # Modelo ligero y efectivo
)

index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding_model
).from_loaders([loader])

In [38]:
query ="Por favor, listame todos los productos que esten dentro de la categoria 'Camisetas'"

In [41]:
llm = Ollama(
    model="llama3.2",
    temperature=0
)

response = index.query(query, llm)

In [42]:
display(Markdown(response))

Claro, aquí te dejo la lista de productos que están dentro de la categoría "Camisetas":

1. Outdoor Gear 731
2. Outdoor Gear 379
3. Outdoor Gear 579
4. Outdoor Gear 789

### Step by step

In [43]:
from langchain.document_loaders import CSVLoader
loader = CSVLoader(file_path=file)

In [44]:
docs = loader.load()

In [45]:
docs[0]

Document(metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0}, page_content='ProductID: P0001\nProductName: Outdoor Gear 1\nCategory: Chaquetas\nPrice: 105.88\nStock: 47\nRating: 4.5')

In [46]:
embed = embedding_model.embed_query("Hola, mi nombre es Tomas")

In [47]:
print(len(embed))

384


In [48]:
print(embed[:5])

[-0.041326768696308136, 0.11336180567741394, -0.0167353805154562, -0.032827600836753845, -0.030062709003686905]


In [49]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embedding_model
)

In [50]:
query = "Por favor, dame el producto mas barato de la categoria 'Camisetas'"

In [51]:
docs = db.similarity_search(query)

In [52]:
len(docs)

4

In [53]:
docs[0]

Document(metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 378}, page_content='ProductID: P0379\nProductName: Outdoor Gear 379\nCategory: Camisetas\nPrice: 220.77\nStock: 61\nRating: 3.6')

In [54]:
retriever = db.as_retriever()

In [56]:
llm = ChatOllama(model="llama3.2", temperature=0)

  llm = ChatOllama(model="llama3.2", temperature=0)


In [57]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])

In [58]:
response = llm.call_as_llm(f"{qdocs} Question: Por favor, listame todos los productos que esten dentro de la categoria 'Camisetas'")

  response = llm.call_as_llm(f"{qdocs} Question: Por favor, listame todos los productos que esten dentro de la categoria 'Camisetas'")


In [59]:
display(Markdown(response))

¡Claro! A continuación, te presento la lista de productos que están dentro de la categoría "Camisetas":

1. Outdoor Gear 379
2. Outdoor Gear 731
3. Outdoor Gear 687
4. Outdoor Gear 683

Espero que esta información sea útil para ti. ¡Si necesitas algo más, no dudes en preguntar!

In [60]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [61]:
query ="Por favor, listame todos los productos que esten dentro de la categoria 'Camisetas'"

In [62]:
response = qa_stuff.run(query)

  response = qa_stuff.run(query)




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [63]:
display(Markdown(response))

Claro, aquí te dejo la lista de productos dentro de la categoría "Camisetas":

1. Outdoor Gear 731 (P0731)
2. Outdoor Gear 379 (P0379)
3. Outdoor Gear 579 (P0579)
4. Outdoor Gear 789 (P0789)

In [64]:
response = index.query(query, llm)

In [66]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding_model,
).from_loaders([loader])