# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

## Part 1: Creating the Vector Store

In [1]:
import os
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown


from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

In [5]:
useLogging = True # set to True to get logging information (and hopefully track which LLM is called when)
use_Ollama_For_Inference = True # set to True to use Ollama inference models (and pull at least the gemma:2b model)
use_Ollama_For_Embedding = True # set to True to use Ollama embeddings models (and pull the model selected by embeddings_model_name below, e.g. mxbai-embed-large:latest or nomic-embed-text:latest)
use_Pinecone = True # set to True to use a Pinecone database. Sign up at www.pinecone.io for a free plan (including 5 indexes)
use_Chroma = True # set to True to use a local Chroma database. Supersedes the use_Pinecone flag above (and turns it off)
use_Test_Data = False # set to True to use the LimitedCSVLoader class below, which only loads the item at index 577 of the CSV file, to test that querying with embeddings works well.

import openai
# Defaults to OpenAI when use_Ollama_For_Inference=False and use_Ollama_For_Embedding=False.
# The infer*/embed* variables start out identical and are overridden separately further down.
openai.api_base = inferApiBase = embedApiBase =  "https://api.openai.com/v1"
openai.base_url = inferBaseUrl = embedBaseUrl = "https://api.openai.com"
openai.api_key = inferApiKey = embedApiKey = os.environ['OPENAI_API_KEY']
embeddings_model_name = "text-embedding-ada-002"
# Short model tag that becomes part of the vector index name.
embeddings_model_name_short = "ada"
# Used as the `dimension` when a Pinecone index is created.
embeddings_vector_size = 1536
infer_model_name = "gpt-3.5-turbo"
# Platform tag that becomes the suffix of the vector index name.
llm_platform = "openai"
# chunk_size / chunk_overlap handed to the RecursiveCharacterTextSplitter.
embed_chunk_size = 1000
embed_overlap = 0


# Ollama overrides for inference: target the local Ollama server instead of OpenAI.
if use_Ollama_For_Inference:
    inferApiBase = "http://localhost:11434/v1"
    # Same port as inferApiBase (was "http://localhost:1143", a truncated port number).
    inferBaseUrl = "http://localhost:11434"
    inferApiKey = "ollama"
    infer_model_name = "gemma:2b" #you can/should customize this to test different Ollama LLMs. Use the NAME field from `ollama list`


# Ollama overrides for embeddings. llm_platform is switched here (not in the inference
# branch) because it is the suffix of the vector index name built from these embeddings.
if use_Ollama_For_Embedding:
    llm_platform = "ollama"
    embedApiBase = "http://localhost:11434/v1"
    # Same port as embedApiBase (was "http://localhost:1143", a truncated port number).
    embedBaseUrl = "http://localhost:11434"
    embedApiKey = "ollama"
    embeddings_model_name = "mxbai-embed-large:latest"
    embeddings_model_name_short = "mxbai"
    embeddings_vector_size = 1024
    embed_chunk_size = 512
    embed_overlap = 10
    #
    # Alternative embedding model; uncomment this group (and comment the mxbai one) to use it.
    #embeddings_model_name = "nomic-embed-text:latest" #you can/should customize this to test different Ollama LLMs. Use the NAME field from `ollama list`
    #embeddings_model_name_short = "nomic"
    #embeddings_vector_size = 768
    #embed_chunk_size = 8192
    #embed_overlap = 0

def _mask_secret(value, placeholder="ollama"):
    """Return a printable form of an API key that does not leak it.

    The dummy key used for Ollama (`placeholder`) is not a secret and is
    returned unchanged; any other value keeps only its last 4 characters.
    """
    if value == placeholder:
        return value
    text = str(value)
    if len(text) <= 4:
        return "*" * len(text)
    return "*" * (len(text) - 4) + text[-4:]


# Keys are masked so that a real OpenAI key never ends up in the saved notebook output.
print('Embed API Key:', _mask_secret(embedApiKey))
print('Infer API Key:', _mask_secret(inferApiKey))
print('Embed API Base:', embedApiBase)
print('Infer API Base:', inferApiBase)
print('Embeddings Model:', embeddings_model_name)
print('Inference Model:', infer_model_name)


# Index name layout: langchain-deeplearningai-<embedding tag>-[s-]<platform>,
# where the "s-" marker is only present when the single-row test data is used.
index_prefix = f"langchain-deeplearningai-{embeddings_model_name_short}-"
index_prefix += 's-' if use_Test_Data else ''
rag_index_name = f"{index_prefix}{llm_platform}"
print('RAG Index Name:', rag_index_name)

# Pick the vector database backend: Chroma wins over Pinecone, and the in-memory
# store is the fallback when both flags are off.
if use_Chroma:
    print('using Chroma Vector database')
    import chromadb
    # Chroma supersedes Pinecone, so the Pinecone flag is switched off for the later cells.
    use_Pinecone = False
    storage_path = os.environ.get('CHROMA_STORAGE_PATH')
    if storage_path is None:
        raise ValueError('CHROMA_STORAGE_PATH environment variable is not set')
    

elif use_Pinecone:

    print('using Pinecone Vector database')
    from pinecone import Pinecone
    from langchain_pinecone import PineconeVectorStore
    from tqdm.autonotebook import tqdm

    PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
    # Falls back to the literal string "PINECONE_ENV" when the variable is unset.
    # NOTE(review): not referenced by the other cells visible here - confirm it is still needed.
    PINECONE_ENV = os.environ.get("PINECONE_ENV", "PINECONE_ENV")

    if PINECONE_API_KEY is None:
        raise ValueError("PINECONE_API_KEY environment variable not set.")

    # Init pinecone
    pc = Pinecone(
        api_key=PINECONE_API_KEY,
        source_tag="langchain-deeplearningai"
    )
else:
    print('using In Memory Vector database')

from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings


# Build the embeddings client matching the selected platform.
# The Ollama client is created with empty embed/query instruction strings;
# the plain variant would be OllamaEmbeddings(model=embeddings_model_name).
embeddings_model = (
    OllamaEmbeddings(model=embeddings_model_name, embed_instruction='', query_instruction='')
    if use_Ollama_For_Embedding
    else OpenAIEmbeddings(model=embeddings_model_name)
)

if useLogging:
    import logging
    import requests 


    # Root logger at DEBUG so that the HTTP calls made by the LLM clients are visible.
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s:%(lineno)d - %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    # add_stderr_logger() attaches a brand-new StreamHandler on every call, so
    # re-running this cell made each urllib3 record print once per run.
    # Attach the handler only when the urllib3 logger does not have one yet.
    urllib3_logger = logging.getLogger('urllib3')
    if not any(isinstance(handler, logging.StreamHandler) for handler in urllib3_logger.handlers):
        requests.packages.urllib3.add_stderr_logger()
    # NOTE(review): this only binds a Python variable; presumably it was meant to be the
    # OLLAMA_DEBUG environment variable of the Ollama server process - confirm.
    OLLAMA_DEBUG=1

2024-06-10 10:38:17,359 DEBUG Added a stderr logging handler to logger: urllib3
2024-06-10 10:38:17,359 DEBUG Added a stderr logging handler to logger: urllib3
2024-06-10 10:38:17,359 DEBUG Added a stderr logging handler to logger: urllib3
2024-06-10 10:38:17,359 DEBUG Added a stderr logging handler to logger: urllib3
2024-06-10 10:38:17 - DEBUG - __init__.py:add_stderr_logger:90 - Added a stderr logging handler to logger: urllib3


Embed API Key: ollama
Infer API Key: ollama
Embed API Base: http://localhost:11434/v1
Infer API Base: http://localhost:11434/v1
Embeddings Model: mxbai-embed-large:latest
Inference Model: gemma:2b
RAG Index Name: langchain-deeplearningai-mxbai-ollama
using Chroma Vector database


In [6]:
from langchain_core.documents.base import Document

class LimitedCSVLoader(CSVLoader):
    """CSVLoader that keeps a single row of the file, for quick end-to-end tests."""

    # Zero-based position, in the list returned by CSVLoader.load(), of the row to keep.
    test_row_index = 577

    def load(self):
        """Load the CSV file and return a one-element list with the selected row.

        Returns:
            A list containing only the document found at ``test_row_index``.

        Raises:
            IndexError: if the file does not have a row at ``test_row_index``.
        """
        # Call the original load method to get all rows
        all_rows = super().load()

        if not 0 <= self.test_row_index < len(all_rows):
            raise IndexError(
                f'row {self.test_row_index} requested but the CSV file '
                f'only produced {len(all_rows)} rows'
            )

        # Always hand back a list so callers can treat the result like CSVLoader.load()
        return [all_rows[self.test_row_index]]

In [7]:
file = 'OutdoorClothingCatalog_1000.csv'

# The test-data flag swaps in the single-row loader; both take the same arguments.
loader_cls = LimitedCSVLoader if use_Test_Data else CSVLoader
loader = loader_cls(file_path=file, encoding='utf-8')

In [8]:
# Read the CSV through the loader selected in the previous cell.
docs = loader.load()
print(f'Documents length: {len(docs)}')
# Bare expression: the notebook displays the first document as the cell result.
docs[0]

Documents length: 1000


Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \r\n\r\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \r\n\r\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \r\n\r\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \r\n\r\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [9]:
from langchain.indexes.vectorstore import VectorstoreIndexCreator
##pip install docarray
##pip install pydantic==1.10.8

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter
# Splitter shared by the Chroma and Pinecone paths; sizes come from the configuration cell.
rc_text_splitter = RecursiveCharacterTextSplitter(chunk_size=embed_chunk_size, chunk_overlap=embed_overlap)

if use_Chroma:
    print(f'initializing the Chroma local vector store {rag_index_name}')
    from langchain_chroma import Chroma

    # A single persistent client is used for the delete, the lookup and the count.
    chromaClient = chromadb.PersistentClient(path=storage_path)

    rebuildIndex = True
    if rebuildIndex:
        try:
            print(f'Deleting collection {rag_index_name}')
            chromaClient.delete_collection(name=rag_index_name)
        except ValueError:
            print(f'Could not delete collection {rag_index_name}')

    # get_or_create_collection is used for both embedding platforms: the former
    # OpenAI-only path called get_collection(), which raises when the collection
    # does not exist - i.e. always right after the rebuild step deleted it.
    collection = chromaClient.get_or_create_collection(name=rag_index_name)
    if collection.count() <= 0:
        # Only (re)embed the documents when the collection is empty.
        if use_Ollama_For_Embedding:
            print('Creating Chroma database using Ollama embeddings')
        index = VectorstoreIndexCreator(
            vectorstore_cls=Chroma,
            embedding=embeddings_model,
            text_splitter=rc_text_splitter,
            vectorstore_kwargs={ "collection_name": rag_index_name,
                                "persist_directory":  storage_path}
        ).from_loaders([loader])
elif use_Pinecone:
    print(f'initializing the Pinecone vector store {rag_index_name}')

    from pinecone import Pinecone, ServerlessSpec
    import time
    
    if rag_index_name in pc.list_indexes().names():
        # Existing index: reuse it as-is, nothing is re-embedded.
        print("Index {} already created".format(rag_index_name))
        #exit()
        index = pc.Index(rag_index_name)
    else:
        print("Creating the Pinecone index...")
        pc.create_index(
            name=rag_index_name,
            dimension=embeddings_vector_size,
            metric='euclidean',
            spec=ServerlessSpec(
                cloud='aws',
                region='us-east-1'
            )
        )
        # Wait for index to be initialized
        while not pc.describe_index(rag_index_name).status["ready"]:
            # TODO remove me
            print("sleeping")
            time.sleep(1)

        if not use_Ollama_For_Embedding:
            index = VectorstoreIndexCreator(
                vectorstore_cls=PineconeVectorStore,
                embedding=embeddings_model,
                text_splitter=rc_text_splitter,
                vectorstore_kwargs={ "index_name": rag_index_name}
            ).from_loaders([loader])
        else:
            print('using Ollama embeddings')

            # Split and upsert explicitly instead of going through VectorstoreIndexCreator.
            docs = loader.load()
            subdocs = rc_text_splitter.split_documents(docs)
            print("Upserting the vectors into the database")
            vectorstore = PineconeVectorStore.from_documents(subdocs, embeddings_model, index_name=rag_index_name)
else:
    print('using DocArrayInMemorySearch')
    index = VectorstoreIndexCreator(
        vectorstore_cls=DocArrayInMemorySearch,
        embedding=embeddings_model
    ).from_loaders([loader])

    # Kept under a dedicated name so the query cell can reach the in-memory vector store.
    inMemoryIndex = index

In [None]:
# Debug cell: dump the content of the Chroma collection.
# chromadb and storage_path only exist when the Chroma backend was selected, so the
# whole inspection is skipped for the Pinecone and in-memory backends.
if use_Chroma:
    chromaClient = chromadb.PersistentClient(path=storage_path)
    print(chromaClient.count_collections())
    print(chromaClient.list_collections())

    #rag_index_name = 'langchain-deeplearningai-nomic-ollama'
    print(rag_index_name)
    collection = chromaClient.get_collection(name=rag_index_name)
    #collection.modify(name=rag_index_name + "_v0")
    print(collection.get(
        include=['embeddings', 'documents', 'metadatas']
                       ))
else:
    print('Chroma is not in use; skipping the collection inspection')

## Part 2: Query

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore

# Open the vector store that the retriever will search, matching the backend
# selected in the configuration cell.
if use_Chroma:
    print("using Chroma for inference")
    db = Chroma(
        collection_name=rag_index_name,
        embedding_function=embeddings_model,
        persist_directory=storage_path,
    )
elif use_Pinecone:
    print("using Pinecone for inference")
    pinecone_index = pc.Index(rag_index_name)
    db = PineconeVectorStore(
        index=pinecone_index,
        embedding=embeddings_model,
        text_key="text",
    )
else:
    print("using DocArrayInMemorySearch for inference")
    # Reuse the store built by the index-creation cell rather than embedding the documents again.
    db = inMemoryIndex.vectorstore

def create_QA_chain(llm, max_doc_count):
    """Assemble a retrieval chain over the module-level vector store `db`.

    Args:
        llm: chat model that writes the answer from the retrieved documents.
        max_doc_count: number of documents (`k`) requested from the similarity search.

    Returns:
        The chain produced by `create_retrieval_chain`; it is invoked with
        an ``{"input": <question>}`` mapping.
    """
    # Retrieved documents are injected into the {context} placeholder.
    instructions = " ".join([
        "Use the given context to answer the question.",
        "If you don't know the answer, say you don't know.",
        "Use a maximum of five sentences and keep the answer concise.",
        "Context: {context}",
    ])
    chat_prompt = ChatPromptTemplate.from_messages(
        [("system", instructions), ("human", "{input}")]
    )

    doc_retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": max_doc_count},
    )

    answer_chain = create_stuff_documents_chain(llm, chat_prompt)
    return create_retrieval_chain(doc_retriever, answer_chain)

def process_query(query, llm, max_doc_count=5):
    """Answer `query` with `llm`, grounded on documents retrieved from the vector store.

    Args:
        query: the user question.
        llm: chat model used to generate the answer.
        max_doc_count: how many documents the similarity search passes to the model.

    Returns:
        The mapping returned by the retrieval chain; callers read the generated
        text from its ``'answer'`` key.
    """
    print(query)
    print(llm)

    # The retrieval chain is used for every inference platform. The previous
    # non-Ollama path called index.query(), but `index` is not bound when an
    # existing Chroma collection is reused (and is a raw Pinecone index when an
    # existing Pinecone index is reused), and callers index the result with
    # ['answer'], which only the chain output supports.
    qa = create_QA_chain(llm, max_doc_count)
    print(qa)

    return qa.invoke({"input": query})

2024-06-10 10:29:11 - INFO - segment.py:create_collection:189 - Collection langchain-deeplearningai-nomic-ollama is not created.


using Chroma for inference


2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:443 "POST /batch/ HTTP/1.1" 200 15
2024-06-10 10:29:12,257 DEBUG https://us-api.i.posthog.com:4

In [302]:
# The single-row test data set only contains the patio settee, hence the dedicated question.
query = (
    "Please tell me about Nautical Navy Patio Settee. Provide the response in markdown."
    if use_Test_Data
    else "Please list all your shirts with sun protection in a table in markdown and summarize each one."
)
query

'Please list all your shirts with sun protection in a table in markdown and summarize each one.'

In [303]:
from langchain_openai import ChatOpenAI

# Pass the key chosen in the configuration cell explicitly: without api_key the
# client falls back to the OPENAI_API_KEY environment variable, so inferApiKey was
# never used and the real OpenAI key was sent to whatever inferApiBase points at.
llm = ChatOpenAI(temperature = 0.0, base_url=inferApiBase, api_key=inferApiKey, model=infer_model_name)
response = process_query(query=query, llm=llm)
print(response)
# The generated text sits under the 'answer' key of the chain output.
display(Markdown(response['answer']))

2024-06-10 10:29:32 - DEBUG - _config.py:load_ssl_context:80 - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-06-10 10:29:32 - DEBUG - _config.py:load_ssl_context_verify:146 - load_verify_locations cafile='h:\\Users\\Raphael\\OneDrive\\Perso\\Technical\\AI\\Training\\GenAI\\LangChain-for-LLM-Application-Development\\venv\\lib\\site-packages\\certifi\\cacert.pem'
2024-06-10 10:29:32 - DEBUG - _config.py:load_ssl_context:80 - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-06-10 10:29:32 - DEBUG - _config.py:load_ssl_context_verify:146 - load_verify_locations cafile='h:\\Users\\Raphael\\OneDrive\\Perso\\Technical\\AI\\Training\\GenAI\\LangChain-for-LLM-Application-Development\\venv\\lib\\site-packages\\certifi\\cacert.pem'
2024-06-10 10:29:32,611 DEBUG Starting new HTTP connection (1): localhost:11434
2024-06-10 10:29:32,611 DEBUG Starting new HTTP connection (1): localhost:11434
2024-06-10 10:29:32,611 DEBUG Starting new HTTP connection (1)

Please list all your shirts with sun protection in a table in markdown and summarize each one.
client=<openai.resources.chat.completions.Completions object at 0x000001FBDA8EE1A0> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001FBDA8D2DA0> model_name='gemma:2b' temperature=0.0 openai_api_key=SecretStr('**********') openai_api_base='http://localhost:11434/v1' openai_proxy=''
bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001FBDA8D15A0>, search_kwargs={'k': 5}), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['cont

2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:29:36,595 DEBUG http://localhost:11434 "POST /

{'input': 'Please list all your shirts with sun protection in a table in markdown and summarize each one.', 'context': [Document(page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \r\n\r\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\r\n\r\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\r\n\r\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\r\n\r\nSun Protection That Won\'t Wear Off\r\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.', metadata={'row': 255, 'source': 'OutdoorClothingCatalog_1000.csv'}), Docu

| Name | Description |
|---|---|
| Sun Shield Shirt by 255 | SPF 50+ sun protection, lightweight, soft, and wicks moisture |
| Sunrise Tee | UPF 50+ sun protection, lightweight, wicks moisture, and dries quickly |
| Women's Tropical Tee, Sleeveless | SPF 50+ sun protection, UPF 50+ coverage, and is wrinkle-free |
| Men's Plaid Tropic Shirt, Short-Sleeve | SPF 50+ sun protection, UPF 50+ coverage, and is wrinkle-free |
| Serene Sun Hat | SPF 50+ sun protection, extra-wide brim, and is perfect for a sunny day |

In [None]:
# from langchain_openai import OpenAI

# llm = OpenAI(temperature=0, api_key=openai.api_key, base_url=openai.api_base, model=infer_model_name)

# from langchain import PromptTemplate
# vectorstore = index.vectorstore
# # Prompt
# template = """Use the following pieces of context to answer the question at the end.
# If you don’t know the answer, just say that you don’t know, don’t try to make up an answer.
# Use three sentences maximum and keep the answer as concise as possible.
# {context}
# Question: {question}
# Helpful Answer:"""

# QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],
# template=template)
# messages = QA_CHAIN_PROMPT.format_messages(question=query)
# qa = RetrievalQA.from_chain_type(llm=ollama,
# retriever=vectorstore.as_retriever(),
# chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
# qa.invoke(query)

In [None]:
# from langchain import PromptTemplate
#             # Prompt
# template = """Use the following pieces of context to answer the question at the end.
# If you don’t know the answer, just say that you don’t know, don’t try to make up an answer.
# Use three sentences maximum and keep the answer as concise as possible.
# {context}
# Question: {question}
# Helpful Answer:"""
# if use_Pinecone:
#     vectorstore = PineconeVectorStore(
#             index=index, embedding=embeddings_model
#         )

# from langchain.llms import Ollama
# ollama = Ollama(model=infer_model_name)
# QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],
# template=template)
# qa = RetrievalQA.from_chain_type(llm=ollama,
# retriever=vectorstore.as_retriever(),
# chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
# qa.invoke(query)

#### Comparing the Ollama models

| LLM          | Inference Time | Sample Output                             |
|--------------|----------------|-------------------------------------------|
| Gemma 2B     | 12s           | Sure, here's the answer:\n\nNautical Navy Patio Settee is a comfortable and weather-resistant outdoor furniture set made in the USA. It is designed to withstand harsh weather conditions and is perfect for relaxing on your patio or roof.|
| Gemma 7B     | 18s           | The provided text does not contain any information regarding the price of the Nautical Navy Patio Settee, so I am unable to answer this question from the given context.         |
| Cohere Aya 8B| 45s           | # Nautical Navy Patio Settee \n\n## Description: \nThe Nautical Navy Patio Settee is an outdoor furniture piece designed for comfort and durability. Crafted in the USA, it is made to withstand various weather conditions without warping, cracking, or rotting. The settee coordinates with the All-Weather Collection and offers low maintenance. \n\n## Dimensions: \n- Height: 32¾" \n- Width: 51½" \n- Depth: 33¼" \n\n## Additional Features: \n- Made from low-maintenance HDPE material. \n- Sunbrella cushion with a built-in drain is mildew and mold-resistant, ensuring ease of cleaning. \n- Easy to assemble and made in the USA. |
| Llama3 8B| 44s           | **Nautical Navy Patio Settee**\n============================\n\nEnjoy outdoor relaxation with our comfortable **All-Weather Settee**. Crafted with care in the USA, it is designed to provide years of comfort with low maintenance and coordinates perfectly with our All-Weather Collection.\n\n**Specs**\n--------\n\n* Dimensions: 32¾"H x 51½"W x 33¼"D\n\n**Why We Love It**\n-------------------\n\nOur All-Weather Furniture has been tested under punishing New England weather conditions at our company headquarters in Freeport, Maine for a year. Despite being fully exposed, it won’t rot, splinter, warp or crack, making it perfect for your lawn, patio or roof.\n\n**Construction**\n--------------\n\n* Love seat made from low-maintenance High-Density Polyethylene (HDPE)\n* Cushion covered with mildew- and mold-resistant Sunbrella cushion with built-in drain\n\n**Additional Features**\n----------------------\n\n* Will not splinter, crack, rot or warp for years of hassle-free use\n* Love seat is super easy to clean with soap and water; spot clean the cushion\n* Assembly required\n* Made in the USA |
| Phi3 3.8B| 17s           | # Nautical Navy Patio Settee\n\nThe **Nautical Navy Patio Settee** is an outdoor furniture piece designed for comfortable relaxation under various weather conditions. It features:\n\n- Dimensions: 32¾"H x 51½"W x 33¼"D\n\n- Made in the USA with low maintenance and durability as key attributes.\n\n- Constructed from High-Density Polyethylene (HDPE) for longevity, resisting splintering, cracking, rotting, or warping.\n\n- A mildew- and mold-resistant Sunbrella cushion with a built-in drain system.\n\n- Easy to clean using soap and water; spot clean the cushion as needed.\n\n- Requires assembly upon purchase. |
| Mistral 7B| 24s           | The Nautical Navy Patio Settee is a comfortable, all-weather love seat designed for outdoor relaxation. It is crafted with low-maintenance High-Density Polyethylene (HDPE) and features a Sunbrella cushion that is resistant to mildew and mold. The settee has been tested under harsh New England weather conditions and is known for its durability, as it won't rot, splinter, warp or crack. It coordinates perfectly with the All-Weather Collection and can be easily cleaned with soap and water. Assembly is required, and it is made in the USA.The Nautical Navy Patio Settee is a comfortable, all-weather love seat designed for outdoor relaxation. It is crafted with low-maintenance High-Density Polyethylene (HDPE) and features a Sunbrella cushion that is resistant to mildew and mold. The settee has been tested under harsh New England weather conditions and is known for its durability, as it won't rot, splinter, warp or crack. It coordinates perfectly with the All-Weather Collection and can be easily cleaned with soap and water. Assembly is required, and it is made in the USA. |


In [308]:
# Uncomment exactly one model; the trailing comment is the inference time from the table above.
#infer_model_name = "gemma:2b" #12s
#infer_model_name = "gemma:7b" #18s
#infer_model_name = "aya:8b" #45s
#infer_model_name = "llama3:8b" #44s
infer_model_name = "phi3:3.8b" #17s
#infer_model_name = "mistral:7b" #24s

# Model-specific addition to the prompt, inserted before the final period of the query.
optional_instruction = ''
if infer_model_name == "phi3:3.8b":
    optional_instruction = ". Take great attention to remove any '```markdown\n' keyword that may be at the beginning of your response"
query =f"Please describe the Women's Campside Oxfords product. Provide the response in markdown format{optional_instruction}."
#query = f'Please tell me about the Nautical Navy Patio Settee. Provide the response in markdown format{optional_instruction}.'

# Pass the configured key explicitly; without api_key the client silently falls back
# to the OPENAI_API_KEY environment variable and inferApiKey is never used.
llm = ChatOpenAI(temperature = 0.0, base_url=inferApiBase, api_key=inferApiKey, model=infer_model_name)
#llm = ChatOpenAI(temperature = 0.0)

# A single retrieved document is enough when asking about one named product.
response = process_query(query=query, llm=llm, max_doc_count=1)
display(response)
display(Markdown(response['answer']))

2024-06-10 10:34:13 - DEBUG - _config.py:load_ssl_context:80 - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-06-10 10:34:13 - DEBUG - _config.py:load_ssl_context_verify:146 - load_verify_locations cafile='h:\\Users\\Raphael\\OneDrive\\Perso\\Technical\\AI\\Training\\GenAI\\LangChain-for-LLM-Application-Development\\venv\\lib\\site-packages\\certifi\\cacert.pem'
2024-06-10 10:34:13 - DEBUG - _config.py:load_ssl_context:80 - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-06-10 10:34:13 - DEBUG - _config.py:load_ssl_context_verify:146 - load_verify_locations cafile='h:\\Users\\Raphael\\OneDrive\\Perso\\Technical\\AI\\Training\\GenAI\\LangChain-for-LLM-Application-Development\\venv\\lib\\site-packages\\certifi\\cacert.pem'
2024-06-10 10:34:13,319 DEBUG Starting new HTTP connection (1): localhost:11434
2024-06-10 10:34:13,319 DEBUG Starting new HTTP connection (1): localhost:11434
2024-06-10 10:34:13,319 DEBUG Starting new HTTP connection (1)

Please describe the Women's Campside Oxfords product. Provide the response in markdown format. Take great attention to remove any '```markdown
' keyword that may be at the beginning of your response.
client=<openai.resources.chat.completions.Completions object at 0x000001FBDA8D1510> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001FBDA2FA530> model_name='phi3:3.8b' temperature=0.0 openai_api_key=SecretStr('**********') openai_api_base='http://localhost:11434/v1' openai_proxy=''
bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001FBDA8D15A0>, search_kwargs={'k': 1}), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
   

2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /api/embeddings HTTP/1.1" 200 None
2024-06-10 10:34:17,097 DEBUG http://localhost:11434 "POST /

{'input': "Please describe the Women's Campside Oxfords product. Provide the response in markdown format. Take great attention to remove any '```markdown\n' keyword that may be at the beginning of your response.",
 'context': [Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \r\n\r\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \r\n\r\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \r\n\r\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \r\n\r\

 # Women's Campside Oxfords

The **Women's Campside Oxfords** are ultra-comfortable shoes featuring a lace-to-toe design with super-soft canvas material and thick cushioning for an immediate comfortable fit upon first use. They come in regular shoe sizes, but if half sizes aren't available, the next whole size up is recommended.

## Size & Fit:
- **Order Regular Shoe Size**
- For unavailable half sizes, order one size larger.

## Specifications:
- **Approximate Weight per Pair:** 1 lb. 1 oz.
- **Construction Details:**
  - Made from soft canvas material for a comfortable break-in experience.
  - Innersole with Cleansport NXT® antimicrobial odor control and vintage hunt, fish, and camping motif.
  - Moderate arch contour in the innersole.
  - EVA foam midsole for cushioning and support.
  - Chain-tread-inspired molded rubber outsole with a modified chain-tread pattern, imported from overseas.

## OpenAI compatibility playground code

In [None]:
if use_Ollama_For_Inference:
    from openai import OpenAI

    # Plain OpenAI SDK client pointed at the local Ollama server's
    # OpenAI-compatible endpoint. It is deliberately not called `chromaClient`:
    # this object has nothing to do with Chroma, and reusing that name could
    # overwrite a real Chroma client held by the notebook.
    openai_client = OpenAI(
        base_url = 'http://localhost:11434/v1/',
        api_key='ollama', # required, but unused
    )
    # The product record is supplied as the system message and the question as
    # the user message, i.e. a hand-built version of the retrieval prompt.
    chat_completion = openai_client.chat.completions.create(
        messages=[
            {
                'role': 'system',
                'content': "0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \r\n\r\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \r\n\r\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \r\n\r\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \r\n\r\nQuestions? Please contact us for any inquiries."
            },
            {
                'role': 'user',
                'content': 'Please describe the Women\'s Campside Oxfords product. Please provide the answer in Markdown format.'
            }
        ],
        model='gemma:7b',
        stream=False,
        temperature=0.0,
        n=1
    )
    # Render the first (and only, n=1) completion as Markdown.
    display(Markdown(chat_completion.choices[0].message.content))


#### Comparing the ChatOpenAI (with Ollama bindings) and Ollama LangChain objects
Conclusion: not much difference...

In [None]:

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.manager import CallbackManager

from langchain.llms import Ollama

# LangChain's native Ollama LLM, streaming tokens to stdout as they arrive.
# Named `ollama_llm` rather than `ollama` so it cannot be confused with, or
# silently replaced by, the `ollama` module imported in the next test cell.
ollama_llm = Ollama(
    model=infer_model_name,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
)

# Same query and retrieval settings as the ChatOpenAI run, for comparison.
response = process_query(query=query, llm=ollama_llm, max_doc_count=1)
display(response)
display(Markdown(response['answer']))



#### Testing the ollama library
Points to note:
- This test reuses the `response` from the previous test, so you need to run the previous test first.
- I haven't found a way to output any logs.

In [None]:
import ollama

# Reuse the first retrieved document and the original question from the
# `response` dict produced by the previous test.
data = response['context'][0].page_content
prompt = response['input']

# Call the ollama package directly, stuffing the document text and the
# question into a single prompt string.
output = ollama.generate(
    prompt=f"Using this data: {data}. Respond to this prompt: {prompt}",
    model="gemma:2b",
)

display(Markdown(output['response']))

In [None]:
# Choose the CSV loader. `use_Test_Data` is the flag documented as switching
# to LimitedCSVLoader; the inference backend has no bearing on how much of the
# catalog gets loaded, so it is no longer consulted here.
if use_Test_Data:
    loader = LimitedCSVLoader(file_path=file, encoding='utf-8')
else:
    loader = CSVLoader(file_path=file, encoding='utf-8')

In [None]:
# Read the CSV through the loader chosen above and keep the resulting documents in `docs`.
docs = loader.load()

In [None]:
# Cell output: the first loaded document, for a quick look at its content.
docs[0]

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings


# The embedding backend is selected by its own flag, use_Ollama_For_Embedding,
# not by the inference flag: the two are configured independently, and mixing
# them up would embed with the wrong provider whenever they differ.
if use_Ollama_For_Embedding:
    # Empty instructions: the document/query text is embedded without any
    # instruction prefix.
    embeddings_model = OllamaEmbeddings(model=embeddings_model_name, embed_instruction='', query_instruction='')
else:
    embeddings_model = OpenAIEmbeddings()

# Cell output: show the configured embeddings object.
embeddings_model

In [None]:
# Build the vector store `db`: an in-memory DocArray index populated from the
# loaded documents, or a handle onto the Pinecone index named by
# rag_index_name when use_Pinecone is set.
if not use_Pinecone:
    db = DocArrayInMemorySearch.from_documents(docs, embeddings_model)
else:
    pinecone_index = pc.Index(rag_index_name)
    db = PineconeVectorStore(
        index=pinecone_index,
        embedding=embeddings_model,
        text_key="text",
    )

In [None]:
# Sample query used by the search cells below.
query = "Please suggest a shirt with sunblocking"

In [None]:
# Alternative sample queries: keep exactly one active. The first assignment
# used to be overwritten immediately, so it is commented out instead.
#query = "Please tell me about Nautical Navy Patio Settee"
query = "Enduring Outdoor Chair"

In [None]:

# Embed the current query and print the first five elements of the result.
embed = embeddings_model.embed_query(query)
print(embed[:5])

In [None]:
# Ask the vector store for the top match (k=1) for the current query.
# NOTE(review): this rebinds `docs`, which previously held loader.load()'s output. LangChain documents
# this method as returning (Document, score) tuples rather than bare Documents — confirm later cells expect that.
docs = db.similarity_search_with_relevance_scores(query=query, k=1)

In [None]:
# Cell output: number of search results returned.
len(docs)

In [None]:
# Cell output: the search results themselves.
docs

In [None]:
# Build the chat model against the endpoint selected for inference.
# `inferApiBase` / `inferApiKey` follow the use_Ollama_For_Inference switch,
# whereas `openai.api_base` is the OpenAI default URL, which does not match
# an Ollama model name in `infer_model_name`.
llm = ChatOpenAI(
    temperature=0.0,
    base_url=inferApiBase,
    api_key=inferApiKey,
    model=infer_model_name,
)
#llm = ChatOpenAI(temperature = 0.0)


In [None]:
# Concatenate the text of every retrieved document into one context string.
# `docs` may hold (Document, score) tuples (from
# similarity_search_with_relevance_scores) or bare Documents (from
# loader.load()), so unwrap the tuple form before reading page_content.
qdocs = "".join(
    (item[0] if isinstance(item, tuple) else item).page_content
    for item in docs
)


In [None]:
# Manual "stuff" prompt: retrieved text first, then the question and the
# formatting instruction, sent to the LLM as a single string.
response = llm.invoke(
    f"{qdocs} Question: Please tell me about Nautical Navy Patio Settee. "
    "Please provide the answer in Markdown format "
    "but do not include the 'markdown' string"
)


In [None]:
# Cell output: the raw object returned by llm.invoke.
response

In [None]:
# Render the reply's `content` as Markdown.
display(Markdown(response.content))

### Deprecation Notice
The following piece of code will soon be deprecated and doesn't work with Ollama.

In [None]:
# Legacy RetrievalQA chain using the "stuff" strategy over the vector store's
# retriever; kept for reference only (see the notice above this cell).
retriever = db.as_retriever()
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    verbose=True,
    chain_type="stuff",
)

#response = qa_stuff.invoke(query)

### Code that works
This piece of code works with both Ollama and OpenAI:

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System message: answering rules followed by the {context} placeholder that
# the stuff-documents chain fills with the retrieved documents.
system_prompt = "".join([
    "Use the given context to answer the question. ",
    "If you don't know the answer, say you don't know. ",
    "Use a maximum of 5 sentences and keep the answer concise. ",
    "Context: {context}",
])
# The human turn carries the caller's question via the {input} placeholder.
chat_messages = [("system", system_prompt), ("human", "{input}")]
prompt = ChatPromptTemplate.from_messages(chat_messages)

# Wire retrieval and answering together: retriever -> stuff documents -> LLM.
retriever = db.as_retriever()
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

In [None]:
# Alternative questions for the retrieval chain: keep exactly one active. The
# shirts query used to be overwritten immediately, so it is commented out.
#query = "Please list all your shirts with sun protection in a table in markdown and summarize each one."
query = "Please tell me about Nautical Navy Patio Settee. Please provide the answer in Markdown format"

In [None]:
# Run the retrieval chain; the query is passed under the "input" key that the prompt's {input} placeholder reads.
response = chain.invoke({"input": query})

In [None]:
# Cell output: the full result returned by the chain.
response

In [None]:
# Render the chain's 'answer' entry as Markdown.
display(Markdown(response['answer']))

In [None]:
# Query the index with the current query and LLM.
# NOTE(review): in the visible part of the notebook, `index` is only assigned by the
# VectorstoreIndexCreator cell further down, so that cell has to be run before this one.
response = index.query(query, llm=llm)

In [None]:
# Cell output: the result of the index query.
response

In [None]:
# Build an in-memory DocArray index straight from the CSV loader.
# NOTE(review): `embeddings` is not assigned in the visible cells (they use
# `embeddings_model`) — confirm which object is intended here.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])