In [1]:
from langchain_core.documents import Document
# Toy corpus as (page_content, source tag) pairs. The text is kept verbatim because
# it is exactly what gets embedded and echoed back in the outputs further down.
_pet_facts = (
    ("Dogs are great companions", "mammal-pets-doc"),
    ("Cats are independent pets", "mammal-pets-doc"),
    ("Goldfish are popular pets for beginner", "fish-pets-doc"),
    ("Parrots are intelligent birds capable of mimiking human speech", "birds-pets-doc"),
    ("Rabbits are social aniamls that need plenty of space to hop around.", "mammal-pets-doc"),
)

# One Document per fact, with the source tag stored under metadata["source"].
document = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _pet_facts
]

In [2]:
# Echo the list so the notebook renders the repr of every Document in it.
document

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginner'),
 Document(metadata={'source': 'birds-pets-doc'}, page_content='Parrots are intelligent birds capable of mimiking human speech'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social aniamls that need plenty of space to hop around.')]

In [3]:
import os
from dotenv import load_dotenv
load_dotenv()  # load variables from a local .env file, if one is present


def _export_env(target, source=None):
    """Expose the environment variable ``source`` under the name ``target``.

    ``source`` defaults to ``target``. Nothing is written when the variable is
    unset: ``os.getenv`` returns ``None`` in that case, and assigning ``None``
    into ``os.environ`` raises ``TypeError``, which would abort the whole cell
    over a key that the rest of the notebook may never use.

    Returns the value that was found, or ``None`` when the variable is missing.
    """
    value = os.getenv(source or target)
    if value is not None:
        os.environ[target] = value
    return value


_export_env('OPENAI_API_KEY')
_export_env('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = "true"
_export_env('LANGCHAIN_PROJECT')
# The Groq key is passed to the client explicitly, so it is only read here.
groq_api_key = os.getenv('GROQ_API')
# The token is stored under "HF" in the environment but exported as HF_TOKEN.
_export_env("HF_TOKEN", "HF")

In [4]:
from langchain_groq import ChatGroq

# Groq-hosted chat model, authenticated with the key read from GROQ_API earlier.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model="Llama3-8b-8192",
)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7f0dadb69a80>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7f0dadb6a740>, model_name='Llama3-8b-8192', groq_api_key=SecretStr('**********'))

all-MiniLM-L6-v2: it maps sentences and paragraphs to a 384-dimensional dense vector space and can be used for tasks like clustering or semantic search.

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model that is handed to the vector store when it is built.
embedding = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [6]:
##vectorStores
from langchain_chroma import Chroma

# Build a Chroma vector store from the documents, vectorizing them with `embedding`.
vectorestore = Chroma.from_documents(
    documents=document,
    embedding=embedding,
)


In [7]:
# Display the store object; the output is just its default repr.
vectorestore

<langchain_chroma.vectorstores.Chroma at 0x7f0ca87d26b0>

In [8]:
# Documents most similar to the query text "cat".
vectorestore.similarity_search(query="cat")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social aniamls that need plenty of space to hop around.'),
 Document(metadata={'source': 'birds-pets-doc'}, page_content='Parrots are intelligent birds capable of mimiking human speech')]

In [9]:
##async query

await vectorestore.asimilarity_search("cat")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social aniamls that need plenty of space to hop around.'),
 Document(metadata={'source': 'birds-pets-doc'}, page_content='Parrots are intelligent birds capable of mimiking human speech')]

In [10]:
# Same query, but every hit is paired with its score. In the output below the best
# match carries the smallest value, so the score looks like a distance (lower = closer).
vectorestore.similarity_search_with_score(query="cat")

[(Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets'),
  0.8409360647201538),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions'),
  1.3763904571533203),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social aniamls that need plenty of space to hop around.'),
  1.5754406452178955),
 (Document(metadata={'source': 'birds-pets-doc'}, page_content='Parrots are intelligent birds capable of mimiking human speech'),
  1.6277443170547485)]

## Retrievers
LangChain vector store objects do not subclass Runnable, so they cannot be integrated directly into LangChain Expression Language (LCEL) chains.

So we use LangChain Retrievers, which are Runnables.

In [11]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

# Wrap the plain search method as a Runnable, then pin k=1 so each query yields one hit.
search_runnable = RunnableLambda(vectorestore.similarity_search)
retriever = search_runnable.bind(k=1)

# Run the retriever once per query string.
retriever.batch(["cat", "dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions')]]

A `VectorStoreRetriever` (created with `as_retriever`) includes specific `search_type` and `search_kwargs` attributes that identify which method of the underlying vector store to call.

In [12]:
# Retriever built by the store itself: similarity search, one document per query.
retriever = vectorestore.as_retriever(search_type="similarity", search_kwargs=dict(k=1))
retriever.batch(["cat", "dog"])


[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions')]]

In [13]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt text with two slots: the user's question and the retrieved context.
message = """
          Answer this question using the provided context only.
          {question}

          Context:
          {context}

"""
prompt = ChatPromptTemplate.from_messages([("human", message)])

# The input string fans out to both keys: the retriever turns it into context
# documents, while RunnablePassthrough forwards it unchanged as the question.
chain_inputs = {
    "context": retriever,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm

response = rag_chain.invoke("tell me about dogs")
print(response.content)


According to the provided context, dogs are great companions.
