In [1]:
# Generating Sample Documents
from langchain_core.documents import Document

In [2]:
# Small in-memory corpus: one Document per pet fact, tagged with the
# source it is attributed to via the "source" metadata key.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        (
            "Dogs are great companions, known for their loyalty and friendliness.",
            "mammal-pets-doc",
        ),
        (
            "Cats are independent pets that often enjoy their own space.",
            "mammal-pets-doc",
        ),
        (
            "Goldfish are popular pets for beginners, requiring relatively simple care.",
            "fish-pets-doc",
        ),
        (
            "Parrots are intelligent birds capable of mimicking human speech.",
            "bird-pets-doc",
        ),
        (
            "Rabbits are social animals that need plenty of space to hop around.",
            "mammal-pets-doc",
        ),
    )
]

In [3]:
# Echo the sample corpus so the page_content / metadata of each Document is visible.
documents

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [5]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

In [27]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    # Fail here with an actionable message instead of deep inside the Groq client.
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it or add it to your .env file."
    )

# HF_TOKEN is presumably consumed by the Hugging Face embedding model loaded
# later (not by Groq). Assigning None to os.environ raises TypeError, so only
# propagate the token when it is actually defined.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    os.environ["HF_TOKEN"] = hf_token

# Chat model used for the final RAG answer generation step.
llm = ChatGroq(groq_api_key=groq_api_key, model="llama-3.1-8b-instant")
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001F5934E3BC0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001F5934E3020>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [7]:
from langchain_huggingface import HuggingFaceEmbeddings
# Name of the Hugging Face model handed to HuggingFaceEmbeddings; kept as a
# constant so it is easy to spot and swap.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Vector Stores
from langchain_chroma import Chroma
# Embed every sample document with `embeddings` and index the result in a Chroma store.
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
)
vectordb

<langchain_chroma.vectorstores.Chroma at 0x1f58f46ae70>

In [13]:
# Ask the store for the two documents closest to a free-text question.
vectordb.similarity_search(
    query="Who need extra space?",
    k=2,
)

[Document(id='c221aefe-33b6-4f50-80f8-14da849afc6e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='e1ebb622-c941-4b8c-850e-201d76d4afae', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [15]:
# Same kind of lookup, but every hit comes back as a (Document, score) pair.
# No `k` is passed, so the store's default result count applies.
# NOTE(review): in the recorded output the best match has the smallest score,
# so the score looks like a distance (lower = closer) — confirm in the Chroma docs.
vectordb.similarity_search_with_score(query="cat")

[(Document(id='c221aefe-33b6-4f50-80f8-14da849afc6e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.9351057410240173),
 (Document(id='d95e4016-c711-4ef5-9da6-00f514d39a85', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  1.5740898847579956),
 (Document(id='e1ebb622-c941-4b8c-850e-201d76d4afae', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  1.5956902503967285),
 (Document(id='4e16ad65-2e6f-4fe5-a35c-6adb7768d273', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
  1.6657923460006714)]

## Retrievers

In [17]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda


In [18]:
# Wrap the store's search method as a Runnable so it gains invoke/batch support,
# then pin k=2 so every call returns the top two matches.
similarity_lookup = RunnableLambda(vectordb.similarity_search)
retriever = similarity_lookup.bind(k=2)

# Run both queries in a single batch call; the result is one list of documents per query.
retriever.batch(["cat", "dog"])

[[Document(id='c221aefe-33b6-4f50-80f8-14da849afc6e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  Document(id='d95e4016-c711-4ef5-9da6-00f514d39a85', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')],
 [Document(id='d95e4016-c711-4ef5-9da6-00f514d39a85', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  Document(id='c221aefe-33b6-4f50-80f8-14da849afc6e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')]]

In [19]:
# Alternative way to build a retriever: let the vector store create it directly.
# search_type="similarity" with k=2 returns the two closest documents per query.
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

# Retrievers are Runnables, so use `invoke`; `get_relevant_documents` is
# deprecated and emits a deprecation warning on every call.
retriever.invoke("cat")

  retriever.get_relevant_documents("cat")


[Document(id='c221aefe-33b6-4f50-80f8-14da849afc6e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='d95e4016-c711-4ef5-9da6-00f514d39a85', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]

In [20]:
# RAG
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt template for the RAG step. `{question}` and `{context}` are filled in
# when the chain runs. Written line by line so the leading and trailing
# newlines of the template are explicit.
message = (
    "\n"
    "Answer the question based on the context below.\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
)

In [21]:
# Build a chat prompt consisting of a single human message rendered from `message`.
# Each entry must be a (role, template) tuple: a flat ['human', message] list is
# read as two separate messages, which injects a stray literal "human" turn.
prompt = ChatPromptTemplate.from_messages([("human", message)])

In [28]:
# Inputs for the prompt: the retriever supplies "context" from the incoming
# question, while RunnablePassthrough forwards the question itself unchanged.
rag_inputs = {
    "context": retriever,
    "question": RunnablePassthrough(),
}

# Pipeline: gather inputs -> render the prompt -> call the chat model.
rag_chain = rag_inputs | prompt | llm

responce = rag_chain.invoke("tell me about dogs and cats")
print(responce.content)

It seems there's not much information about dogs and cats in the provided context, but I can try to answer your question based on what I can infer:

- **Dogs**: They are great companions, known for their loyalty and friendliness.
- **Cats**: They are independent pets that often enjoy their own space.
