In [2]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"type": "dog", "trait": "loyalty"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"type": "cat", "trait": "independence"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"type": "fish", "trait": "low maintenance"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"type": "bird", "trait": "intelligence"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"type": "rabbit", "trait": "social"},
    ),
]
# retriever = ToyRetriever(documents=documents, k=3)

In [3]:
documents

[Document(metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'type': 'fish', 'trait': 'low maintenance'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'type': 'bird', 'trait': 'intelligence'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [4]:
## Vector Store

In [8]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
groq_api_key = os.getenv("GROQ_API_KEY")

In [7]:
hf_token = os.getenv("HUGGINGFACE_TOKEN")

In [21]:
llm = ChatGroq(api_key=groq_api_key, model="gemma2-9b-it")

In [10]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
from langchain_chroma import Chroma
vectorstore = Chroma.from_documents(documents,embedding=embeddings)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x16857f0e0>

In [12]:
vectorstore.similarity_search("cat")

[Document(id='bcc14ae3-1830-4484-85dd-41ad71bd3ece', metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='b10dec9a-85b7-4e9c-ac89-497a448c117e', metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='7736da3d-31d2-45b5-9f65-7fe0b26074e3', metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='32441c85-f8f5-4da0-b116-189e6f06a623', metadata={'type': 'bird', 'trait': 'intelligence'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [13]:
await vectorstore.asimilarity_search("cat")

[Document(id='bcc14ae3-1830-4484-85dd-41ad71bd3ece', metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='b10dec9a-85b7-4e9c-ac89-497a448c117e', metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='7736da3d-31d2-45b5-9f65-7fe0b26074e3', metadata={'trait': 'social', 'type': 'rabbit'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='32441c85-f8f5-4da0-b116-189e6f06a623', metadata={'type': 'bird', 'trait': 'intelligence'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [15]:
vectorstore.similarity_search_with_score("dog")

[(Document(id='b10dec9a-85b7-4e9c-ac89-497a448c117e', metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  1.131113052368164),
 (Document(id='bcc14ae3-1830-4484-85dd-41ad71bd3ece', metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
  1.5269932746887207),
 (Document(id='66227171-8b33-4ed7-abd8-08e15e16c881', metadata={'type': 'fish', 'trait': 'low maintenance'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
  1.657111406326294),
 (Document(id='7736da3d-31d2-45b5-9f65-7fe0b26074e3', metadata={'trait': 'social', 'type': 'rabbit'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  1.684274435043335)]

# Retriever 

In [None]:
# Lanchain Vectore Store objects does not have subclass Runnable, and so cannot be used directly in a chain.
# While Retriever does have Runnable,(synchronous and ansynchronus) and are designed to be
# incorporated in LCEL chain

In [16]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=2)
retriever.batch(["cat","dog"])

[[Document(id='bcc14ae3-1830-4484-85dd-41ad71bd3ece', metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
  Document(id='b10dec9a-85b7-4e9c-ac89-497a448c117e', metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')],
 [Document(id='b10dec9a-85b7-4e9c-ac89-497a448c117e', metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  Document(id='bcc14ae3-1830-4484-85dd-41ad71bd3ece', metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.')]]

In [None]:
## VectorStore implement an as_retiever method that will generate a retriever, specifically
# a Vectore Store Retriever. 

In [19]:
ret = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)

ret.batch(["cat","dog"])

[[Document(id='bcc14ae3-1830-4484-85dd-41ad71bd3ece', metadata={'trait': 'independence', 'type': 'cat'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(id='b10dec9a-85b7-4e9c-ac89-497a448c117e', metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]]

In [22]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
Answer the question using the provided context only.
{question}

Context:
{context}
"""


prompt = ChatPromptTemplate.from_messages(
    [("human", message)]
)

rag_chain = {"context": ret,"question":RunnablePassthrough()} | prompt | llm

res = rag_chain.invoke("tell me about dogs")

res.content

'Dogs are great companions, known for their loyalty and friendliness. \n'