# Vector Stores and Retrievers
Use a vector database and integrate it into an LLM workflow.

# Documents
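LangChain implements a `Document` abstraction. It has two main attributes:
- `page_content`: the text of the document (a string)
- `metadata`: a dictionary of arbitrary information about the document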

In [3]:
## https://python.langchain.com/docs/how_to/custom_retriever/
# ___________________________________________________________
# Document Setup
from langchain_core.documents import Document

# Sample corpus as (text, animal type, trait) rows. Each row becomes one
# Document whose metadata carries the "type" and "trait" keys.
pet_facts = [
    ("Dogs are great companions, known for their loyalty and friendliness.", "dog", "loyalty"),
    ("Cats are independent pets that often enjoy their own space.", "cat", "independence"),
    ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish", "low maintenance"),
    ("Parrots are intelligent birds capable of mimicking human speech.", "bird", "intelligence"),
    ("Rabbits are social animals that need plenty of space to hop around.", "rabbit", "social"),
]

# Wrap every row in a Document, keeping the original order
documents = [
    Document(page_content=text, metadata={"type": animal, "trait": trait})
    for text, animal, trait in pet_facts
]
documents

[Document(metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'type': 'fish', 'trait': 'low maintenance'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'type': 'bird', 'trait': 'intelligence'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [4]:
# Environment Setup
import os  # Environment variable access
from dotenv import load_dotenv  # Loads variables from a .env file

# Populate os.environ from a .env file, if one is found
load_dotenv()

# Groq API key passed to ChatGroq below; None when the variable is not set
groq_api_key = os.getenv("GROQ_API_KEY")

# Re-export the Hugging Face token only when it is present. os.environ only
# accepts str values, so assigning the None returned for a missing variable
# would raise TypeError and abort the cell.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token



In [5]:
# Model Initialization
from langchain_groq import ChatGroq  # Groq-hosted chat model wrapper
from langchain_huggingface import HuggingFaceEmbeddings  # Embedding generation

# Initialize the language model instance
llm = ChatGroq(groq_api_key=groq_api_key, model="Llama3-8b-8192")

# Initialize HuggingFace embeddings with a specific model
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Only the last expression of a cell is rendered, so the model is displayed
# here at the end; a bare `llm` in the middle of the cell shows nothing.
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000243A333C690>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000243A32F3A10>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [7]:
# VectorStores
from langchain_chroma import Chroma  # Import Chroma for vector storage

# Deterministic, position-based IDs so that re-running this cell writes the
# same records again instead of inserting a second copy under new random IDs.
# NOTE(review): this assumes the in-memory Chroma collection is reused across
# calls within one kernel session -- confirm against the installed version.
document_ids = [f"pet-doc-{index}" for index in range(len(documents))]

# Embed the documents and store them in a Chroma vector store
vectorstore = Chroma.from_documents(
    documents,
    embedding=embeddings,
    ids=document_ids,
)
vectorstore

: 

: 

In [None]:
# Look up the stored documents closest to the query text
vectorstore.similarity_search(query="cat")

In [None]:
# Perform an asynchronous similarity search
await vectorstore.asimilarity_search("cat")

In [None]:
# Same lookup, with each hit returned together with its score
vectorstore.similarity_search_with_score(query="cat")

# Retrievers 
`VectorStore` objects do not subclass `Runnable`, so they cannot be used directly in LCEL chains. Retrievers, however, are Runnables and therefore support synchronous, asynchronous, and batch operations.

In [None]:
# Retriever Setup
from typing import List

from langchain_core.documents import Document  # Re-import for type hinting
from langchain_core.runnables import RunnableLambda

# Wrap the vector store's search method in a Runnable, then pin k=1 so that
# every call is made with that keyword argument.
search_runnable = RunnableLambda(vectorstore.similarity_search)
retriever = search_runnable.bind(k=1)

# Run both queries in a single batch call
retriever.batch(["cat", "dog"])

In [None]:
# Build a retriever straight from the vector store: similarity search with k=1
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=1))

# Run both queries in a single batch call
retriever.batch(["cat", "dog"])

In [None]:
# RAG (Retrieval-Augmented Generation)
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Template for the single human turn: the question, then the retrieved context
message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])

# The input string feeds both keys: the retriever turns it into the context,
# and RunnablePassthrough forwards it unchanged as the question.
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}

# inputs -> prompt -> model
rag_chain = chain_inputs | prompt | llm

# Ask a question and print the text of the model's reply
response = rag_chain.invoke("Tell me about dogs")
print(response.content)