# Project Agentic rag

In [1]:
from dotenv import load_dotenv
import os
load_dotenv()
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from pydantic import BaseModel,Field
from langgraph.graph.message import add_messages
from typing import Literal,Annotated,Sequence,TypedDict
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from langchain.document_loaders import WebBaseLoader,PyPDFLoader
from pinecone import Pinecone,ServerlessSpec
from pinecone_text.sparse import BM25Encoder


  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# load embedding model
embedding_model  = HuggingFaceEmbeddings(model= "BAAI/bge-base-en-v1.5")
llm = ChatGroq(model="openai/gpt-oss-20b")

In [3]:
len(embedding_model.embed_query("Hi"))

768

In [4]:
# load data 
# taking mutiple url
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
]
docs =[]

In [6]:
internet_docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in internet_docs for item in sublist]
docs_list

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

In [None]:

from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500,chunk_overlap= 25)

chunks = splitter.split_documents(docs_list)


In [8]:
chunks[0]

Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resul

In [None]:
# Replace with your actual API key


pc = Pinecone(api_key=pinecone_api_key)

In [10]:
# perform upsert data in pinecone
# using upsert we perform reindexing create syschronisation between data source and vectorstore

BM_encoder = BM25Encoder()

# give complete document to bm_encoder
corpus= [chunk.page_content for chunk in chunks]
BM_encoder.fit(corpus)
BM_encoder.dump("bm25_values.json")

# normalise vector as i am using dotproduct
import numpy as np
def normalize(vector):

    """Normalize the vector to unit length"""

    norm = np.linalg.norm(vector)

    if norm == 0:
        return vector
    return vector/norm


  0%|          | 0/52 [00:00<?, ?it/s]

100%|██████████| 52/52 [00:00<00:00, 482.84it/s]


In [11]:
# add datavector  in pinecone database
upsert_data = []

for i,chunk in enumerate(chunks):
    
    # dense vector
    dense_vec = embedding_model.embed_query(chunk.page_content)

    # applying normalization for performaing consine similarty with dot product
    normalise_dense_vector = normalize(np.array(dense_vec))

    # sparse vector
    sparse_vec = BM_encoder.encode_documents(chunk.page_content)

    upsert_data.append({
        "id":f"chunk-{i}",
        "values": normalise_dense_vector.tolist(),
        "sparse_values":sparse_vec,
        "metadata":{"text":chunk.page_content,
                     **chunk.metadata}
    })

In [12]:
# create database
index_name = "agenticrag"
dimension = 768
metric = "dotproduct"


# pc.create_index(
#     name = index_name,
#     dimension = dimension,
#     metric = metric,
#     spec=ServerlessSpec(cloud="aws",region="us-east-1")
# )

In [13]:
index = pc.Index(index_name)

In [14]:
# save all chuncks into pinecone
if upsert_data:
    index.upsert(vectors=upsert_data)
    print(f"Upserted {len(upsert_data)} vectors to index.")

Upserted 52 vectors to index.


In [15]:
upsert_data

[{'id': 'chunk-0',
  'values': [-0.021165801862815557,
   -0.015828427883587105,
   0.030010361368569268,
   -0.00617679592241934,
   0.052970410085346586,
   -0.02618641442610456,
   0.030595794430739096,
   0.017115824807080026,
   -0.015490985519740002,
   -0.0023272501834733306,
   -0.02648984861676442,
   0.0261874444688131,
   -0.055932548419515025,
   0.0018878411584761334,
   -0.011036727876772304,
   0.03647564887745571,
   0.02278500106096668,
   0.010700183307835719,
   0.021826424317418807,
   -0.020517493354588738,
   -0.005234342700021215,
   -0.017314900583939818,
   0.04519712293615136,
   0.030585002265362447,
   0.03597023132102461,
   0.03674718154647148,
   0.024914633918648548,
   0.03284720515865915,
   -0.021743287018588738,
   0.018812768946662974,
   0.012003378255257194,
   -0.046240312193403015,
   0.024583964132176228,
   0.03460379491779528,
   0.010879314812902195,
   0.025179295290102273,
   -0.07609837811535664,
   -0.022522502220407376,
   -0.0236257506

In [16]:
# retrieve document
query = "what is prompt engineering?"

dense_query = embedding_model.embed_query(query)

normalize_data = normalize(np.array(dense_query))

sparse_query = BM_encoder.encode_queries([query])[0]

# filter by meta data
source_filter = {"source":{"$eq":'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/'}}
# Query Pinecone
response = index.query(
    vector=normalize_data.tolist(),
    sparse_vector=sparse_query,
    top_k=5,
    filter= source_filter,
    include_metadata=True
)



In [17]:
###############################
# Develop agentic rag

In [18]:
# develop Tool for retriever
from typing import Optional,Type,ClassVar
#Pydantic class
class PineconeInput(BaseModel):
    query:str=Field(description="The user query string")
    filter_source:Optional[str]=Field(None,description="Optinal metadata filter for source document")
    alpha:float=Field(0.5,description="Weighting for dense vs. sparse search (0.0 to 1.0). Default is 0.5.")
    top_k: int = Field(4, description="The number of documents to return. Default is 4.")


In [None]:
# Retriever Tool
from langchain.tools import BaseTool
from langchain_community.retrievers import PineconeHybridSearchRetriever
from langchain_community.document_compressors import CohereRerank 

BM_encoder.fit(corpus)

BM_encoder.dump("bm25_values.json")

class PineconeRetrieverTool(BaseTool):

    print("-------------Retriever Call-------------------")
    name:str= "hybrid_pinecone_document_search"

    description:str = (
        "Useful for performing a hybrid search (semantic and keyword) on company documents. "
        "Can take an optional 'filter_source' and 'alpha' parameter to tune the search."
    )
    args_schema: Type[BaseModel] = PineconeInput

    dense_embeddings: ClassVar = HuggingFaceEmbeddings(
        model_name="BAAI/bge-base-en-v1.5"
    )

    sparse_encoder:ClassVar=  BM25Encoder().load("bm25_values.json")

    pc:ClassVar = Pinecone(api_key=pinecone_api_key)

    index:ClassVar = pc.Index("agenticrag") 


    # define run method for sync 
    def _run(self,query:str,filter_source:Optional[str]=None,alpha: float = 0.5,top_k: int =4) -> str:

        # normalise dense vector
        normalise_query = normalize(np.array(self.dense_embeddings.embed_query(query)))

        retriever = PineconeHybridSearchRetriever(
            embeddings= self.dense_embeddings,
            sparse_encoder=self.sparse_encoder,
            index = self.index,
            alpha= alpha,
            text_key="text",
        )

        search_kwarge={}
        if filter_source:
            search_kwarge["filter"] ={"source": {"$eq": filter_source}}

        # Mannually add embedding query
        search_kwarge["vector"] = normalise_query
        search_kwarge["k"] = top_k
        relevant_doc = retriever.invoke(query,config = {"configurable":search_kwarge})
        print(relevant_doc)
        return "\n".join([doc.page_content for doc in relevant_doc])

        
  # define run method for async 
    async def _arun(self,query:str,filter_source:Optional[str]=None,alpha: float = 0.5,top_k: int =4) -> str:

        # normalise dense vector
        normalise_query = normalize(await np.array(self.dense_embeddings.embed_query(query)))

        retriever = PineconeHybridSearchRetriever(
            embeddings= self.dense_embeddings,
            sparse_encoder=self.sparse_encoder,
            index = self.index,
            alpha= alpha,
            text_key="text",## correct key for document that is loaded  
    
        )

        search_kwarge={}
        if filter_source:
            search_kwarge["filter"] ={"source": {"$eq": filter_source}}

        # Mannually add embedding query
        search_kwarge["vector"] = normalise_query

        search_kwarge["k"] = top_k

        relevant_doc = await retriever.ainvoke(query,config = {"configurable":search_kwarge})

        return "\n".join([doc.page_content for doc in relevant_doc])




100%|██████████| 52/52 [00:00<00:00, 681.71it/s]


-------------Retriever Call-------------------


In [21]:
test_tool = PineconeRetrieverTool()

response = test_tool._run(query="what is prompt engineering?",filter_source='https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/')

[Document(metadata={'description': 'Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models. At its core, the goal of prompt engineering is about alignment and model steerability. Check my previous post on controllable text generation.', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/', 'title': "Prompt Engineering | Lil'Log", 'score': 0.770914197}, page_content="Prompt Engineering | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n|

In [22]:
# define addition tool
class Add_Input(BaseModel):
    a:float = Field(description="The first number for addition")
    b:float = Field(description="The second number for addition")


In [24]:
class AddTool(BaseTool):

    print("Addition Tool")
    name:str="add_number"

    description:str="Add two floatting number"

    args_schema:Type[BaseModel] =Add_Input

    def _run(self, a: float, b: float) -> str:
        """Use the tool synchronously."""
        return str(a + b)

    async def _arun(self, a: float, b: float) -> str:
        """Use the tool asynchronously."""
        # For a simple operation like addition, the async implementation is straightforward.
        return str(a + b)
    


Addition Tool


In [27]:
# tool is working
add_tool = AddTool()
add_tool._run(6,3)

'9'

In [28]:
from langchain_community.tools import DuckDuckGoSearchRun
search = DuckDuckGoSearchRun()

In [36]:
retriever_tool = PineconeRetrieverTool()
add_tool = AddTool()

In [37]:
tools = [retriever_tool,search,add_tool]

# Creating Node

In [38]:
from langgraph.prebuilt import ToolNode
from langchain_core.messages import BaseMessage

In [43]:
retriever_node= ToolNode(tools)

In [44]:
#create custom state
class AgenticState(TypedDict):
    messages: Annotated[Sequence[BaseMessage],add_messages]

In [45]:
llm_with_tools = llm.bind_tools(tools)

# Develop Orchestration

In [None]:
from langchain_core.prompts import PromptTemplate
# LLM Decision Maker
class Agentic_Rag():

    def __init__(self,query:str,state:AgenticState):

        self.query=query
        self.state = state

    def LLM_Decision_Maker(self):

        print("-------------LLM Decision Maker----------")
        message = self.state["messages"]
        if len(message) >1:

            last_message = message[-1]
            question = last_message.content

            prompt=PromptTemplate(
                template="""You are a helpful assistant whatever question has been asked to find out that in the given question and answer.
                        Here is the question:{question}
                        """,
                        input_variables=["question"]
                        )
            chain=prompt | llm
            response=chain.invoke({"question":question})
            return {"messages":[response]}
        else:
            response = llm_with_tools.invoke(message)
        

        
