In [4]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

In [5]:
# Fail fast with a readable message when the key is missing: os.environ only
# accepts str values, so assigning the None returned by os.getenv() would
# otherwise raise an opaque TypeError.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [6]:
# Chat model shared by every node in the graph: low temperature, and each
# completion is capped at 500 tokens.
model = ChatOpenAI(model="gpt-4o-mini",
                   temperature=0.1,
                   max_tokens=500)

In [7]:
# Smoke test: send a plain prompt to the chat model and show only the text of the reply.
model.invoke("Champions trophy 2013").content

'The ICC Champions Trophy 2013 was the last edition of the tournament, held in England and Wales from June 6 to June 23, 2013. It featured eight teams: India, Australia, England, South Africa, Pakistan, Sri Lanka, New Zealand, and the West Indies.\n\nIndia emerged as the champions, winning the tournament by defeating England in the final at Edgbaston, Birmingham, on June 23, 2013. India won the match by 5 runs, successfully defending a total of 129 runs in a rain-affected game that was reduced to 20 overs per side.\n\nKey highlights of the tournament included:\n\n- **Player of the Tournament**: Shikhar Dhawan (India), who scored 338 runs in the tournament.\n- **Top Run Scorer**: Shikhar Dhawan (India) with 338 runs.\n- **Top Wicket Taker**: Mitchell Starc (Australia) with 22 wickets.\n\nThe tournament was notable for its competitive matches and the high level of cricket displayed by the participating teams.'

In [8]:
# Embedding model used to vectorise the document chunks and the queries.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

In [9]:
# Sanity check: length of the vector returned for a single query string.
len(embedding_model.embed_query("Champions trophy 2013"))

3072

In [10]:
# Single blog post used to try out the web loader.
url = "https://lilianweng.github.io/posts/2023-06-23-agent/"

In [11]:
from langchain_community.document_loaders import WebBaseLoader


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [12]:
# Fetch the page at `url` and load it as a list of documents.
wen_loader = WebBaseLoader(url)
data = wen_loader.load()

In [13]:
# Inspect the metadata attached to the first loaded document.
data[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'title': "LLM Powered Autonomous Agents | Lil'Log",
 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final results.\n\n\nMemory\

In [14]:
# Blog posts that make up the retrieval corpus.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
]

In [15]:
# Load each URL separately; every .load() call returns a list, so `docs` is a
# list of lists (one inner list per URL).
docs = [WebBaseLoader(url).load() for url in urls]
docs

[[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final res

In [16]:
# Flatten the per-URL lists into one flat list of documents.
doc_list = [item for sublist in docs for item in sublist]

In [17]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the documents into small overlapping chunks (chunk_size=100,
# chunk_overlap=25), with lengths measured via the tiktoken encoder.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 100, chunk_overlap=25)
doc_split = text_splitter.split_documents(doc_list)

In [18]:
# Display the resulting chunks.
# NOTE(review): this prints every chunk; consider showing only a slice such as doc_split[:3].
doc_split

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality of final resu

In [19]:
# Number of chunks produced by the splitter.
len(doc_split)

287

In [20]:
from langchain_community.vectorstores import Chroma

In [21]:
# Embed every chunk with `embedding_model` and store the vectors in a Chroma
# collection that uses the local "chroma_db" directory for persistence.
# NOTE(review): "rag-chrome" looks like a typo for "rag-chroma" — confirm before
# renaming, since the name identifies the stored collection.
# NOTE(review): because the store is persisted, re-running this cell presumably
# adds the same chunks again — verify, and guard if duplicates matter.
vectorstore=Chroma.from_documents(
    documents=doc_split,
    collection_name="rag-chrome",
    embedding=embedding_model,
    persist_directory="chroma_db"
)

In [22]:
# Retriever view over the vector store, configured with k=10 results per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

In [23]:
# Smoke test: run one query through the retriever and inspect the returned chunks.
retriever.invoke("what is agent and prompt?")

[Document(metadata={'title': "Prompt Engineering | Lil'Log", 'source': 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/', 'description': 'Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.\nThis post only focuses on prompt engineering for autoregressive language models, so nothing with Cloze tests, image generation or multimodality models. At its core, the goal of prompt engineering is about alignment and model steerability. Check my previous post on controllable text generation.', 'language': 'en'}, page_content='Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the mode

In [24]:
from langchain.tools.retriever import create_retriever_tool
# Wrap the retriever as a tool the LLM can call.
# The description is what the model reads when deciding whether to call the
# tool, so it must match the indexed corpus (the two Lilian Weng posts listed
# in `urls`) and must not refer to a tool name that does not exist.
retriever_tool=create_retriever_tool(
    retriever,
    "retriever_blog_post",
    "Search and return information from Lilian Weng's blog posts on LLM-powered autonomous agents and prompt engineering. "
    "Use this tool only when the query relates to topics covered by those posts "
    "(for example agents, planning, reflection, memory, tool use, task decomposition, chain-of-thought, or prompting techniques). "
    "For all other queries, respond directly without using any tool. "
    "For simple queries like 'hi', 'hello', or 'how are you', provide a normal response.",
    )

In [25]:
# Tools exposed to the LLM; currently only the blog-post retriever.
tools=[retriever_tool]

from langgraph.prebuilt import ToolNode
# Graph node built from the tools list; it is registered later as "Vector Retriever".
retriever_node=ToolNode(tools)

In [26]:
from typing import Annotated,Sequence, TypedDict
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages

In [27]:
class AgentState(TypedDict):
    """State passed between graph nodes: a single `messages` key holding the conversation."""
    # `add_messages` is attached as the reducer for this key — presumably so that
    # messages returned by a node are appended instead of replacing the list;
    # confirm against the LangGraph documentation.
    messages: Annotated[Sequence[BaseMessage], add_messages]

In [31]:
from langchain_core.prompts import PromptTemplate

def LLM_Decision_Maker(state:AgentState):
    """Entry node of the graph: answer directly or request a retrieval.

    With a single message in the state, the model is bound to `tools` and
    invoked on the conversation, so it may either answer or emit tool calls.
    With more than one message, the content of the last message is treated as
    the question and answered through a plain prompt, without tools bound.

    Args:
        state: Graph state; only `state["messages"]` is read.

    Returns:
        A dict with one new message under the "messages" key.
    """
    print("----CALL LLM_DECISION_MAKE----")
    history = state["messages"]

    # First pass: let the tool-aware model decide what to do.
    if len(history) <= 1:
        tool_aware_model = model.bind_tools(tools)
        return {"messages": [tool_aware_model.invoke(history)]}

    # Later passes: answer whatever the most recent message contains.
    latest_question = history[-1].content
    followup_prompt = PromptTemplate(
        template="""You are a helpful assistant whatever question has been asked to find out that in the given question and answer.
                        Here is the question:{question}
                        """,
        input_variables=["question"],
    )
    answer = (followup_prompt | model).invoke({"question": latest_question})
    return {"messages": [answer]}

In [33]:
from langchain_core.pydantic_v1 import BaseModel, Field


class grade(BaseModel):
    # Structured-output schema used by the relevance grader.
    # Per the field description the value is expected to be 'yes' or 'no'.
    # NOTE(review): "Relvance" in the description is a typo for "Relevance".
    binary_score:str=Field(description="Relvance score 'yes' or 'no'")

In [34]:

from typing import Literal
# Literal is used for the return type hint of the routing function below.


def grade_documents(state:AgentState)->Literal["generator", "rewriter"]:
    """Route after retrieval based on whether the retrieved text is relevant.

    The first message in the state is taken as the user's question and the
    content of the last message as the retrieved text. The model, constrained
    to the `grade` schema, returns a yes/no verdict.

    Args:
        state: Graph state; only `state["messages"]` is read.

    Returns:
        "generator" when the verdict is yes, otherwise "rewriter". These are
        the routing keys, not node names, so the return annotation lists them.
    """
    print("----CALLING GRADE FOR CHECKING RELEVANCY----")
    llm_with_structure_op=model.with_structured_output(grade)

    prompt=PromptTemplate(
        template="""You are a grader deciding if a document is relevant to a user’s question.
                    Here is the document: {context}
                    Here is the user’s question: {question}
                    If the document talks about or contains information related to the user’s question, mark it as relevant. 
                    Give a 'yes' or 'no' answer to show if the document is relevant to the question.""",
                    input_variables=["context", "question"]
                    )

    chain=prompt|llm_with_structure_op

    message=state['messages']
    question = message[0].content
    docs = message[-1].content

    scored_result=chain.invoke({"question": question, "context": docs})

    # Normalise the verdict so that answers such as "Yes" or " yes " still count as relevant.
    score=str(scored_result.binary_score).strip().lower()

    if score=="yes":
        print("----DECISION: DOCS ARE RELEVANT----")
        return "generator"
    else:
        print("----DECISION: DOCS ARE NOT RELEVANT----")
        return "rewriter"

In [35]:

from langchain import hub

# Pull the "rlm/rag-prompt" template from the hub and print it for inspection.
hub.pull("rlm/rag-prompt").pretty_print()


You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: [33;1m[1;3m{question}[0m 
Context: [33;1m[1;3m{context}[0m 
Answer:


In [37]:
def generate(state:AgentState):
    """Produce the final RAG answer from the retrieved text.

    The first message in the state supplies the question and the content of
    the last message supplies the context. The "rlm/rag-prompt" template is
    pulled from the hub on every call and piped into `model`.

    Args:
        state: Graph state; only `state["messages"]` is read.

    Returns:
        A dict with the model's answer as the only entry under "messages".
    """
    print("----RAG OUTPUT GENERATE----")

    history = state["messages"]
    user_question = history[0].content
    retrieved_context = history[-1].content

    rag_prompt = hub.pull("rlm/rag-prompt")
    answer = (rag_prompt | model).invoke(
        {"context": retrieved_context, "question": user_question}
    )

    print(f"this is my response:{answer}")

    return {"messages": [answer]}
    

In [38]:
def rewrite(state:AgentState):
    """Ask the model to reformulate the user's original question.

    The first message in the state is taken as the original question. It is
    embedded in a short instruction and sent to `model`, whose reply is
    returned as the new message.

    Args:
        state: Graph state; only `state["messages"]` is read.

    Returns:
        A dict with the model's reply as the only entry under "messages".
    """
    # Imported here because no visible cell imports HumanMessage (only
    # BaseMessage is imported), which would make this node fail with NameError.
    from langchain_core.messages import HumanMessage

    print("----TRANSFORM QUERY----")
    message=state["messages"]

    question=message[0].content

    # Named `rewrite_request` rather than `input` to avoid shadowing the builtin.
    rewrite_request= [HumanMessage(content=f"""Look at the input and try to reason about the underlying semantic intent or meaning. 
                    Here is the initial question: {question} 
                    Formulate an improved question: """)
       ]

    response=model.invoke(rewrite_request)

    return {"messages": [response]}

In [39]:
from langgraph.graph import END, StateGraph, START
# Graph builder whose state schema is AgentState.
workflow=StateGraph(AgentState)

In [40]:
# Register the four graph nodes in a fixed order, then make the decision maker
# the entry point of the graph.
graph_nodes = {
    "LLM Decision Maker": LLM_Decision_Maker,
    "Vector Retriever": retriever_node,
    "Output Generator": generate,
    "Query Rewriter": rewrite,
}
for node_label, node_action in graph_nodes.items():
    workflow.add_node(node_label, node_action)
workflow.add_edge(START,"LLM Decision Maker")

<langgraph.graph.state.StateGraph at 0x1d22f6ca410>

In [41]:
from langgraph.prebuilt import tools_condition

# After the decision maker: `tools_condition` picks the route; the "tools" key
# maps to the retriever node and END maps to the end of the graph.
workflow.add_conditional_edges("LLM Decision Maker",
                               tools_condition,
                               {"tools":"Vector Retriever",
                                END:END
                                })

# After retrieval: `grade_documents` returns "generator" or "rewriter", which
# this mapping translates into the corresponding node names.
workflow.add_conditional_edges("Vector Retriever",
                               grade_documents,
                               {"generator":"Output Generator",
                                "rewriter":"Query Rewriter"
                                })

<langgraph.graph.state.StateGraph at 0x1d22f6ca410>

In [42]:
# The generator's answer ends the run; a rewritten query goes back to the decision maker.
workflow.add_edge("Output Generator",END)
workflow.add_edge("Query Rewriter","LLM Decision Maker")

<langgraph.graph.state.StateGraph at 0x1d22f6ca410>

In [43]:
# Compile the graph definition into the runnable `app` used by the cells below.
app = workflow.compile()

In [1]:

import warnings
# Ignore all Python warnings from this point on.
# NOTE(review): this also hides deprecation warnings — consider a narrower filter.
warnings.filterwarnings("ignore")

In [45]:
# End-to-end run with a question about the indexed blog posts; in the recorded
# output the grader and the RAG generator both ran.
app.invoke({"messages":["what is LLM Powered Autonomous Agents explain the planning and reflection and prompt engineering explain me in terms of agents and langchain?"]})

----CALL LLM_DECISION_MAKE----
----CALLING GRADE FOR CHECKING RELEVANCY----
----DECISION: DOCS ARE RELEVANT----
----RAG OUTPUT GENERATE----
this is my response:content='LLM-powered autonomous agents utilize large language models as their core controllers, enabling them to perform tasks through planning and reflection. Planning involves strategizing actions based on goals, while reflection allows agents to learn from past experiences to improve future performance. In LangChain, prompt engineering is crucial for effectively communicating with the LLM, guiding its behavior to achieve desired outcomes without altering the model itself.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 76, 'prompt_tokens': 694, 'total_tokens': 770, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 

{'messages': [HumanMessage(content='what is LLM Powered Autonomous Agents explain the planning and reflection and prompt engineering explain me in terms of agents and langchain?', additional_kwargs={}, response_metadata={}, id='1e1a005c-930f-4371-a5f2-21cf12b0cc4b'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_JvfAKtnIAwfOiSxUCSIFB3hl', 'function': {'arguments': '{"query": "LLM Powered Autonomous Agents"}', 'name': 'retriever_blog_post'}, 'type': 'function'}, {'id': 'call_Km6rL9O36esgPjtz246HK93x', 'function': {'arguments': '{"query": "planning and reflection in LLM agents"}', 'name': 'retriever_blog_post'}, 'type': 'function'}, {'id': 'call_NWNoV1vkfW55HTzQqLItKEaW', 'function': {'arguments': '{"query": "prompt engineering in LLM agents and LangChain"}', 'name': 'retriever_blog_post'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 85, 'prompt_tokens': 160, 'total_tokens': 245, 'completion_tokens_details': {'a

In [46]:
# Small-talk query; in the recorded output only the decision-maker step ran.
app.invoke({"messages":["hi how are you gpt?"]})

----CALL LLM_DECISION_MAKE----


{'messages': [HumanMessage(content='hi how are you gpt?', additional_kwargs={}, response_metadata={}, id='3dde4816-ca21-464e-ad94-60d431467171'),
  AIMessage(content="Hello! I'm just a program, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 142, 'total_tokens': 172, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'id': 'chatcmpl-BkljNo18A1C4HSmbY7SrnQDFwlmif', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--6157e954-8634-4e61-850e-dc71f60dec0e-0', usage_metadata={'input_tokens': 142, 'output_tokens': 30, 'total_tokens': 172, 'input_token_details': {'audio': 0, 'cach

In [47]:

# General-knowledge query unrelated to the blog posts; in the recorded output
# only the decision-maker step ran.
app.invoke({"messages":["what is a capital of india?"]})

----CALL LLM_DECISION_MAKE----


{'messages': [HumanMessage(content='what is a capital of india?', additional_kwargs={}, response_metadata={}, id='be1529a0-6fc2-4a68-b9b9-c91b8a4cb227'),
  AIMessage(content='The capital of India is New Delhi.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 142, 'total_tokens': 151, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_34a54ae93c', 'id': 'chatcmpl-BkljVxTEoZaIb1qwNhdbjWL35eXvt', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--16aac7db-7645-4e8a-b373-3921cff479c7-0', usage_metadata={'input_tokens': 142, 'output_tokens': 9, 'total_tokens': 151, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]}

In [48]:
# Question about task decomposition and chain-of-thought prompting, used in the next run.
question="can you explain me what is a task decomposition and why Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks?"

In [49]:
# Run the graph on `question`.
# NOTE(review): in the recorded output only the decision-maker step ran, i.e.
# the answer was produced without calling the retriever — check whether that is intended.
app.invoke({"messages":[question]})

----CALL LLM_DECISION_MAKE----


{'messages': [HumanMessage(content='can you explain me what is a task decomposition and why Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks?', additional_kwargs={}, response_metadata={}, id='c206a1c1-c485-4107-acf1-ebfcdf75c8bf'),
  AIMessage(content="Task decomposition refers to the process of breaking down a complex task into smaller, more manageable subtasks. This approach allows for a clearer understanding of the problem and can lead to more effective solutions, as each subtask can be addressed individually. In the context of language models and AI, task decomposition helps in organizing the thought process and guiding the model through the steps needed to arrive at a solution.\n\nChain of Thought (CoT) prompting, introduced by Wei et al. in 2022, has become a standard technique for enhancing model performance on complex tasks for several reasons:\n\n1. **Structured Reasoning**: CoT prompting encoura

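Example of the intended query-rewrite loop:

1. Query: "Who is Sunny?"
   Vector DB: relevant document found? NO
   → The query is sent to the rewriter.

2. Rewritten query: "Who is Sunny, the GenAI engineer and mentor who creates YouTube videos?"
   Vector DB: relevant document found? YES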