In [1]:
import os
from dotenv import load_dotenv
os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")
os.environ['LANGCHAIN_API_KEY'] = os.getenv('LANGCHAIN_API_KEY')
os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_PROJECT'] = os.getenv("LANGCHAIN_PROJECT")
groq_api_key = os.getenv('GROQ_API')

In [2]:
from langchain_groq import ChatGroq

model = ChatGroq(model="Gemma2-9b-It", groq_api_key=groq_api_key)

In [3]:
from langchain_huggingface import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [5]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


In [6]:
import bs4
loader = WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/",
),
bs_kwargs=dict(
  parse_only = bs4.SoupStrainer(
      class_=("post-content","post-title","post-header")
  )  
),
)

documents  =loader.load()

In [8]:
#chunk the datasize because llm has sequence length

text_splitter  =RecursiveCharacterTextSplitter(chunk_size = 1000,chunk_overlap = 200)
splits = text_splitter.split_documents(documents)
vectorStore = Chroma.from_documents(documents=splits,embedding=embedding)
retriever = vectorStore.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7f9fcc48f8b0>)

In [9]:
#prompt template
system_prompt = (

    "you are an assistant for question -answering tasks."
    "use the following pieces of retrieved context to answer"
    "the question. If you don't know the answer,say that you don't know"
    "use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"

)

prompt = ChatPromptTemplate.from_messages(

    [
        ("system", system_prompt),
        ("human","{input}"),
    ]
)

In [10]:
#create_stuff_documents_chain combines the document and send to prompt and then llm
question_answer_chain = create_stuff_documents_chain(model,prompt)
rag_chain = create_retrieval_chain(retriever,question_answer_chain)

In [11]:
response = rag_chain.invoke({"input":"What is self-reflection?"})
response

{'input': 'What is self-reflection?',
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 3. Illustration of the Reflexion framework. (Image source: Shinn & Labash, 2023)\nThe heuristic function determines when the trajectory is inefficient or contains hallucination and should be stopped. Inefficient planning refers to trajectories that take too long without success. Hallucination is defined as encountering a sequence of consecutive identical actions that lead to the same observation in the environment.\nSelf-reflection is created by showing two-shot examples to LLM and each example is a pair of (failed trajectory, ideal reflection for guiding future changes in the plan). Then reflections are added into the agent’s working memory, up to three, to be used as context for querying LLM.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Memory stream: is a long-term memor

In [12]:
response['answer']

'Self-reflection is a process that allows autonomous agents to learn and improve by analyzing past actions and decisions. \n\nIt involves identifying mistakes, understanding the reasons behind them, and using that knowledge to refine future actions. Self-reflection is crucial for agents operating in complex, real-world environments where trial and error is common. \n\n\n\n'

In [14]:
rag_chain.invoke({"input":"How do we achieve it?"})

{'input': 'How do we achieve it?',
 'context': [Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.'),
  Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated 

the second question asked was based on first question so to tackle this we use char history

#CHAT History

In [15]:
#the retriever also know the history of chat
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder 

contextualize_q_system_prompt = (
    "Give a chat history and the latest user question "
    "which might reference context in the chat history"
    "formulate a standalone question which can be understood"
    "without the chat history. Do NOT answer the question"
    "just  formulate it if needed and otherwise return it as is."
)

contextualize_q_prompt  =ChatPromptTemplate.from_messages(

    [
        ("system",contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human","{input}"),
    ]
)

<!-- CHAT history -->

In [16]:
history_aware_retriever = create_history_aware_retriever(model,retriever,contextualize_q_prompt)
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7f9fcc48f8b0>))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Give a chat history and the latest user question which might reference context in the chat historyformulate a standalone question which can be understoodwithout the chat history. Do NOT answer the questionjust  formulate it if need

In [18]:
qa_prompt  =ChatPromptTemplate.from_messages(

    [
        ("system",system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human","{input}"),
    ]
)

In [19]:
#create a chain
question_answer_chain = create_stuff_documents_chain(model,qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever,question_answer_chain)

In [22]:
from langchain_core.messages import AIMessage,HumanMessage
chat_history = []
question = "What is the self-reflection"

response1 = rag_chain.invoke({"input":question,"chat_history":chat_history})

chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=response1['answer'])
    ]
)

question2 = "Tell me more about it?"

response2 = rag_chain.invoke({"input":question2,"chat_history":chat_history})

print(response2['answer'])


In the Reflexion framework, self-reflection is a mechanism for improving an agent's reasoning abilities. 

It works by training the LLM on examples of past failures, where each example consists of a "failed trajectory" (a sequence of actions that didn't lead to success) and an "ideal reflection" (a concise summary of what went wrong and how to improve).  These reflections are then stored in the agent's working memory and used as context when making future decisions, allowing the agent to learn from its past mistakes and make more informed choices. 





In [23]:
#passing it through chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key='input',
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [24]:
conversational_rag_chain.invoke(
    {"input":"what is Task Decomposition?"},
    config  = {
        "configurable":{'session_id':"abc123"}
    },
)['answer']

'Task decomposition is breaking down a complex task into smaller, more manageable subtasks.  \n\nThis makes it easier for an agent, like an LLM-powered autonomous agent, to plan and execute the task effectively. \n\nTechniques like Chain of Thought (CoT) and Tree of Thoughts (ToT) are used to decompose tasks. \n\n\n'

In [25]:
conversational_rag_chain.invoke(
    {"input":"where can be it useful?"},
    config  = {
        "configurable":{'session_id':"abc123"}
    },
)['answer']

"Task decomposition is useful in situations where a complex goal needs to be achieved. \n\nIt's particularly helpful for AI agents, like those powered by LLMs, as it allows them to break down large problems into smaller, more solvable steps. \n\nThis makes the task less overwhelming and improves the agent's ability to plan and execute successfully.  \n"

In [26]:
conversational_rag_chain.invoke(
    {"input":"is there any alternative of it?"},
    config  = {
        "configurable":{'session_id':"abc123"}
    },
)['answer']

'While task decomposition is a common approach, alternatives exist. \n\nOne alternative is using hierarchical reinforcement learning, where an agent learns to solve subtasks and combine them into larger goals. \n\nAnother is leveraging rule-based systems or expert systems that contain predefined procedures for accomplishing specific tasks.  \n'