## RAG without State.

In [1]:
import getpass
import os

# Ask for the API key only when it is not already configured, so re-running
# this cell (or starting the kernel with the key exported) neither prompts
# again nor overwrites a key that is already set.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

from langchain_openai import ChatOpenAI

# Chat model used by the chains in this notebook.
#llm = ChatOpenAI(model="gpt-4o-mini")
llm = ChatOpenAI(model="gpt-4o")

In [None]:
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough



# --- Load: parse only the post title, header and content elements of the page.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs4_strainer},
)
docs = loader.load()

# --- Split: overlapping chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs)

# --- Index: embed the chunks into Chroma and expose a similarity retriever
# that is configured with k=3.
vectorstore = Chroma.from_documents(all_splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

# Prompt for the stateless RAG chain: the system message embeds the retrieved
# text through the {context} variable, and the user message carries the
# question through {query}.
prompt = ChatPromptTemplate([
    ("system","You are a helpful assistant who provide response based on the given {context}"),
    ("user","{query}")
])

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# Stateless RAG chain. The input question feeds both prompt variables:
#   "context": question -> retriever -> format_docs (retrieved text as one string)
#   "query":   question passed through unchanged
# The filled prompt goes to the LLM and the reply is parsed into a plain string.
rag_chain  = (
    {"context":retriever | format_docs,"query":RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [17]:
# Stream the answer and print each chunk as it is produced: end="" keeps the
# chunks on one line and flush=True writes each one immediately.
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)

Task Decomposition is a process used in planning where a complicated task is broken down into smaller, more manageable steps. This approach helps an agent understand the various components of a complex task and plan effectively to accomplish it. In the context of LLM-powered autonomous agents, Task Decomposition often involves the use of the "Chain of Thought" (CoT) technique, which prompts the model to "think step by step." This technique enhances the model's performance by allowing it to utilize more computation during test time to transform big tasks into multiple manageable tasks, providing insight into the model’s thinking process.

In [18]:
# Same question without streaming: invoke returns the complete answer string.
rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a process used in planning where a complicated task, which usually involves many steps, is broken down into smaller and simpler steps. This approach helps an agent understand the steps involved in a task and plan ahead effectively. A common technique for task decomposition in the context of using large language models (LLMs) is the Chain of Thought (CoT) prompting. This technique instructs the model to "think step by step," allowing it to utilize more computation during test time to transform big tasks into multiple manageable tasks. This not only enhances the model\'s performance on complex tasks but also provides insight into the model\'s thinking process.'

## RAG with State.

In [19]:
### Sub-chain that rewrites the latest user question into a standalone question using the chat history.

# System instruction for the rewriting step: reformulate only, never answer.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: system instruction, then the prior turns ("chat_history"),
# then the new question ("query").
contexualized_prompt = ChatPromptTemplate(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{query}")
    ]
)

# sub_chain turns {"chat_history", "query"} into a rewritten question string;
# that string is then piped into rag_chain as its input.
sub_chain  = contexualized_prompt | llm | StrOutputParser()
rag_chain_with_history = sub_chain | rag_chain

In [20]:
from langchain_core.messages import HumanMessage,AIMessage

# First turn: the history starts empty and is created before use, so the same
# list is passed to the chain and then extended with the first exchange.
question = "What is Task Decomposition?"
chat_history = []

response = rag_chain_with_history.invoke(
    {"chat_history": chat_history, "query": question}
)

# Record the first exchange so the follow-up can refer back to it.
chat_history.extend(
    [
        HumanMessage(content=question),
        AIMessage(content=response),
    ]
)

# Second turn: "it" is only resolvable through the recorded history.
rag_chain_with_history.invoke(
    {"chat_history": chat_history, "query": "What are the ways of doing it?"}
)


'The different methods for performing task decomposition are:\n\n1. **By LLM with Simple Prompting**: This involves using language models with prompts such as "Steps for XYZ.\\n1." or "What are the subgoals for achieving XYZ?" to break down the task into smaller, manageable steps.\n\n2. **Using Task-Specific Instructions**: This method uses specific instructions tailored to the task at hand. For example, for writing a novel, the instruction might be "Write a story outline."\n\n3. **With Human Inputs**: This involves human intervention to decompose the task, where a person manually breaks down the task into sub-tasks or steps.'

In [27]:
from typing import Sequence

from langchain_core.messages import BaseMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict


# We define a dict representing the state of the application.
# Its keys (`query`, `chat_history`) are the inputs that
# `rag_chain_with_history` is invoked with in `call_model`.
class State(TypedDict):
    """State carried by the graph between invocations."""
    # Latest user question.
    query: str
    # Conversation so far. `add_messages` is attached as the reducer, so the
    # messages a node returns are merged into the existing list (the recorded
    # outputs show the history growing across calls).
    chat_history: Annotated[Sequence[BaseMessage], add_messages]
    #context: str


# Single graph node: runs the history-aware RAG chain on the current state.
def call_model(state: State):
    """Answer ``state["query"]`` and return the new messages for the history.

    The whole state is passed to ``rag_chain_with_history``. The returned dict
    is a partial state update containing only ``chat_history``: the user's
    question followed by the chain's answer.
    """
    answer = rag_chain_with_history.invoke(state)
    new_messages = [
        HumanMessage(state["query"]),
        AIMessage(answer),
    ]
    return {"chat_history": new_messages}


# Single-node graph: register the "model" node, then wire START to it.
workflow = StateGraph(state_schema=State)
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with an in-memory checkpointer so the state is persisted
# between invocations.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [28]:
# All invocations that share this config use the same thread_id, so they
# continue one conversation (the recorded outputs show the history carrying
# over between calls made with it).
config = {"configurable": {"thread_id": "abc123"}}

# First turn: only the question is supplied; no chat_history is passed in.
result = app.invoke(
    {"query": "What is Task Decomposition?"},
    config=config,
)

In [29]:
result

{'query': 'What is Task Decomposition?',
 'chat_history': [HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}, id='67641b43-d558-4d16-8b32-f609a1ed44d5'),
  AIMessage(content='Task decomposition refers to the process of breaking down a complex task into smaller, more manageable steps. This approach helps to simplify the task, making it easier to tackle and solve. In the context of large language models (LLMs) and autonomous agents, task decomposition is often achieved using techniques like Chain of Thought (CoT) prompting. CoT instructs the model to "think step by step," allowing it to utilize more computation time during testing to decompose difficult tasks and provide a clearer interpretation of its reasoning process.', additional_kwargs={}, response_metadata={}, id='638d2620-4394-4d95-a2c5-be8c7a8e74e0')]}

In [None]:
# NOTE(review): this repeats the first question on the same thread (`config`).
# The cell has no execution count and the recorded outputs show the first
# exchange only once, so it appears not to have been run. Running it would
# presumably append a duplicate exchange to the stored chat_history — confirm
# whether this cell should be kept.
result = app.invoke(
    {"query": "What is Task Decomposition?"},
    config=config,
)

In [30]:
# Follow-up on the same thread: "it" can only be resolved from the chat
# history that the checkpointer kept from the earlier turn.
result = app.invoke(
    {"query": "What is one way of doing it?"},
    config=config,
)

In [31]:
result

{'query': 'What is one way of doing it?',
 'chat_history': [HumanMessage(content='What is Task Decomposition?', additional_kwargs={}, response_metadata={}, id='67641b43-d558-4d16-8b32-f609a1ed44d5'),
  AIMessage(content='Task decomposition refers to the process of breaking down a complex task into smaller, more manageable steps. This approach helps to simplify the task, making it easier to tackle and solve. In the context of large language models (LLMs) and autonomous agents, task decomposition is often achieved using techniques like Chain of Thought (CoT) prompting. CoT instructs the model to "think step by step," allowing it to utilize more computation time during testing to decompose difficult tasks and provide a clearer interpretation of its reasoning process.', additional_kwargs={}, response_metadata={}, id='638d2620-4394-4d95-a2c5-be8c7a8e74e0'),
  HumanMessage(content='What is one way of doing it?', additional_kwargs={}, response_metadata={}, id='766acfb2-3365-4612-9a06-68dbed23