In [1]:
!pip install -q langchain_community tiktoken langchain-openai langchainhub chromadb langchain

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.9/62.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.9/18.9 MB[0m [31m94.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m21.5 MB/s[0m eta [36m0:00:00

In [None]:
import getpass
import os

from langsmith import utils


def _ensure_secret(name):
    """Prompt for the environment variable `name` unless it already holds a real value.

    A value is treated as missing when it is empty or still looks like a
    "<...>" placeholder, so keys are never written into the notebook source
    and a key that is already exported in the environment is not overwritten.
    """
    current = os.environ.get(name, "")
    if not current or current.startswith("<"):
        os.environ[name] = getpass.getpass(f"{name}: ")


# Non-secret LangSmith tracing settings.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "RAG_ADVANCED"

# Secrets are read interactively instead of being hardcoded.
for _secret_name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY"):
    _ensure_secret(_secret_name)

# Displayed as the cell output: whether tracing is reported as enabled.
utils.tracing_is_enabled()

True

In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate



In [4]:
# Load the source blog post, keeping only the elements whose CSS class is
# one of the post's content, title, or header classes.
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)
docs = loader.load()

In [5]:
# 1) Chunk the loaded documents (chunk_size=300, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(docs)

# 2) Embedding model used to index the chunks.
embedding = OpenAIEmbeddings(model="text-embedding-3-small")

# 3) Build the Chroma store from the chunks, persisted under ./db001.
# NOTE(review): re-running this cell against the same persist_directory
# presumably adds the chunks again - confirm before relying on re-runs.
vectorestore = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory="./db001",
)

# 4) Expose the store through the retriever interface.
retriever = vectorestore.as_retriever()

In [7]:
def format_docs(docs):
  """Return the `page_content` of every item in `docs`, joined by a blank line."""
  pieces = [doc.page_content for doc in docs]
  return "\n\n".join(pieces)

# Chat model used for the answering step (temperature=0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Answering prompt; it is filled with {question} and {context}.
answer_template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say you do not know.
Use three sentences maximum and keep the answer concise.

Question:{question}
Context:{context}
Answer:
"""
answer_prompt = ChatPromptTemplate.from_template(answer_template)

# Pipe the prompt into the model and parse the result with StrOutputParser.
# The name is spelled "asnwer_chain"; the cell that runs the flow uses this spelling.
asnwer_chain = answer_prompt | llm | StrOutputParser()

In [9]:
# Prompt that turns a conversational user input into a standalone question
# suited to document retrieval. It takes a single {question} variable.
rewrite_prompt = ChatPromptTemplate.from_template(
    """You are a helpful assistant that improves user queries for information retrieval.

    Given a user's input, rewrite it into a clear, standalone question that can be used to retrieve relevant documents.
    Remove any conversational fluff or non-essential context.

    User input:
    {question}

    Rewritten question:"""
)

# Rewrite chain: prompt -> its own gpt-4o-mini instance (temperature=0) -> StrOutputParser.
rewrite_chain = rewrite_prompt | ChatOpenAI(model = "gpt-4o-mini", temperature=0) | StrOutputParser()

# RAG flow with query rewriting: rewrite -> retrieve -> format -> answer.
query = (
    "I am a data scientist, I take a lot of trainings and right now learning langchain. "
    "Can you tell me what is task decomposition for LLM agents in Langchain?"
)

# Step 1: rewrite the raw user input with the rewrite chain.
rewritten_query = rewrite_chain.invoke({"question": query})
print("Rewritten Query:", rewritten_query)

# Step 2: fetch documents using the rewritten query (not the raw input).
retrieved_docs = retriever.invoke(rewritten_query)

# Step 3: merge the retrieved documents into one context string.
context = format_docs(retrieved_docs)

# Step 4: answer; the rewritten query is also what is passed as the question.
answer = asnwer_chain.invoke({"question": rewritten_query, "context": context})

# Show the original input next to the final answer.
print("Original query:", query)
print("Answer: ", answer)

Rewritten Query: What is task decomposition for LLM agents in Langchain?
Original query: I am a data scientist, I take a lot of trainings and right now learning langchain. Can you tell me what is task decomposition for LLM agents in Langchain?
Answer:  Task decomposition for LLM agents in Langchain involves breaking down complex tasks into smaller, manageable subgoals. This process is facilitated by the LLM, which parses user requests into multiple tasks with attributes like task type, ID, dependencies, and arguments. Decomposition can be achieved through simple prompting, task-specific instructions, or human inputs.
