In [11]:
import tempfile
from pathlib import Path

import bs4
import lancedb
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores.lancedb import LanceDB
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [12]:
# Fetch the blog post. HTML parsing is restricted to elements carrying the
# post-content / post-title / post-header classes.
post_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)
docs = loader.load()

In [13]:
# Keep the LanceDB files under the platform's temp directory instead of a
# hard-coded root-level "/temp" path, which typically cannot be created
# without elevated privileges on Unix-like systems.
lancedb_uri = str(Path(tempfile.gettempdir()) / "lancedb")
vectordb = lancedb.connect(lancedb_uri)

# Create the table with one embedded seed row. mode="overwrite" replaces any
# existing "my_table", so re-running this cell starts from a fresh table.
# NOTE(review): the seed row stays in the table that is later handed to
# LanceDB.from_documents, so it can presumably show up in retrieval results.
seed_text = "Hello World"
vector_table = vectordb.create_table(
    "my_table",
    data=[
        {
            "vector": OpenAIEmbeddings().embed_query(seed_text),
            "text": seed_text,
            "id": "1",
        }
    ],
    mode="overwrite",
)

In [15]:
# Cut the loaded documents into overlapping chunks, then embed the chunks and
# store them through the LanceDB table created earlier.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

embedding_model = OpenAIEmbeddings()
vectorstore = LanceDB.from_documents(
    documents=splits,
    embedding=embedding_model,
    connection=vector_table,
)

In [20]:
# Building blocks shared by the chains below: the chat model, the RAG prompt
# pulled from the LangChain hub, and a retriever view over the vector store.
llm = ChatOpenAI(temperature=0.3, model_name="gpt-3.5-turbo")
prompt = hub.pull("rlm/rag-prompt")
retriever = vectorstore.as_retriever()

In [21]:
def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents in input order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [22]:
# Basic RAG chain. The incoming question is fanned out into two prompt inputs:
# "context" (retrieved documents, flattened to text) and "question" (passed
# through unchanged). The prompt feeds the model, whose reply is parsed to str.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [23]:
# Try the basic chain; the cell's last expression displays the answer.
sample_question = "What is Task Decomposition?"
rag_chain.invoke(sample_question)

"Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It can be done through various methods such as using prompting techniques, task-specific instructions, or human inputs. The goal is to make the task more manageable and facilitate the interpretation of the model's thinking process."

In [26]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage

In [25]:
# System instruction for the question-rewriting step. Adjacent literals are
# concatenated into a single line of text with no embedded newlines.
contextualize_q_system_propmt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, then the prior turns, then the newest
# user question.
contextualize_q_messages = [
    ("system", contextualize_q_system_propmt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# prompt -> chat model -> plain string (the reformulated question)
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

In [27]:
# Exercise the rewriter with a follow-up question that refers back to the
# previous exchange.
sample_history = [
    HumanMessage(content="what is LLM stand for?"),
    AIMessage(content="Large Language Model"),
]
contextualize_q_chain.invoke(
    {"chat_history": sample_history, "question": "What is meant by Model?"}
)

'What is the definition of "model" in this context?'

In [29]:
# System prompt for the answering step; "{context}" is the template variable
# that receives the retrieved text.
# A trailing backslash joins the next line with nothing in between, so each
# continued sentence keeps a space before the backslash to stop sentences
# running together (e.g. "tasks.Use").
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}
"""

# Answering prompt: system instruction (with the retrieved context), the
# conversation so far, then the user's current question.
qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

def cotextualized_question(input: dict):
    """Choose what to send to the retriever for this turn.

    Args:
        input: Chain input; ``"chat_history"`` is optional, and ``"question"``
            is read when the history is missing or empty.

    Returns:
        ``input["question"]`` when there is no (or an empty) chat history,
        otherwise the ``contextualize_q_chain`` runnable itself.
        NOTE(review): this relies on LCEL invoking a runnable returned from a
        function step with the same input - confirm against the installed
        LangChain version.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        return input["question"]
    return contextualize_q_chain

# Conversational RAG chain (replaces the earlier `rag_chain` binding).
# "context" is added to the input dict by picking the retrieval query, fetching
# documents and flattening them to text. There is no output parser at the end,
# so the chain's result is whatever the chat model returns.
context_pipeline = cotextualized_question | retriever | format_docs
rag_chain = RunnablePassthrough.assign(context=context_pipeline) | qa_prompt | llm

In [30]:
# Turn 1: start with an empty history.
chat_history = []
question = "What is Task Decomposition? "
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})

# Record the first exchange so the follow-up can refer back to it.
chat_history.append(HumanMessage(content=question))
chat_history.append(ai_msg)

# Turn 2: "it" only makes sense given the recorded history. The reply is
# displayed as the cell output and is not appended to chat_history.
second_question = "What are common ways of doing it?"
rag_chain.invoke({"question": second_question, "chat_history": chat_history})

AIMessage(content='There are several common ways of task decomposition. One approach is using Chain of Thought (CoT), where the model is instructed to think step by step and decompose the task into smaller subtasks. Another way is by providing task-specific instructions or prompts to guide the decomposition process. Additionally, task decomposition can also be done with human inputs, where humans provide guidance or break down the task into smaller components.')

In [31]:
# Show the recorded conversation: only the first question/answer pair was
# appended; the second reply was not.
chat_history

[HumanMessage(content='What is Task Decomposition? '),
 AIMessage(content='Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It allows agents or models to tackle difficult tasks by dividing them into more manageable subtasks. This approach enables the agent to think step by step and utilize more computation to effectively solve complex problems.')]