In [1]:
import os
import openai
import dotenv

from tqdm import tqdm
from datasets import load_dataset

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import ElasticsearchStore

from langchain import hub
from langchain.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

dotenv.load_dotenv()

True

In [2]:
llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
embedding = OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"), model="text-embedding-3-small")

In [4]:
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(documents)

Created a chunk of size 2731, which is longer than the specified 1000
Created a chunk of size 1538, which is longer than the specified 1000
Created a chunk of size 1380, which is longer than the specified 1000
Created a chunk of size 2352, which is longer than the specified 1000
Created a chunk of size 1953, which is longer than the specified 1000
Created a chunk of size 1067, which is longer than the specified 1000
Created a chunk of size 1475, which is longer than the specified 1000
Created a chunk of size 2881, which is longer than the specified 1000
Created a chunk of size 1980, which is longer than the specified 1000
Created a chunk of size 4145, which is longer than the specified 1000
Created a chunk of size 2528, which is longer than the specified 1000


In [5]:
prompt = hub.pull("rlm/rag-prompt")
store = ElasticsearchStore.from_documents(docs, embedding, index_name="web", es_url="http://localhost:9200")
retriever = store.as_retriever()

In [6]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition refers to the process of breaking down complex tasks into smaller, manageable subgoals, which facilitates efficient handling and execution. This can be achieved through various methods, including prompting with specific instructions or leveraging external planners for long-horizon planning. The approach enhances task performance by allowing the model to think step-by-step and explore multiple reasoning possibilities.'