In [3]:
import os
import openai
import dotenv

from tqdm import tqdm
from datasets import load_dataset

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain import hub
from langchain.document_loaders import WebBaseLoader
from langchain.prompts import PromptTemplate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Load variables from a local .env file (if one exists) into the process
# environment, so the os.getenv("OPENAI_API_KEY") lookups below can find the key.
dotenv.load_dotenv()

True

In [None]:
# Chat model used to generate answers; the key is read from the environment.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Embedding model used to vectorise text; it reads the same environment variable.
embedding = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [None]:
# Download the source article.
loader = WebBaseLoader(
    "https://lilianweng.github.io/posts/2023-06-23-agent"
)
documents = loader.load()

# Cut the loaded documents into chunks (chunk_size=1000, chunk_overlap=200)
# so that each piece can be embedded and retrieved on its own.
text_splitter = CharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
docs = text_splitter.split_documents(documents)

In [None]:
# A ready-made alternative is available from LangChain Hub:
#   prompt = hub.pull("rlm/rag-prompt")
#
# The template must expose {context} and {question}: those are the two keys
# the RAG chain feeds into the prompt. PromptTemplate.from_template infers the
# input variables from the placeholders; the bare PromptTemplate constructor
# does not accept the template as a positional argument.
prompt = PromptTemplate.from_template(
    """
Given the following text:
{context}

Answer the question below using only the text above. If the text does not
contain the answer, say that you don't know.

Question: {question}
Answer:
"""
)

# Embed every chunk and index it in a FAISS vector store, then expose the
# store through the retriever interface used by the chain.
store = FAISS.from_documents(docs, embedding)
retriever = store.as_retriever()

In [None]:

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Consecutive documents are separated by a blank line; an empty input
    yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)


# Build the mapping handed to the prompt from the incoming question string:
# "context" is the retriever's output rendered by format_docs, and "question"
# is the input itself, passed through unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Compose the pipeline left to right: inputs -> prompt -> model -> plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

# Last expression of the cell, so the answer is shown as the cell output.
rag_chain.invoke("What is Task Decomposition?")