# RAG from Scratch

## Environment

- OpenAI API 키와 LangChain API 키는 `.env` 파일에 저장하기

In [2]:
!pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain

Collecting langchainhub
  Downloading langchainhub-0.1.17-py3-none-any.whl.metadata (621 bytes)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.32.0.20240602-py3-none-any.whl.metadata (1.8 kB)
Downloading langchainhub-0.1.17-py3-none-any.whl (4.8 kB)
Downloading types_requests-2.32.0.20240602-py3-none-any.whl (15 kB)
Installing collected packages: types-requests, langchainhub
Successfully installed langchainhub-0.1.17 types-requests-2.32.0.20240602


In [1]:
import bs4

from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load the source article. The SoupStrainer restricts HTML parsing to elements
# whose CSS class is post-content, post-title, or post-header.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)
docs = loader.load()



In [6]:
# Split the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [7]:
# Embed every chunk with OpenAIEmbeddings and index the results in Chroma
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Expose the vector store as the retriever consumed by the RAG chain
retriever = vectorstore.as_retriever()

In [13]:
### RETRIEVAL and GENERATION ###

# Prompt: pull the "rlm/rag-prompt" template from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")

# LLM: gpt-3.5-turbo with temperature=0
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)

In [14]:
# post-processing 

def format_docs(docs):
    """Concatenate the text of the given documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined in order, with two newline
        characters (one blank line) between consecutive documents. An empty
        iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Chain: retrieve and format the context while passing the question through
# unchanged, then fill the prompt, call the LLM, and parse the reply to a string
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Ask a sample question; the cell displays the returned answer
rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a technique used to break down complex tasks into smaller and simpler steps. This approach allows agents to better plan and execute tasks efficiently. Task decomposition can be achieved through various methods such as prompting with specific instructions or utilizing human inputs.'