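# Retrieval-Augmented Generation (RAG)

This notebook builds a minimal RAG pipeline with LangChain: load a blog post, split it into chunks, embed and store the chunks in a Chroma vector store, retrieve the chunks relevant to a question, and pass them to a chat model to generate the answer.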



In [1]:
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file located by find_dotenv().
# NOTE(review): presumably this supplies the OpenAI API key needed by the
# embedding and chat-model cells below - confirm against the local .env.
load_dotenv(find_dotenv())

True

In [2]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

### 1. LOAD

In [3]:
from langchain_community.document_loaders import WebBaseLoader
import bs4


# Fetch the blog post, parsing only the elements whose CSS class marks the
# post title, header or body; everything else on the page is ignored.
post_filter = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_filter},
)
docs = loader.load()

In [4]:
# Preview the first 500 characters of the loaded page to check what the
# parse filter kept.
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


### 2. SPLIT

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded page into chunks (chunk_size=1000) that overlap by 200,
# so text cut at a chunk boundary also appears in the neighbouring chunk.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

In [6]:
# Number of chunks produced by the splitter.
len(splits)

66

In [7]:
# Length of the first chunk's text; expected to stay within the configured chunk_size.
len(splits[0].page_content)

969

In [8]:
# Metadata carried by the first chunk (the output below shows the source URL).
splits[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}

### 3. EMBED AND STORE

In [9]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

import shutil

# Directory where Chroma persists the vector index.
persist_directory = "./chroma/"

# Remove old database files, if any. Done in pure Python (instead of a shell
# `rm -rf`) so the cell also works where no POSIX shell is available, and so
# the path is defined exactly once. A missing directory is not an error.
shutil.rmtree(persist_directory, ignore_errors=True)

# Embed every chunk and store the vectors in a Chroma index persisted under
# `persist_directory`.
embedding = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory,
)


### 4. RETRIEVE

In [10]:
# Available search_type values: "similarity", "mmr", "similarity_score_threshold".
# Here: plain similarity search, asking for the 6 best matches (k=6).
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [11]:
# Try the retriever on its own with a sample question before wiring it into a chain.
retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")

In [12]:
# Number of documents returned for the sample query (k was set to 6 above).
len(retrieved_docs)

6

In [13]:
# Print every retrieved document in full (page content plus metadata).
for retrieved_doc in retrieved_docs:
    print(retrieved_doc)

page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.\nTask decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.' metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}
page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022)

In [14]:
# Show only the text of the first returned document.
print(retrieved_docs[0].page_content)

Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.


### 5. CHATMODEL

In [15]:
from langchain_openai import ChatOpenAI

# Chat model that generates the final answer in the RAG chain below.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

### 6. PROMPT TEMPLATE

In [16]:
from langchain_core.prompts import PromptTemplate

# Prompt text sent to the chat model. `{context}` and `{question}` are the
# placeholders filled in when the prompt is formatted; the remaining lines
# are the answering instructions.
template = (
    "Use the following pieces of context to answer the question at the end.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Use three sentences maximum and keep the answer as concise as possible.\n"
    'Always say "thanks for asking!" at the end of the answer.\n'
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Helpful Answer:\n"
)


# Wrap the raw text in a PromptTemplate; the chain below supplies the
# "context" and "question" values for its placeholders.
prompt = PromptTemplate.from_template(template)

### 7. RAG CHAIN

In [17]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (``"\\n\\n"``). An empty iterable yields ``""``.
    """
    texts = []
    for document in docs:
        texts.append(document.page_content)
    return "\n\n".join(texts)


# RAG pipeline: the input question feeds both entries of the dict below, and
# the resulting mapping flows through prompt -> chat model -> string parser.
rag_chain = (
    # "context": retrieve documents for the question and join their text with
    # format_docs; "question": the original input, forwarded as-is.
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

### 8. FINAL RESPONSE

In [18]:
# Run the full chain on a question about the loaded article.
rag_chain.invoke("Who is the author of this article?")

'The author of the article "LLM-powered Autonomous Agents" is Lilian Weng. Thanks for asking!'

In [19]:
# Each invoke() call is independent: the chain only receives the current
# question and the context retrieved for it, so a follow-up such as "Who?"
# cannot be resolved against the previous question (see the output below).
# TODO: add conversation memory to support follow-up questions.

rag_chain.invoke("Who?")

'I don\'t have enough context to provide a specific answer to the question "Who?" in this scenario. Thanks for asking!'