# RAG From Scratch: Overview

Set Environment Vars and API Keys

In [1]:
# ! pip install python-dotenv

In [2]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [3]:
import os
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHIAN_PROJECT'] = 'advanced-rag'
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY","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")
os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY","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")

# Part 1 : Overview

In [4]:
# ! pip install beautifulsoup4 langchain

In [5]:
import bs4
from langchain import hub 
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_groq import ChatGroq

USER_AGENT environment variable not set, consider setting it to identify your requests.


### Indexing

In [6]:
# Load Documents

loader = WebBaseLoader(
    web_paths = ("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs = dict(
        parse_only = bs4.SoupStrainer(
            class_ = ("post-content","post-title","post-header")
        )
    )


)
docs=loader.load()

### Split : Chunking

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,
                                               chunk_overlap=200)
splits = text_splitter.split_documents(docs)

### Embed

In [8]:
model_name = "BAAI/bge-small-en"
model_kwargs = {"device":"cpu"}
encode_kwargs = {"normalize_embeddings": True}


hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)


vectorstore = FAISS.from_documents(documents=splits,
                                   embedding=hf_embeddings)


# retriever = vectorstore.as_retriever()  # Dense Retrieval - Embeddings/Context based



  from tqdm.autonotebook import tqdm, trange


In [9]:
retriever = vectorstore.as_retriever()

### Retrieval & Generation

In [36]:
#prompt
prompt = hub.pull("rlm/rag-prompt")
print(prompt)

input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [37]:
#llm
llm=ChatGroq(model="llama3-8b-8192", temperature=0)

#post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

#Chain
rag_chain = (
    {
        "context"  : retriever | format_docs, 
        "question" : RunnablePassthrough()
    } 
    | prompt
    | llm
    | StrOutputParser()
)

print(rag_chain.invoke("What is task decomposition?"))


Task decomposition is the process of breaking down a complex task into smaller, simpler steps. This is done to make the task more manageable and to allow for more efficient planning and execution.


# Part 2: Indexing

In [38]:
# Documents
question = "What kinds of pets do I like?"
document = "My favorite pet is a cat."

### Document Loader

In [39]:
# Indexing 

import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only = bs4.SoupStrainer(
            class_ = ("post-content", "post-title", "post-header")
        )
    )

)

# load 

docs=loader.load()

### Splitter

In [40]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50
)
splits=text_splitter.split_documents(docs)

### Text Embedding 

In [41]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

model_name = "BAAI/bge-small-en"
model_kwargs = {"device":"cpu"}
encode_kwargs = {
    "normalize_embeddings":True
}

hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name, 
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

query_result = hf_embeddings.embed_query(question)
document_result = hf_embeddings.embed_query(document)
len(query_result)

384

In [42]:
import numpy as np

def cosine_similarity(a,b):
    dot_product = np.dot(a,b)
    a_norm = np.linalg.norm(a)
    b_norm = np.linalg.norm(b)
    cosine_distance = dot_product/(a_norm*b_norm)
    return cosine_distance

similarity = cosine_similarity(query_result,document_result)
print("Similarity Score : ", similarity)
    

Similarity Score :  0.902305234006825


### Vectorstores

In [43]:
### Index
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_documents(
    documents=splits, embedding=hf_embeddings
)

retriever=vectorstore.as_retriever()

retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000198A97E49D0>, search_kwargs={})

### Retrieval

In [44]:
docs = retriever.get_relevant_documents("What is Task Decomposition?")

In [45]:
print(len(docs))
docs

4


[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search

### Generation

In [46]:
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate


# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template=template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [47]:
### llm
llm = ChatGroq(model="llama3-8b-8192", temperature=0)


In [48]:
# Chain

chain = prompt | llm

In [49]:
chain.invoke({'context' : docs,
              'question' : "What is Task Decomposition?"
}  )

AIMessage(content='According to the provided context, Task Decomposition is a process where an agent breaks down a complicated task into smaller and simpler steps. This can be done using various techniques, such as:\n\n1. Chain of Thought (CoT): instructing the model to "think step by step" to decompose hard tasks into smaller steps.\n2. Tree of Thoughts (Yao et al. 2023): decomposing the problem into multiple thought steps and generating multiple thoughts per step, creating a tree structure.\n3. Simple prompting: using LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", or using task-specific instructions.\n\nTask decomposition can be used to transform big tasks into multiple manageable tasks, allowing the agent to plan ahead and make more informed decisions.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 164, 'prompt_tokens': 997, 'total_tokens': 1161, 'completion_time': 0.136666667, 'prompt_time': 0.13402

In [54]:
prompt = hub.pull("rlm/rag-prompt")

#post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
            {"context" : retriever | format_docs,
             "question" : RunnablePassthrough()
             }
            | prompt
            | llm
            | StrOutputParser()
        

)
rag_chain.invoke("What is task decomposition?")
    

'Task decomposition is the process of breaking down a complex task into smaller, simpler steps. This is achieved by instructing the model to "think step by step" or by using techniques such as Chain of Thought or Tree of Thoughts.'