<a href="https://colab.research.google.com/github/lokeshparab/GenAI-Full-Course/blob/main/RAG/Introduction_of_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and Import Libraries

In [None]:
!pip install langchain-core langchain-community langchain_groq langchain-anthropic beautifulsoup4 faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m53.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [None]:
import langchain, os ,bs4

from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_groq import ChatGroq
from langchain_anthropic import ChatAnthropic

In [None]:
from google.colab import userdata


# Copy each API key from Colab's `userdata` store into `os.environ`
# under the same name.
for secret_name in ("GROQ_API_KEY", "ANTHROPIC_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# Collection of Data

In [None]:
# Load one blog post, restricting HTML parsing to elements that carry one of
# the listed CSS classes.
loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-header", "post-title", "post-content"),
        ),
    },
)

docs = loader.load()

# Quick sanity check on what was fetched.
print(f"Document length: {len(docs)}")
print(f"Page content length {len(docs[0].page_content)}")

docs[0].metadata

Document length: 1
Page content length 43130


{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}

# Load Models and Embeddings

In [None]:
# Two chat models: one through the Groq client, one through the Anthropic client.
groq_model = ChatGroq(model="llama3-8b-8192")
claud_model = ChatAnthropic(model="claude-3-5-haiku-latest")

# Embedding model used to vectorise documents; configured to run on CPU and
# to request normalised embeddings from the encoder.
baai_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)


In [None]:
# Build a FAISS index from the documents exactly as loaded (no splitting yet).
# The store is only displayed here, not assigned to a name.
FAISS.from_documents(docs, baai_model)

<langchain_community.vectorstores.faiss.FAISS at 0x7e8c0f409090>

# Chunking Methods

In [None]:
# Split the loaded documents into overlapping chunks: target size 1000 with an
# overlap of 200, both measured with `len` (i.e. in characters).
text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)
len(splits)

66

In [None]:
# Embed every chunk with the embedding model and index the vectors in FAISS.
vectorstore = FAISS.from_documents(splits, baai_model)

# Expose the index through the retriever interface so it can be piped in a chain.
retriever = vectorstore.as_retriever()

# LangChain Chaining

In [None]:
def format_docs(docs):
  """Join the text of several documents into a single context string.

  Args:
    docs: Iterable of objects exposing a ``page_content`` string attribute.

  Returns:
    The ``page_content`` values separated by a blank line, or an empty
    string when ``docs`` is empty.
  """
  # The separator must be two real newlines ("\n\n"). Writing it with forward
  # slashes ("/n/n") would insert those four literal characters between chunks.
  return "\n\n".join(doc.page_content for doc in docs)

Prompt templates website: [LangSmith Hub](https://smith.langchain.com/hub)

In [None]:
# Pull the "rlm/rag-prompt" template from the hub, then display it.
prompt = hub.pull("rlm/rag-prompt")
prompt



ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [None]:
import pprint

# Show the message templates that make up the prompt.
prompt.messages

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

In [None]:
# Print the prompt's message templates using pprint's formatting.
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [None]:
# RAG pipeline: the input question is sent to the retriever (whose results are
# merged by format_docs) and is also passed through unchanged. Both values fill
# the prompt, which goes to the Groq chat model, and the reply is parsed to a string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | groq_model
    | StrOutputParser()
)
rag_chain.invoke("What is Task Decomposition?")

'Task Decomposition is a technique that breaks down a complicated task into smaller, simpler steps. This is achieved by instructing the model to "think step by step" or by using task-specific instructions, such as "Write a story outline." The goal is to transform big tasks into multiple manageable tasks, shedding light on the model\'s thinking process.'