In [1]:
import langchain

In [2]:
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
from dotenv import load_dotenv,find_dotenv

In [4]:
# Look up the .env file first, then load its variables into the environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [5]:
import os

In [6]:
# Check that the key was loaded WITHOUT displaying the secret itself: cell
# outputs are saved with the notebook, so echoing the value leaks it to
# everyone who can read the file.
# NOTE(review): the key that this cell used to display should be treated as
# compromised and rotated.
groq_key_loaded = bool(os.getenv("GROQ_API_KEY"))
groq_key_loaded

True

In [7]:
# Fail fast with a readable message when the key is missing. Assigning the
# result of os.getenv() straight back into os.environ raises an opaque
# TypeError when the variable is unset, because environment values must be str.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file before running this notebook."
    )
# Presumably the Groq client reads the key from the environment — confirm
# against the langchain_groq documentation.
os.environ["GROQ_API_KEY"] = groq_api_key

In [8]:
import bs4

## Collection of data

In [9]:
# Restrict HTML parsing to the elements whose CSS class marks the post's
# title, header or body, so the rest of the page is not parsed.
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)

In [10]:
# Run the loader; the result is a list of documents (indexed as docs[0] below).
docs = loader.load()

In [11]:
# Metadata attached to the first loaded document (here: its source URL).
docs[0].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}

In [53]:
# print(docs[0].page_content)

In [14]:
# Chat model used as the final step of the RAG chain.
# NOTE(review): hosted model IDs get retired over time — confirm that
# 'llama3-8b-8192' is still served before re-running.
llm = ChatGroq(model='llama3-8b-8192')

In [15]:
# Identifier of the embedding model passed to HuggingFaceBgeEmbeddings below.
model_name = 'BAAI/bge-small-en'

In [18]:
# Keyword arguments handed to HuggingFaceBgeEmbeddings: `model_kwargs`
# configures the model itself, `encode_kwargs` configures the encode step.
model_kwargs = {"device": "cpu"}
# The option is spelled `normalize_embeddings` (double "d"). With the key
# misspelled, normalization is never requested.
encode_kwargs = {"normalize_embeddings": True}

In [21]:
# Instantiate the embedding model from the name and keyword arguments
# configured in the preceding cells.
hf_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [32]:
# Baseline: build a vector store from the unchunked documents.
# The result is only displayed, not assigned to a name.
FAISS.from_documents(
    documents=docs,
    embedding=hf_embeddings,
)

<langchain_community.vectorstores.faiss.FAISS at 0x1852ba70dd0>

In [23]:
# Number of documents returned by the loader.
len(docs)

1

In [24]:
# Length of the first document's text, in characters.
len(docs[0].page_content)

43130

In [25]:
# Split the loaded documents into smaller chunks; chunk_size and
# chunk_overlap control the chunk length and how much neighbours share.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [54]:
# splits

In [26]:
# Number of chunks produced by the splitter.
len(splits)

66

In [33]:
# Build the vector store from the chunked documents; this is the store the
# retriever is created from.
vector_store = FAISS.from_documents(
    documents=splits,
    embedding=hf_embeddings,
)

In [34]:
# Display the vector store object.
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x1852ba71220>

In [35]:
# Wrap the vector store as a retriever so it can be composed into the chain.
retriever = vector_store.as_retriever()

In [39]:
# Display the retriever's configuration.
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceBgeEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001852BA71220>, search_kwargs={})

In [44]:
from pprint import pprint

In [46]:
# Pull the "rlm/rag-prompt" template from the hub and display it; it expects
# the input variables `context` and `question`.
prompt = hub.pull("rlm/rag-prompt")
prompt



ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [47]:
# Print the prompt with pprint; the printed text matches the plain display of
# `prompt` in the previous cell.
pprint(prompt)

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])


In [48]:
def format_docs(docs):
    """Join the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [49]:
from langchain_core.runnables import RunnablePassthrough

In [51]:
# RAG chain: build the prompt inputs, fill the prompt, call the LLM and parse
# the model output into a string.
chain_inputs = {
    # Retrieved documents, joined into a single text block by format_docs.
    "context": retriever | format_docs,
    # The value passed to invoke(), forwarded unchanged.
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [52]:
# Run the full chain on a sample question; the output below is the parsed answer string.
rag_chain.invoke("What is Task Decomposition?")

"Task Decomposition is a process that breaks down a complicated task into smaller, simpler steps. This is done to enable more manageable tasks and provide insight into the model's thinking process. It can be achieved through techniques such as Chain of Thought or Tree of Thoughts."