In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain bs4
! pip install langchain-anthropic langchain_voyageai
! pip install -U langchain-google-genai
! pip install langchain-huggingface ipywidgets

In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [66]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_anthropic import ChatAnthropic
from langchain_voyageai import VoyageAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace

from huggingface_hub import login
login() # You will be prompted for your HF key, which will then be saved locally

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Indexing
Load Documents -> Split -> Add chunks to Vector Store

In [None]:
#### INDEXING ####

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

66


In [28]:
from time import sleep

# Embed
vectorstore = Chroma(embedding_function=VoyageAIEmbeddings(batch_size=32, model='voyage-3'))
for i in range(0, len(splits), 10):
    print('Adding', i, i+10)
    documents = splits[i:i+10]
    vectorstore.add_documents(documents=documents)
    sleep(65)

retriever = vectorstore.as_retriever()

Adding 0 10
Adding 10 20
Adding 20 30
Adding 30 40
Adding 40 50
Adding 50 60
Adding 60 70


# Retrieval and Generation

In [None]:
#### RETRIEVAL and GENERATION ####

# Prompt
prompt = hub.pull("rlm/rag-prompt")

In [65]:
# LLM
# llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
# llm = ChatAnthropic(model='claude-3-opus-20240229')
# ChatGoogleGenerativeAI
# 2 RPM (requests per minute)
# 32,000 TPM (tokens per minute)
# 50 RPD (requests per day)
# llm = ChatGoogleGenerativeAI(model="gemini-pro")

llm = ChatHuggingFace(
    verbose=True,
    llm=HuggingFaceEndpoint(
        repo_id="meta-llama/Llama-3.2-1B-Instruct",
        task="text-generation",
        max_new_tokens=256,
        do_sample=False,
        repetition_penalty=1.03,
    ))


# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
rag_chain.invoke("What is Task Decomposition?")

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


'Task decomposition involves breaking down complex tasks into multiple manageable steps or thought processes, allowing the model to interpret and understand the task better. It is achieved through techniques such as CoT (Tree of Thoughts), LLMs (Large Language Models), and prompting methods like BFS or DFS.'