In [None]:
!pip install pandas langchain langchain-community sentence-transformers faiss-cpu "transformers[agents]"

In [None]:
pip install "git+https://github.com/huggingface/transformers.git#egg=transformers[agents]"

In [None]:
# Import necessary modules
import pandas as pd
import datasets
from transformers import AutoTokenizer
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.utils import DistanceStrategy
from tqdm import tqdm
from transformers.agents import Tool, HfEngine, ReactJsonAgent
from huggingface_hub import InferenceClient
import logging

# Logging: module-level logger, with the root logger configured at INFO level
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [None]:
# Knowledge base: the "train" split of the m-ric/huggingface_doc dataset
knowledge_base = datasets.load_dataset(
    path="m-ric/huggingface_doc",
    split="train",
)

In [None]:
# Bare expression: the cell output shows the loaded dataset object.
knowledge_base

In [None]:
# Wrap every dataset record in a LangChain Document
source_docs = []
for record in knowledge_base:
    # Only the second "/"-separated component of `source` is kept as metadata.
    origin = record["source"].split("/")[1]
    source_docs.append(
        Document(page_content=record["text"], metadata={"source": origin})
    )

logger.info(f"Loaded {len(source_docs)} documents from the knowledge base")

In [None]:
# Preview only the first few documents; displaying the whole list would flood
# the cell output with the full text of every document.
source_docs[:3]

In [None]:
# Text splitter built from the gte-small tokenizer
tokenizer = AutoTokenizer.from_pretrained("thenlper/gte-small")

# Chunking options, gathered in one place so they are easy to tune.
splitter_settings = {
    "chunk_size": 200,
    "chunk_overlap": 20,
    "add_start_index": True,
    "strip_whitespace": True,
    "separators": ["\n\n", "\n", ".", " ", ""],
}
text_splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    **splitter_settings,
)

In [None]:
# Chunk every source document, keeping only the first occurrence of each chunk text
logger.info("Splitting documents...")
docs_processed = []
unique_texts = {}
for source_doc in tqdm(source_docs):
    for chunk in text_splitter.split_documents([source_doc]):
        chunk_text = chunk.page_content
        if chunk_text in unique_texts:
            # Same text already collected from an earlier chunk: skip it.
            continue
        unique_texts[chunk_text] = True
        docs_processed.append(chunk)

logger.info(f"Processed {len(docs_processed)} unique document chunks")

In [None]:
# Embedding model used to vectorise the chunks
logger.info("Initializing embedding model...")
embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")

# FAISS index over all processed chunks, configured with the cosine distance strategy
logger.info("Creating vector database...")
vectordb = FAISS.from_documents(
    docs_processed,
    embedding_model,
    distance_strategy=DistanceStrategy.COSINE,
)

logger.info("Vector database created successfully")

### Building the RetrieverTool


In [None]:
class RetrieverTool(Tool):
    """Agent tool that looks up the chunks most similar to a query in a vector store.

    Args:
        vectordb: Vector store exposing `similarity_search(query, k=...)`.
        k: Number of documents to retrieve per query (defaults to 7).
        **kwargs: Forwarded unchanged to `Tool.__init__`.
    """

    name = "retriever"
    description = "Using semantic similarity, retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "text",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        }
    }
    output_type = "text"

    def __init__(self, vectordb, k=7, **kwargs):
        super().__init__(**kwargs)
        self.vectordb = vectordb
        self.k = k

    def forward(self, query: str) -> str:
        """Return the retrieved documents as one text block with numbered headers.

        Args:
            query: Search text; must be a `str`.

        Returns:
            A string starting with "Retrieved documents:" followed by each
            document's content under a "===== Document <i> =====" header.

        Raises:
            AssertionError: If `query` is not a string.
        """
        assert isinstance(query, str), "Your search query must be a string"

        docs = self.vectordb.similarity_search(
            query,
            k=self.k,
        )

        # Each header starts with a blank line so the previous document's last
        # line can never run straight into the next "===== Document n =====" marker.
        return "\nRetrieved documents:\n" + "".join(
            f"\n\n===== Document {i} =====\n" + doc.page_content for i, doc in enumerate(docs)
        )

# Retriever tool bound to the vector database built earlier
retriever_tool = RetrieverTool(vectordb=vectordb)

In [None]:
# Bare expression: the cell output shows the tool instance.
retriever_tool

In [None]:
pip install huggingface_hub

In [None]:
from huggingface_hub import notebook_login

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required to call the hosted Llama 3 model used below — confirm.
notebook_login()

In [None]:
# Language model engine that drives the agent
llm_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
llm_engine = HfEngine(llm_model_id)

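### If you want to use OpenAI (optional — running the cells in this section replaces the Hugging Face `llm_engine` defined above)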

In [None]:
import os
from openai import OpenAI

from typing import List, Dict
from transformers.agents.llm_engine import MessageRole, get_clean_message_list
from huggingface_hub import InferenceClient

# Role remapping handed to get_clean_message_list: tool responses are relabelled as user messages.
openai_role_conversions = {MessageRole.TOOL_RESPONSE: MessageRole.USER}


class OpenAIEngine:
    """Callable LLM engine backed by the OpenAI chat-completions API.

    Args:
        model_name: OpenAI model identifier sent with every request.
        api_key: API key to use. When omitted, the key is read from the
            ``OPENAI_API_KEY`` environment variable, so no secret has to be
            written into the notebook.
    """

    def __init__(self, model_name="gpt-4o", api_key=None):
        self.model_name = model_name
        # Never hard-code the key: fall back to the environment when none is passed.
        resolved_key = api_key if api_key is not None else os.getenv("OPENAI_API_KEY")
        self.client = OpenAI(api_key=resolved_key)

    def __call__(self, messages, stop_sequences=None):
        """Send `messages` to the model and return the text of the first choice.

        Args:
            messages: Conversation messages; they are passed through
                `get_clean_message_list` with `openai_role_conversions` first.
            stop_sequences: Optional stop strings. `None` (the default) is
                treated as an empty list.

        Returns:
            The `content` of the first returned choice's message.
        """
        # A None default avoids sharing one mutable list between calls.
        stop = [] if stop_sequences is None else list(stop_sequences)
        messages = get_clean_message_list(messages, role_conversions=openai_role_conversions)

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            stop=stop,
            temperature=0.5,
        )
        return response.choices[0].message.content

In [None]:
# Switch the agent's engine to OpenAI (replaces any engine assigned earlier)
llm_engine = OpenAIEngine(model_name="gpt-4o")

In [None]:
# ReAct JSON agent whose only tool is the retriever
agent = ReactJsonAgent(
    tools=[retriever_tool],
    llm_engine=llm_engine,
    max_iterations=4,
    verbose=2,
)

In [None]:
# Agentic RAG entry point
def run_agentic_rag(question: str) -> str:
    """Answer `question` by running the agent with retrieval instructions.

    The question is appended to a fixed instruction block that tells the agent
    to use the 'retriever' tool; the combined prompt is passed to `agent.run`
    and its result is returned unchanged.
    """
    prompt_lines = (
        "Using the information contained in your knowledge base, which you can access with the 'retriever' tool,",
        "give a comprehensive answer to the question below.",
        "Respond only to the question asked, response should be concise and relevant to the question.",
        "If you cannot find information, do not give up and try calling your retriever again with different arguments!",
        "Make sure to have covered the question completely by calling the retriever tool several times with semantically different queries.",
        'Your queries should not be questions but affirmative form sentences: e.g. rather than "How do I load a model from the Hub in bf16?", query should be "load a model from the Hub bf16 weights".',
        "",
        "Question:",
        f"{question}",
    )
    return agent.run("\n".join(prompt_lines))


In [None]:
# Example: ask one question through the agentic pipeline and print the result
question = "How can I push a model to the Hub?"
answer = run_agentic_rag(question)
for line in (f"Question: {question}", f"Answer: {answer}"):
    print(line)

### Comparing Agentic RAG to standard RAG

In [None]:
# Standard (single-shot) RAG baseline
def run_standard_rag(question: str) -> str:
    """Answer `question` with one retrieval call followed by one chat completion.

    The retriever tool is called once with the raw question; its output is
    placed after the question in a fixed prompt, which is sent as a single
    user message to the Meta-Llama-3-8B-Instruct inference client. The content
    of the first returned choice is returned.
    """
    retrieved_context = retriever_tool(question)

    prompt_lines = (
        "Given the question and supporting documents below, give a comprehensive answer to the question.",
        "Respond only to the question asked, response should be concise and relevant to the question.",
        "Provide the number of the source document when relevant.",
        "",
        "Question:",
        f"{question}",
        "",
        f"{retrieved_context}",
        "",  # keeps the trailing newline at the end of the prompt
    )
    chat_messages = [{"role": "user", "content": "\n".join(prompt_lines)}]

    client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
    completion = client.chat_completion(chat_messages)
    return completion.choices[0].message.content

In [None]:
# Comparison, step 1: answer the question with the agentic pipeline
question = "How can I push a model to the Hub?"
agentic_answer = run_agentic_rag(question=question)

In [None]:
# Comparison, step 2: answer the same question with the single-shot baseline
standard_answer = run_standard_rag(question=question)

In [None]:
# Show the agentic answer under its heading
for line in ("Agentic RAG Answer:", agentic_answer):
    print(line)


In [None]:
# Show the baseline answer under its heading
for line in ("\nStandard RAG Answer:", standard_answer):
    print(line)