In [1]:
from pathlib import Path
import getpass
import os

from dotenv import load_dotenv

# Load the .env file so its variables are available through the process
# environment (the Hugging Face token is read with os.getenv further down).
# The result is bound to `_` so the return value is not echoed as cell output.
_ = load_dotenv()

* Models

In [2]:

from agno.agent import Agent, RunResponse
from agno.models.huggingface import HuggingFace


# Hugging Face hosted chat model shared by the agents defined later on.
# The access token is read from the environment rather than written into the
# notebook; if the variable is unset, None is passed through unchanged.
hg_model = HuggingFace(
    api_key=os.environ.get("HUGGINGFACE_API_TOKEN"),
    id="meta-llama/Meta-Llama-3-8B-Instruct",
    max_tokens=4096,
)
    

In [3]:
from agno.embedder.ollama import OllamaEmbedder

# Ollama embedding model; handed to the vector database as its embedder.
ollama_embedder = OllamaEmbedder(
    dimensions=768,
    id="nomic-embed-text:latest",
)


* Vector Database

In [4]:
from agno.vectordb.lancedb import LanceDb, SearchType

# Location on disk where the LanceDB vector database is stored.
db_url = "./tmp/lancedb"

vector_db = LanceDb(
    uri=db_url,  # where the vector database is opened or created
    table_name="recipes",  # table name inside the vector database
    search_type=SearchType.vector,
    # Passed explicitly: per the original note, leaving this out falls back to
    # an OpenAI embedding default.
    embedder=ollama_embedder,
)

In [5]:
from agno.knowledge.pdf_url import PDFUrlKnowledgeBase

# Knowledge base made of PDFs fetched by URL and indexed in `vector_db`.
knowledge_base = PDFUrlKnowledgeBase(
    vector_db=vector_db,
    urls=[
        "https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf",
    ],
)

# Populate the knowledge base; recreate=False is passed so the load does not
# request a rebuild of the existing store.
knowledge_base.load(recreate=False)

In [6]:
# Echo the knowledge base's repr as cell output to inspect its configuration.
knowledge_base

PDFUrlKnowledgeBase(reader=PDFUrlReader(chunk=True, chunk_size=3000, separators=['\n', '\n\n', '\r', '\r\n', '\n\r', '\t', ' ', '  '], chunking_strategy=<agno.document.chunking.fixed.FixedSizeChunking object at 0x000002298A65F610>), vector_db=<agno.vectordb.lancedb.lance_db.LanceDb object at 0x000002298A246660>, num_documents=5, optimize_on=1000, chunking_strategy=<agno.document.chunking.fixed.FixedSizeChunking object at 0x000002298A65F610>, urls=['https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf'])

* Traditional RAG 

In [15]:
# Traditional RAG agent: relevant chunks from the knowledge base are retrieved
# up front and injected into the prompt, instead of being fetched by the model
# through a search tool call.
rag = Agent(
    model=hg_model,
    knowledge=knowledge_base,
    # Enable RAG by adding references from the knowledge base to the user
    # prompt. Without this flag (and with the search tool disabled below) the
    # knowledge base is never consulted and the model answers purely from its
    # own training data.
    add_references=True,
    # Agents default to agentic search when `knowledge` is provided; turn the
    # search tool off so retrieval happens only via the injected references.
    search_knowledge=False,
    show_tool_calls=True,
    debug_mode=False,
    markdown=False,
)



In [16]:

# Put a recipe question to the `rag` agent and print the text of its answer.
response = rag.run(
    "How do I make chicken and galangal in coconut milk soup"
)
print("response:", response.content)


response: To make chicken and galangal in coconut milk soup, first combine 2 cups of chicken breast or thighs with 2 tablespoons of vegetable oil in a large pot over medium heat. Add 1 onion, diced, and 3 cloves of garlic, minced, and cook until the onion is translucent. Add 1 piece of galangal, sliced, and 1-2 kaffir lime leaves and cook for an additional 1-2 minutes. Pour in 4 cups of chicken broth and 1 can of full-fat coconut milk, stirring well to combine. Bring the soup to a simmer and let cook for 15-20 minutes, or until the chicken is cooked through and the flavors have melded together. Season with salt and pepper to taste, then serve hot.


* Agent with knowledge

In [19]:
# Agentic RAG agent: the model is given a knowledge-base search tool and decides
# when to call it.
agent = Agent(
    name="RAG Agent",
    model=hg_model,
    knowledge=knowledge_base,
    instructions=["Always search your knowledge base first and use it if available.Your job is to answer questions about Thai recipes."],
    # Adds a tool to search the knowledge base, which enables agentic RAG.
    # This is already the default when `knowledge` is provided; it is spelled
    # out here for clarity.
    search_knowledge=True,
    show_tool_calls=True,
    debug_mode=False,
    markdown=False,
)


* Tests

In [20]:
# Put the same recipe question to the agentic-RAG agent and print its answer.
response = agent.run(
    "How do I make chicken and galangal in coconut milk soup"
)
print("response:", response.content)
# Observed latency: 6.8 (units not recorded; presumably seconds, to be confirmed)

