In [1]:
from phi.agent import Agent
from phi.model.ollama import OllamaTools
from phi.embedder.ollama import OllamaEmbedder
from phi.knowledge.pdf import PDFUrlKnowledgeBase
from phi.vectordb.lancedb import LanceDb, SearchType

In [2]:
# Host of the Ollama server; handed to the Ollama embedder via its `host=` argument.
OLLAMA_HOST = "localhost"

In [3]:
# Build a knowledge base from the Red Hat OpenShift AI "Serving models" PDF
knowledge_base = PDFUrlKnowledgeBase(
    urls=["https://docs.redhat.com/en-us/documentation/red_hat_openshift_ai_self-managed/2.15/pdf/serving_models/Red_Hat_OpenShift_AI_Self-Managed-2.15-Serving_models-en-US.pdf"],
    # Use LanceDB as the vector database (on-disk table "ocp" under the relative path tmp/lancedb)
    vector_db=LanceDb(
        table_name="ocp",
        uri="tmp/lancedb",
        search_type=SearchType.vector,
        # `dimensions` is the length of the embedding vectors, not the model's context size,
        # so it must match the embedding model: nomic-embed-text emits 768-dimensional vectors.
        # Note: mxbai-embed-large was tried as an alternative but doesn't work due to NaNs.
        embedder=OllamaEmbedder(model="nomic-embed-text", dimensions=768, host=OLLAMA_HOST),
    ),
)

[2024-11-16T20:49:47Z WARN  lance::dataset] No existing dataset at /Users/sroecker/code/AgenticAI-HandsOn/notebooks/tmp/lancedb/ocp.lance, it will be created


In [4]:
# Ingest the PDF into the vector database. The table lives on disk (tmp/lancedb),
# so this cell can be commented out after the first successful run.
# recreate=False keeps an already existing table instead of rebuilding it.
knowledge_base.load(recreate=False)

In [5]:
# Baseline agent: no knowledge base attached, so the answer comes from the model alone.
agent = Agent(
    # Alternative model ids that can be swapped in:
    #   granite3-dense:2b-instruct-q8_0
    #   llama3.2:3b-instruct-q8_0
    #   hermes3:8b-llama3.1-q8_0
    # Use the same Ollama host as the embedder so changing OLLAMA_HOST redirects
    # both embedding and chat requests.
    model=OllamaTools(id="llama3.1:8b-instruct-q8_0", host=OLLAMA_HOST),
    markdown=True,
)
agent.print_response("How can LLMs be served on OpenShift AI?", stream=True)

Output()

That's OK, but not perfect. Can we make it better? Let's add our knowledge base containing the Red Hat OpenShift AI (RHOAI) model serving documentation.

In [7]:
# RAG agent: same model as the baseline, but with the PDF knowledge base attached.
rag_agent = Agent(
    # Alternative model ids that can be swapped in (keep num_ctx at 4096):
    #   granite3-dense:2b-instruct-q8_0
    #   hermes3:8b-llama3.1-q8_0
    # It's important to increase the context size from the Ollama default of 2048,
    # otherwise the input will be truncated.
    # Use the same Ollama host as the embedder so changing OLLAMA_HOST redirects
    # both embedding and chat requests.
    model=OllamaTools(
        id="llama3.1:8b-instruct-q8_0",
        options={"num_ctx": 4096},
        host=OLLAMA_HOST,
    ),
    instructions=["Retrieve relevant OpenShift AI information from the knowledge base"],
    # Add the knowledge base to the agent
    knowledge=knowledge_base,
    # NOTE(review): `add_references_to_prompt` looks like the legacy Assistant-era flag name;
    # confirm the installed phi `Agent` honours it (newer releases document `add_context`) -
    # otherwise retrieval presumably happens only through knowledge-base tool calls.
    add_references_to_prompt=True,
    # Print the tool calls the agent makes in the response
    show_tool_calls=True,
    markdown=True,
)
rag_agent.print_response("How can LLMs be served on OpenShift AI?", stream=True)

Output()

That's a much better answer for OpenShift AI.