Let's start by installing the Python libraries we need.

https://github.com/meta-llama/llama-stack/blob/main/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb

In [None]:

!pip install -U llama-stack-client dotenv


When running this code in a regular Python application, we would usually read environment variables from a `.env` file. For this lab, we will hard-code them in this cell to keep things clear.

In [None]:
import os

# Load environment variables from .env file
from dotenv import load_dotenv
load_dotenv()

# For our lab, we simply define the variables manually here. These values
# replace anything that load_dotenv() may have put into the environment.
os.environ.update(
    {
        "LLAMA_STACK_SERVER": "http://localhost:8321",
        "LLAMA_STACK_MODEL": "meta-llama/Llama-3.2-3B-Instruct",
    }
)

As a first step, let's define our client, give it the location of our Llama Stack server, and select the model we would like to work with. Later, we will see that pointing the client at a different location (another Llama Stack server) is all we need to do to move to a production environment.

In [None]:
from llama_stack_client import LlamaStackClient

# Read the connection settings back from the environment.
LLAMA_STACK_SERVER = os.environ.get("LLAMA_STACK_SERVER")
LLAMA_STACK_MODEL = os.environ.get("LLAMA_STACK_MODEL")

# Create the client, pointed at our Llama Stack server.
client = LlamaStackClient(base_url=LLAMA_STACK_SERVER)

# Print one line per model: identifier, provider and provider-side resource id.
models = client.models.list()
print("--- Available models: ---")
for model in models:
    print(f"{model.identifier} - {model.provider_id} - {model.provider_resource_id}")


Now that our client is set up, let's go through some very simple code snippets to get you familiar with the syntax. If you have used other AI frameworks, this will soon feel very familiar, as Llama Stack follows similar principles and terminology, while providing a standard that helps you quickly swap different components in and out.

Let's see which vector DBs our server supports out of the box.

In [None]:
# Fetch the provider list once and print every entry.
# list(...) materialises the response so it can be iterated a second time below.
providers = list(client.providers.list())
for provider in providers:
    print(provider)

# Keep only the providers that implement the vector_io API.
vector_providers = [provider for provider in providers if provider.api == "vector_io"]

# Fail with a clear message instead of a bare IndexError when nothing matches.
if not vector_providers:
    raise RuntimeError("The Llama Stack server exposes no vector_io providers")

# In this example we only have one provider, but another server might have many;
# here we simply select the first one.
selected_vector_provider = vector_providers[0]

# NOTE: the vector database itself is registered in the next cell, which imports
# `uuid` first. Registering it here as well raised NameError on a fresh kernel
# (uuid was not imported yet) and duplicated the registration done by that cell.


In [None]:
import uuid

# A random UUID suffix keeps the database name distinct between runs.
vector_db_id = "test_vector_db_" + str(uuid.uuid4())

# Register the database with the provider selected earlier in the notebook.
client.vector_dbs.register(
    provider_id=selected_vector_provider.provider_id,
    vector_db_id=vector_db_id,
    embedding_dimension=384,
    embedding_model="all-MiniLM-L6-v2",
)


In [None]:
from llama_stack_client.types import Document
# File names of the park descriptions; each one is appended to PARKS_BASE_URL below.
urls = [
    "Crystal_Vortex.md",
    "Emberwild_Canyon.md",
    "Frostveil_Tundra.md",
    "Skyreach_Peaks.md",
    "Verdant_Mirage.md",
]

# Common prefix of every document URL.
PARKS_BASE_URL = "https://raw.githubusercontent.com/rhpds/llamastack-lab/refs/heads/main/assets/Parks"

# One Document per file; the id is derived from the position in `urls` and the
# content field carries the document's URL.
documents = [
    Document(
        document_id=f"num-{position}",
        content=f"{PARKS_BASE_URL}/{file_name}",
        mime_type="text/plain",
        metadata={},
    )
    for position, file_name in enumerate(urls)
]

In [None]:
# Insert the documents into our vector database through the RAG tool,
# requesting chunks of 300 tokens.
client.tool_runtime.rag_tool.insert(
    vector_db_id=vector_db_id,
    documents=documents,
    chunk_size_in_tokens=300,
)

In [None]:
from llama_stack_client import Agent

# Tool configuration: the built-in RAG tool, pointed at our vector database.
rag_tool = {
    "name": "builtin::rag",
    "args": {"vector_db_ids": [vector_db_id]},
}

# NOTE(review): confirm that the "top_k" strategy actually honours "temperature".
sampling_params = {
    "strategy": {"type": "top_k", "temperature": 0.1, "top_k": 2},
}

# Build the agent from the model name stored in the environment, the
# instructions, the RAG tool and the sampling parameters defined above.
rag_agent = Agent(
    client,
    model=os.environ["LLAMA_STACK_MODEL"],
    instructions="You should always use the RAG tool to answer questions, only answer what you are asked, don't add more information than requested",
    tools=[rag_tool],
    sampling_params=sampling_params,
)

#    instructions="You are a helpful assistant that can answer questions about the national parks. Answer only about the park you are asked, don't provide information about other parks. You should always use the RAG tool to answer questions. speak like a sailor",


In [None]:
# First, let's come up with a couple of examples to test the agent.
# Each (question, expected answer) pair becomes one example dictionary.
examples = [
    {"input_query": question, "expected_answer": answer}
    for question, answer in [
        (
            "What is the cost of entry to Crystal Vortex",
            "12$ for individuals and 20 for private car or boat",
        ),
        (
            "What are the Attractions in Frostveil Tundra?",
            "Northern Lights Viewing Platform,Crystal Snow Elk Observation Trails,Frozen Lake Ice Fishing,Aurora Wolf Tracking Tours",
        ),
        (
            "when was Verdant Mirage established",
            "2010",
        ),
        (
            "What are the camping options in Emberwild Canyon",
            "Canyon Rim Campgrounds,Oasis Camp,Backcountry Camping ",
        ),
    ]
]

In [None]:
from rich.pretty import pprint
import rich
# Imported explicitly so the cell does not rely on `rich.markup` having been
# loaded as a side effect of another rich import.
from rich.markup import escape

# Forget session ids recorded by earlier runs of this cell
# (presumably create_session appends each new id to this list; later cells index into it).
rag_agent.sessions = []
for example in examples:
    # Each question is asked in a session of its own.
    rag_session_id = rag_agent.create_session(session_name=f"rag_session_{uuid.uuid4()}")
    response = rag_agent.create_turn(
        messages=[
            {
                "role": "user",
                "content": example["input_query"],
            }
        ],
        session_id=rag_session_id,
        stream=False,
    )
    # escape() stops rich from treating square brackets in the question or in
    # the model output as console markup, which could hide text or raise an error.
    agent_answer = escape(str(response.output_message.content))
    rich.print(f"[bold cyan]Question:[/bold cyan] {escape(example['input_query'])}")
    # Show the reference answer next to the agent's answer so they can be compared.
    rich.print(f"[bold green]Expected Answer:[/bold green] {escape(example['expected_answer'])}")
    rich.print(f"[bold yellow]Agent Answer:[/bold yellow] {agent_answer}")

In [None]:

# Retrieve the first recorded session and pretty-print its turns.
first_session_id = rag_agent.sessions[0]
session_response = client.agents.session.retrieve(
    session_id=first_session_id,
    agent_id=rag_agent.agent_id,
)
pprint(session_response.turns)

In [None]:
# Show every recorded session id, then pretty-print the turns of the second one.
print(rag_agent.sessions)
second_session_id = rag_agent.sessions[1]
session_response = client.agents.session.retrieve(
    session_id=second_session_id,
    agent_id=rag_agent.agent_id,
)
pprint(session_response.turns)

In [None]:
# Unregister all vector databases (THIS IS FOR DEBUG NOT FOR LAB)
# The loop variable is deliberately not called `vector_db_id`: reusing that name
# would overwrite the notebook-level id string with a database object and break
# earlier cells when they are re-run.
for vector_db in client.vector_dbs.list():
    print(f"Unregistering vector database: {vector_db.identifier}")
    client.vector_dbs.unregister(vector_db_id=vector_db.identifier)