In [1]:
%pip install llama_stack_client==0.3.0

Note: you may need to restart the kernel to use updated packages.


In [2]:
from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient

In [3]:
client = LlamaStackClient(base_url="http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321")

In [4]:
models = client.models.list()
models
model_id = next(m.identifier for m in models if m.model_type == "llm")
embedding = next(m for m in models if m.model_type == "embedding")
embedding_model_id = embedding.identifier
embedding_dimension = int(embedding.metadata["embedding_dimension"])

INFO:httpx:HTTP Request: GET http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321/v1/models "HTTP/1.1 200 OK"


In [5]:
pg_store = next(
    (vs for vs in client.vector_stores.list() if vs.metadata['provider_id'] == "pgvector"), 
    None
)
vector_db_id = pg_store.id
print(pg_store)

INFO:httpx:HTTP Request: GET http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321/v1/vector_stores "HTTP/1.1 200 OK"


AttributeError: 'NoneType' object has no attribute 'id'

In [None]:
print(embedding_model_id)

In [None]:
print(model_id)

In [None]:
# client.vector_dbs.list()

In [None]:
print(vector_db_id)

In [None]:
query = "What benefits do the ingested passages provide for retrieval?"

response = client.responses.create(
    model=model_id,
    input=query,
    tools=[
        {
            "type": "file_search",
            "vector_store_ids": [vector_db_id],
        }
    ],
)
print("Responses API result:", getattr(response, "output_text", response))

In [None]:
query = "What benefits do the ingested passages provide for retrieval?"
result = client.tool_runtime.rag_tool.query(
    vector_db_ids=[vector_db_id],
    content=query,
)
print("Low-level query result:", result)

In [None]:
# Create an Agent for conversational RAG queries
agent = Agent(
    client,
    model=model_id,
    instructions="You are a helpful assistant that can use tools to answer questions.",
    tools=[
        {
            "name": "builtin::rag/knowledge_search",
            "args": {"vector_db_ids": [vector_db_id]},
        }
    ],
)

prompt = "How do you do great work?"
print("Prompt>", prompt)

# Create a session and run a streaming turn
session_id = agent.create_session("rag_session")
response = agent.create_turn(
    messages=[{"role": "user", "content": prompt}],
    session_id=session_id,
    stream=True,
)

# Log and print the agent's response
for log in AgentEventLogger().log(response):
    log.print()

In [None]:
# Example RAG query for one-off lookups
query = "What is a Data Science Workflow?"
result = client.tool_runtime.rag_tool.query(
    vector_db_ids=[vector_db_id],
    content=query,
)
print("Low-level query result:", result)

In [None]:
# Example RAG query for one-off lookups
query = "What are the default workbench images provided by RHOAI?"
result = client.tool_runtime.rag_tool.query(
    vector_db_ids=[vector_db_id],
    content=query,
)
print("Low-level query result:", result)

In [None]:
# Create an Agent for conversational RAG queries
agent = Agent(
    client,
    model=model_id,
    instructions="You are a helpful assistant.",
    tools=[
        {
            "name": "builtin::rag/knowledge_search",
            "args": {"vector_db_ids": [vector_db_id]},
        }
    ],
)

prompt = "What are the default workbench images provided by RHOAI?"
print("Prompt>", prompt)

# Create a session and run a streaming turn
session_id = agent.create_session("rag_session")
response = agent.create_turn(
    messages=[{"role": "user", "content": prompt}],
    session_id=session_id,
    stream=True,
)

# Log and print the agent's response
for log in AgentEventLogger().log(response):
    log.print()