In [1]:
!pip install -r requirements.txt

Collecting llama_stack_client==0.2.22 (from -r requirements.txt (line 3))
  Downloading llama_stack_client-0.2.22-py3-none-any.whl.metadata (16 kB)
Collecting gradio[mcp] (from -r requirements.txt (line 2))
  Downloading gradio-5.49.1-py3-none-any.whl.metadata (16 kB)
Collecting distro<2,>=1.7.0 (from llama_stack_client==0.2.22->-r requirements.txt (line 3))
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting fire (from llama_stack_client==0.2.22->-r requirements.txt (line 3))
  Downloading fire-0.7.1-py3-none-any.whl.metadata (5.8 kB)
Collecting pyaml (from llama_stack_client==0.2.22->-r requirements.txt (line 3))
  Downloading pyaml-25.7.0-py3-none-any.whl.metadata (12 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio[mcp]->-r requirements.txt (line 2))
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting brotli>=1.1.0 (from gradio[mcp]->-r requirements.txt (line 2))
  Downloading brotli-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x

In [2]:
import os
import sys
from dotenv import load_dotenv
from llama_stack_client import LlamaStackClient, RAGDocument
import pandas as pd
import logging

In [17]:
# Make the parent directory importable. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if '..' not in sys.path:
    sys.path.append('..')

# Load environment variables from .env file
load_dotenv()

logger = logging.getLogger(__name__)
logger.setLevel("INFO")

# Endpoint used when LLAMA_STACK_SERVER_URL is not set in the environment.
# Previously used endpoint, kept for reference:
#   http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321
DEFAULT_LLAMA_STACK_SERVER_URL = (
    "http://llama-stack-milvus-remote-service.rh-proposal-ai.svc.cluster.local:8321"
)

# Initialize the Llama Stack client
client = LlamaStackClient(
    base_url=os.getenv("LLAMA_STACK_SERVER_URL", DEFAULT_LLAMA_STACK_SERVER_URL)
)

# Source CSV for the SKU documents and the names of the two vector stores
file_path = "data/Commercial-Direct-LATAM-USD-Q3-2025-Subscriptions.csv"
vector_db_skus_name = "skus_rh_vector_db"
vector_db_ocp_name = "ocp_rh_vector_db"

# NOTE(review): no request is issued in this cell, so this message presumably
# only confirms that the client object was built, not that the server is
# reachable -- confirm before relying on it.
logger.info("Connected to Llama Stack server")

INFO:__main__:Connected to Llama Stack server


In [18]:
logger.info(f"Preparing documents from {file_path}...")

# Load the CSV and replace missing values with empty strings
df = pd.read_csv(file_path).fillna('')


def build_sku_document(position, record):
    """Convert one CSV row into a RAGDocument.

    position: index label of the row in the DataFrame; it prefixes the
        document id and is logged (plus one) at debug level.
    record: the row as a pandas Series. Selected columns are rendered into
        the text content; the whole row is attached as metadata.
    """
    # Sentence-style rendering of the row, passed as the document content
    text_content = (
        f"Year: {record['YEAR']}. Quarter: {record['QUARTER']}. "
        f"SKU: {record['SKU']}. SKU Description: {record['SKU_Description']}. "
        f"Product: {record['Product']}. Currency: {record['Currency']}. "
        f"List Price: ${record['List_Price']}. Unit of Measure: {record['Unit_of_Measure']}. "
        f"Cores: {record['Cores']}. Nodes: {record['Nodes']}. Sockets: {record['Sockets']}. "
        f"Virtual Guests: {record['Virtual_Guests']}. "
        f"Support Level: {record['Support_Level']}. Support Type: {record['Support_Type']}. "
        f"Category: {record['Category']}. Region: {record['Region']}. Country: {record['Country']}. "
        f"Service Term: {record['Service_Term']}."
    )

    logger.debug(f"Processing document {position + 1}: {text_content}...")

    return RAGDocument(
        # Row index plus a fixed suffix identifying this price list
        document_id=f"{position}-SKU-RH-LATAM-Q3-2025",
        mime_type="text/plain",
        content=text_content,
        # Every original CSV column travels along as metadata
        metadata=record.to_dict(),
    )


# One document per CSV row, in file order
documents = [build_sku_document(position, record) for position, record in df.iterrows()]

logger.info(f"Prepared {len(documents)} documents.")

INFO:__main__:Preparing documents from data/Commercial-Direct-LATAM-USD-Q3-2025-Subscriptions.csv...
INFO:__main__:Prepared 680 documents.


In [19]:
logger.info("Preparing documents from URLs...")

# Remote sources to turn into documents, as (URL, MIME type) pairs
urls = [
    ("https://www.openshift.guide/openshift-guide-screen.pdf", "application/pdf"),
]

# Wrap every source in a RAGDocument; the URL itself is passed as the content
documents_ocp = []
for position, (source_url, source_mime) in enumerate(urls):
    documents_ocp.append(
        RAGDocument(
            document_id=f"num-{position}",
            content=source_url,
            mime_type=source_mime,
            metadata={},
        )
    )

logger.info(f"Prepared {len(documents_ocp)} documents from URLs.")

INFO:__main__:Preparing documents from URLs...
INFO:__main__:Prepared 1 documents from URLs.


In [21]:
# List the vector stores currently registered on the server
vector_stores = client.vector_stores.list().data

logger.info(f"Vector Stores: {vector_stores}")

# Both ids stay empty strings when no store with the expected name is found
vector_db_skus_id = ""
vector_db_ocp_id = ""

# Resolve store names to ids. If several stores share a name, the last one
# in the listing wins because each match overwrites the previous id.
for vector_store in vector_stores:
    if vector_store.name == vector_db_skus_name:
        vector_db_skus_id = vector_store.id
        logger.info(f"Vector Store id skus: {vector_db_skus_id}")
    if vector_store.name == vector_db_ocp_name:
        vector_db_ocp_id = vector_store.id
        logger.info(f"Vector Store id ocp: {vector_db_ocp_id}")

# Set to True to delete the existing SKU and OpenShift vector stores
delete_vector_stores = False

if delete_vector_stores:
    # Only delete stores that were actually found: an empty id means the
    # lookup above had no match, and sending it to the API cannot succeed.
    if vector_db_skus_id:
        # Delete the vector store of SKUs
        client.vector_stores.delete(
            vector_store_id=vector_db_skus_id,
        )
    else:
        logger.info(f"Vector store {vector_db_skus_name} not found; nothing to delete")

    if vector_db_ocp_id:
        # Delete the vector store of OpenShift
        client.vector_stores.delete(
            vector_store_id=vector_db_ocp_id,
        )
    else:
        logger.info(f"Vector store {vector_db_ocp_name} not found; nothing to delete")

INFO:httpx:HTTP Request: GET http://llama-stack-milvus-remote-service.rh-proposal-ai.svc.cluster.local:8321/v1/openai/v1/vector_stores "HTTP/1.1 200 OK"
INFO:__main__:Vector Stores: [VectorStore(id='vs_edd9686d-6e94-436d-8371-d12bed06e027', created_at=1762533012, file_counts=FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1), metadata={'provider_id': 'milvus-remote', 'provider_vector_db_id': 'vs_edd9686d-6e94-436d-8371-d12bed06e027'}, object='vector_store', status='completed', usage_bytes=0, expires_after=None, expires_at=None, last_active_at=1762533012, name='ocp_rh_vector_db'), VectorStore(id='vs_3d80ae85-456a-46b6-9314-2b86f502f323', created_at=1762532727, file_counts=FileCounts(cancelled=0, completed=2040, failed=0, in_progress=0, total=2040), metadata={'provider_id': 'milvus-remote', 'provider_vector_db_id': 'vs_3d80ae85-456a-46b6-9314-2b86f502f323'}, object='vector_store', status='completed', usage_bytes=0, expires_after=None, expires_at=None, last_active_at=1

In [None]:
# Fetch all registered models and pick the first one flagged as an embedding model
models = client.models.list()
embedding_model = next((m for m in models if m.model_type == "embedding"), None)
if embedding_model is None:
    # Fail with an actionable message instead of a bare StopIteration
    raise RuntimeError("No embedding model is registered on the Llama Stack server")
embedding_model_id = embedding_model.identifier
embedding_dimension = int(embedding_model.metadata["embedding_dimension"])

# Nothing has been created or ingested yet at this point, so announce the
# upcoming step rather than claiming success.
logger.info(f"Creating vector store {vector_db_skus_name}...")

# NOTE(review): vector_db_skus_id comes from the vector-store lookup cell and
# is an empty string when no store with this name existed -- confirm that the
# provider accepts an empty provider_vector_db_id.
vector_store_skus = client.vector_stores.create(
    name=vector_db_skus_name,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
    provider_id="milvus-remote",
    metadata={
        "provider_vector_db_id": vector_db_skus_id
    }
)

logger.info(f"The vector store ID created is: {vector_store_skus.id}")

# Insert documents to the vector database
client.tool_runtime.rag_tool.insert(
    documents=documents,
    vector_db_id=vector_store_skus.id,
    # Chunk size is configurable through the environment; 512 tokens by default
    chunk_size_in_tokens=int(os.getenv("VECTOR_DB_CHUNK_SIZE", "512")),
    timeout=600,
)

logger.info(f"Documents ingested into RAG {vector_db_skus_name} successfully.")

In [None]:
# Fetch all registered models and pick the first one flagged as an embedding model
models = client.models.list()
embedding_model = next((m for m in models if m.model_type == "embedding"), None)
if embedding_model is None:
    # Fail with an actionable message instead of a bare StopIteration
    raise RuntimeError("No embedding model is registered on the Llama Stack server")
embedding_model_id = embedding_model.identifier
embedding_dimension = int(embedding_model.metadata["embedding_dimension"])

# Create the vector store that will hold the OpenShift documents
vector_store_ocp = client.vector_stores.create(
    name=vector_db_ocp_name,
    embedding_model=embedding_model_id,
    embedding_dimension=embedding_dimension,
    provider_id="milvus-remote",
)

logger.info(f"The vector store ID created is: {vector_store_ocp.id}")

# Insert documents to the vector database
client.tool_runtime.rag_tool.insert(
    documents=documents_ocp,
    vector_db_id=vector_store_ocp.id,
    # Chunk size is configurable through the environment; 512 tokens by default
    chunk_size_in_tokens=int(os.getenv("VECTOR_DB_CHUNK_SIZE", "512")),
    timeout=300,
)

logger.info(f"Documents ingested into RAG {vector_db_ocp_name} successfully.")

In [None]:
# Smoke-test retrieval from the SKU vector store with a sample question
query = "List of Red Hat OpenShift SKUs"
sku_store_ids = [vector_store_skus.id]

# NOTE(review): the same text is sent both as `content` and under a "query"
# key in `query_config` -- confirm the server actually reads that key.
result = client.tool_runtime.rag_tool.query(
    content=query,
    query_config={"query": query},
    vector_db_ids=sku_store_ids,
)

logger.info(f"RAG Query from {vector_db_skus_name} - Result: \n{result.content}")

In [None]:
# Smoke-test retrieval from the OpenShift vector store with a sample question
query = "What is Red Hat OpenShift?"
ocp_store_ids = [vector_store_ocp.id]

# NOTE(review): the same text is sent both as `content` and under a "query"
# key in `query_config` -- confirm the server actually reads that key.
result = client.tool_runtime.rag_tool.query(
    content=query,
    query_config={"query": query},
    vector_db_ids=ocp_store_ids,
)

logger.info(f"RAG Query from {vector_db_ocp_name} - Result: \n{result.content}")