In [4]:
!pip install -r requirements.txt

Collecting fire (from -r requirements.txt (line 4))
  Downloading fire-0.7.0.tar.gz (87 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... done
  Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114299 sha256=8bda20acf38cfa1475126e3b11997900439af8dce7c42aaebe69bcd18c389f1e
  Stored in directory: /tmp/pip-ephem-wheel-cache-r8cqgojh/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89
Successfully built fire
Installing collected packages: fire
Successfully installed fire-0.7.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [5]:
import os
import sys
from dotenv import load_dotenv
from llama_stack_client import LlamaStackClient, RAGDocument
import pandas as pd
import logging

In [6]:
# Add the parent of the working directory to the module search path.
sys.path.append("..")

# Pull settings from a .env file into the process environment.
load_dotenv()

# Notebook-wide logger with its threshold set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Input CSV and the identifiers of the two vector databases used below.
file_path = "data/Commercial-Direct-LATAM-USD-Q3-2025-Subscriptions.csv"
vector_db_skus_id = "skus_rh_vector_db"
vector_db_ocp_id = "ocp_rh_vector_db"

# Llama Stack client. The server URL is taken from LLAMA_STACK_SERVER_URL and
# falls back to the in-cluster service address when that variable is unset.
client = LlamaStackClient(
    base_url=os.getenv(
        "LLAMA_STACK_SERVER_URL",
        "http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321",
    ),
)

# NOTE(review): no request is visibly sent before this message, so it
# presumably only confirms that the client object was built -- confirm.
logger.info("Connected to Llama Stack server")

INFO:__main__:Connected to Llama Stack server


In [7]:
logger.info(f"Preparing documents from {file_path}...")

# Read the CSV and replace missing cells with empty strings, so neither the
# embedded text nor the metadata carries NaN values. Chained rather than
# `inplace=True` so re-running the cell always starts from the file contents.
df = pd.read_csv(file_path).fillna("")

# Build one RAGDocument per CSV row.
documents = []
for index, row in df.iterrows():
    # Text the embedding model will primarily "read". Every sentence is
    # separated by ". " -- the trailing space on the first fragment matters
    # because adjacent string literals are concatenated with no separator.
    text_content = (
        f"Product Name: {row['Product']}. SKU Description: {row['SKU_Description']}. "
        f"SKU Number: {row['SKU']}. Price: ${row['List_Price']}."
    )

    # Keep every original CSV column as metadata so it is returned alongside
    # the chunk at query time.
    metadata = row.to_dict()

    # `index + 1` assumes the default integer row index produced by read_csv.
    logger.debug(f"Processing document {index + 1}: {text_content}...")

    documents.append(
        RAGDocument(
            # Row index plus a fixed suffix gives a unique id per row.
            document_id=f"{index}-SKU-RH-LATAM-Q3-2025",
            mime_type="text/plain",
            content=text_content,
            metadata=metadata,
        )
    )

logger.info(f"Prepared {len(documents)} documents.")

INFO:__main__:Preparing documents from data/Commercial-Direct-LATAM-USD-Q3-2025-Subscriptions.csv...
INFO:__main__:Prepared 680 documents.


In [8]:
logger.info("Preparing documents from URLs...")

# Remote sources as (url, mime_type) pairs. This cell only builds the
# document objects; nothing is inserted into a vector database here.
urls = [
    # Disabled alternative source:
    #("https://raw.githubusercontent.com/lcoronad/ai-transformers/main/skus_red_hat_openshift.rst", "text/plain"),
    ("https://www.openshift.guide/openshift-guide-screen.pdf", "application/pdf"),
]

# One RAGDocument per source: the URL itself is passed as the content, the id
# is positional ("num-0", "num-1", ...) and the metadata is left empty.
documents_ocp = [
    RAGDocument(
        document_id=f"num-{position}",
        content=source_url,
        mime_type=mime,
        metadata={},
    )
    for position, (source_url, mime) in enumerate(urls)
]

logger.info(f"Prepared {len(documents_ocp)} documents from URLs.")

INFO:__main__:Preparing documents from URLs...
INFO:__main__:Prepared 1 documents from URLs.


In [10]:
# Register the SKU vector database. Embedding model, embedding dimension and
# provider are read from the environment, with all-MiniLM-L6-v2 / 384 / faiss
# as fallbacks.
sku_db_registration = dict(
    vector_db_id=vector_db_skus_id,
    provider_id=os.getenv("VDB_PROVIDER", "faiss"),
    embedding_model=os.getenv("VDB_EMBEDDING", "all-MiniLM-L6-v2"),
    embedding_dimension=int(os.getenv("VDB_EMBEDDING_DIMENSION", 384)),
)
client.vector_dbs.register(**sku_db_registration)

# Insert the CSV-derived documents. Chunk size (in tokens) comes from
# VECTOR_DB_CHUNK_SIZE and defaults to 512.
sku_chunk_size = int(os.getenv("VECTOR_DB_CHUNK_SIZE", 512))
client.tool_runtime.rag_tool.insert(
    vector_db_id=vector_db_skus_id,
    documents=documents,
    chunk_size_in_tokens=sku_chunk_size,
)

logger.info(f"Documents ingested into RAG {vector_db_skus_id} successfully.")

INFO:httpx:HTTP Request: POST http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321/v1/vector-dbs "HTTP/1.1 200 OK"
INFO:llama_stack_client._base_client:Retrying request to /v1/tool-runtime/rag-tool/insert in 0.466048 seconds
INFO:httpx:HTTP Request: POST http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321/v1/tool-runtime/rag-tool/insert "HTTP/1.1 200 OK"
INFO:__main__:Documents ingested into RAG skus_rh_vector_db successfully.


In [12]:
# Register the OpenShift-guide vector database. Embedding model, embedding
# dimension and provider are read from the environment, with
# all-MiniLM-L6-v2 / 384 / faiss as fallbacks.
ocp_db_registration = dict(
    vector_db_id=vector_db_ocp_id,
    provider_id=os.getenv("VDB_PROVIDER", "faiss"),
    embedding_model=os.getenv("VDB_EMBEDDING", "all-MiniLM-L6-v2"),
    embedding_dimension=int(os.getenv("VDB_EMBEDDING_DIMENSION", 384)),
)
client.vector_dbs.register(**ocp_db_registration)

# Insert the URL-backed documents. Chunk size (in tokens) comes from
# VECTOR_DB_CHUNK_SIZE and defaults to 512.
ocp_chunk_size = int(os.getenv("VECTOR_DB_CHUNK_SIZE", 512))
client.tool_runtime.rag_tool.insert(
    vector_db_id=vector_db_ocp_id,
    documents=documents_ocp,
    chunk_size_in_tokens=ocp_chunk_size,
)

logger.info(f"Documents ingested into RAG {vector_db_ocp_id} successfully.")

INFO:httpx:HTTP Request: POST http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321/v1/vector-dbs "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321/v1/tool-runtime/rag-tool/insert "HTTP/1.1 200 OK"
INFO:__main__:Documents ingested into RAG ocp_rh_vector_db successfully.


In [11]:
# Question to run against the SKU price-list vector database.
query = "List of Red Hat OpenShift SKUs"

# Ask the RAG tool for matching chunks, restricted to the SKU database.
# NOTE(review): the question is sent both as `content` and under a "query"
# key in `query_config` -- confirm the server actually reads that key.
result = client.tool_runtime.rag_tool.query(
    content=query,
    query_config={"query": query},
    vector_db_ids=[vector_db_skus_id],
)

# The returned content is logged in full.
logger.info(f"RAG Query from {vector_db_skus_id} - Result: \n{result.content}")

INFO:httpx:HTTP Request: POST http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321/v1/tool-runtime/rag-tool/query "HTTP/1.1 200 OK"
INFO:__main__:RAG Query from skus_rh_vector_db - Result: 
[TextContentItem(text='knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n', type='text'), TextContentItem(text="Result 1\nContent: Product Name: OpenShift AI. SKU Description: Red Hat OpenShift AI, Standard (2 Cores or 4 vCPUs).SKU Number: MCT4547. Price: $1.485,00.\nMetadata: {'YEAR': 2025, 'QUARTER': 'Q3', 'SKU': 'MCT4547', 'SKU_Description': 'Red Hat OpenShift AI, Standard (2 Cores or 4 vCPUs)', 'Product': 'OpenShift AI', 'Currency': 'USD', 'List_Price': '1.485,00', 'Unit_of_Measure': 'CORE BAND', 'Cores': '2', 'Nodes': '0', 'Sockets': 0, 'Virtual_Guests': '0', 'Support_Level': 'L1-L3', 'Support_Type': 'Standard', 'Category': 'SUBSCRIPTIONS - ELS', 'Region': 'LATAM', 'Country': 'ALL', 'Service_Term': '1 YEARS', 'document_id': '307-SKU-RH-LATAM-Q3-2025'}\n",

In [13]:
# Question to run against the OpenShift-guide vector database.
query = "What is Red Hat OpenShift?"

# Ask the RAG tool for matching chunks, restricted to the OpenShift database.
# NOTE(review): the question is sent both as `content` and under a "query"
# key in `query_config` -- confirm the server actually reads that key.
result = client.tool_runtime.rag_tool.query(
    content=query,
    query_config={"query": query},
    vector_db_ids=[vector_db_ocp_id],
)

# The returned content is logged in full.
logger.info(f"RAG Query from {vector_db_ocp_id} - Result: \n{result.content}")

INFO:httpx:HTTP Request: POST http://llamastack-server.rh-proposal-ai.svc.cluster.local:8321/v1/tool-runtime/rag-tool/query "HTTP/1.1 200 OK"
INFO:__main__:RAG Query from ocp_rh_vector_db - Result: 
[TextContentItem(text='knowledge_search tool found 5 chunks:\nBEGIN of knowledge_search tool results.\n', type='text'), TextContentItem(text='Result 1\nContent: .\nThese characteristics set OpenShift apart as an excellent Kubernetes platform for enterprise users.\nThe latest version of OpenShift available at the time of this writing is 4.12.\n3.2. Is Red Hat OpenShift Open Source?\nRed Hat OpenShift is a commercial product based on an open-source project called OKD. This\nacronym means " OpenShift Kubernetes Distribution" and is publicly available for everyone to\ninspect and contribute. Like the upstream Kubernetes project, OKD developers use the Go\nprogramming language.\n3.3. How can I run OpenShift?\nToday, Red Hat OpenShift is available through various mechanisms and formats:\n• DevOps