In [1]:
%pip install llama_stack_client==0.3.0 fire docling

Collecting docling
  Downloading docling-2.68.0-py3-none-any.whl.metadata (11 kB)
Collecting docling-core<3.0.0,>=2.50.1 (from docling-core[chunking]<3.0.0,>=2.50.1->docling)
  Downloading docling_core-2.59.0-py3-none-any.whl.metadata (7.7 kB)
Collecting docling-parse<5.0.0,>=4.7.0 (from docling)
  Downloading docling_parse-4.7.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (10 kB)
Collecting docling-ibm-models<4,>=3.9.1 (from docling)
  Downloading docling_ibm_models-3.10.3-py3-none-any.whl.metadata (7.3 kB)
Collecting filetype<2.0.0,>=1.2.0 (from docling)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting pypdfium2!=4.30.1,<5.0.0,>=4.30.0 (from docling)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
Collecting pydantic-settings<3.0.0,>=2.3.0 (from docling)
  Downloading pydantic_settings-2.12.0-py3-none-any.whl.metadata (3.4 kB)
Collecting huggingface_hub<1,>=0.23 (from doclin

In [2]:
import os

from llama_stack_client import RAGDocument, LlamaStackClient
from docling.document_converter import DocumentConverter

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Base URL of the Llama Stack server. Set the LLAMA_STACK_BASE_URL environment
# variable to point the notebook at another deployment; when it is unset the
# in-cluster service address below is used.
LLAMA_STACK_BASE_URL = os.environ.get(
    "LLAMA_STACK_BASE_URL",
    "http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321",
)
client = LlamaStackClient(base_url=LLAMA_STACK_BASE_URL)

In [4]:
# Ask the server which models it has registered, then take the first LLM and
# the first embedding model from that list.
models = client.models.list()

# Use an explicit default so that a missing model produces a readable error
# instead of a bare StopIteration from next().
llm_model = next((m for m in models if m.model_type == "llm"), None)
if llm_model is None:
    raise RuntimeError("No model with model_type 'llm' is registered on the server")
model_id = llm_model.identifier

embedding_model = next((m for m in models if m.model_type == "embedding"), None)
if embedding_model is None:
    raise RuntimeError("No model with model_type 'embedding' is registered on the server")
embedding_model_id = embedding_model.identifier
# The dimension is read from the model's metadata and coerced to int.
embedding_dimension = int(embedding_model.metadata["embedding_dimension"])

INFO:httpx:HTTP Request: GET http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321/v1/models "HTTP/1.1 200 OK"


In [5]:
# Name under which the vector store is created in the next cell.
vector_db_name: str = "my_pgvector_db"

In [6]:
# Create the vector store with an explicitly chosen provider and embedding
# model rather than leaving those choices to the server.
vs = client.vector_stores.create(
    name=vector_db_name,
    extra_body={
        # Optional: selects the vector store provider.
        "provider_id": "pgvector",
        "embedding_model": embedding_model_id,
        # Optional: will be auto-detected if not provided.
        "embedding_dimension": embedding_dimension,
    },
)
# Keep the generated store id; the ingestion cells pass it as vector_db_id.
vector_db_id = vs.id
print(f"Registered remote pgvector DB: {vs.id}")

INFO:httpx:HTTP Request: POST http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321/v1/vector_stores "HTTP/1.1 200 OK"


Registered remote pgvector DB: vs_6fb2c6fd-2a11-48f6-9a88-ea9b66b13e8c


In [7]:
# Raw text ingestion: embed an inline passage into the vector store.
# The passage starts and ends with a newline.
raw_text = (
    "\n"
    "LlamaStack can embed raw text into a vector store for retrieval.\n"
    "This example ingests a small passage for demonstration.\n"
)

# Wrap the passage as a plain-text document tagged with its origin.
document = RAGDocument(
    document_id="text_doc_id_001",
    mime_type="text/plain",
    content=raw_text,
    metadata={"source": "example_passage"},
)

# Insert the document, asking for chunks of 100 tokens.
client.tool_runtime.rag_tool.insert(
    vector_db_id=vector_db_id,
    documents=[document],
    chunk_size_in_tokens=100,
)
print("Raw text ingested successfully")

INFO:httpx:HTTP Request: POST http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321/v1/tool-runtime/rag-tool/insert "HTTP/1.1 200 OK"


Raw text ingested successfully


In [8]:
# HTML ingestion: the document content is given as a URL, not inline markup.
# NOTE(review): presumably the server fetches the page itself — confirm.
source = "https://www.paulgraham.com/greatwork.html"
document = RAGDocument(
    document_id="html_doc_id_001",
    content=source,
    mime_type="text/html",
    # Record where the page came from, under the same "source" key the raw-text
    # and PDF ingestion cells use, so retrieved chunks can be traced back.
    metadata={"source": source},
)
# Insert the document, asking for chunks of 50 tokens.
client.tool_runtime.rag_tool.insert(
    documents=[document],
    vector_db_id=vector_db_id,
    chunk_size_in_tokens=50,
)
print("HTML ingested successfully")

INFO:httpx:HTTP Request: POST http://llamastack-with-config-service.llama-stack.svc.cluster.local:8321/v1/tool-runtime/rag-tool/insert "HTTP/1.1 200 OK"


HTML ingested successfully


In [9]:
# PDF ingestion via Markdown: convert the PDF with docling, export the result
# as Markdown, and ingest that Markdown text.

# Path of the PDF, relative to the notebook's working directory.
source = "./red_hat_openshift_ai_self-managed-2.16-getting_started_with_red_hat_openshift_ai_self-managed-en-us.pdf"

# Convert the PDF with a default-configured docling converter.
converter = DocumentConverter()
result = converter.convert(source)

# Render the converted document as Markdown and count its pages.
markdown_content = result.document.export_to_markdown()
page_count = len(result.document.pages)

# Wrap the Markdown as a document, recording the source path and page count.
document = RAGDocument(
    document_id="pdf_as_markdown_doc_id_001",
    mime_type="text/markdown",
    content=markdown_content,
    metadata={"source": source, "page_count": page_count},
)

# Insert the document, asking for chunks of 512 tokens.
client.tool_runtime.rag_tool.insert(
    vector_db_id=vector_db_id,
    documents=[document],
    chunk_size_in_tokens=512,
)
print("PDF as Markdown ingested successfully")

INFO:docling.datamodel.document:detected formats: [<InputFormat.PDF: 'pdf'>]
INFO:docling.document_converter:Going to convert document batch...
INFO:docling.document_converter:Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
INFO:docling.models.factories.base_factory:Loading plugin 'docling_defaults'
INFO:docling.models.factories:Registered picture descriptions: ['vlm', 'api']
INFO:docling.models.factories.base_factory:Loading plugin 'docling_defaults'
INFO:docling.models.factories:Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
INFO:docling.models.auto_ocr_model:rapidocr cannot be used because onnxruntime is not installed.
INFO:docling.models.auto_ocr_model:easyocr cannot be used because it is not installed.
INFO:docling.utils.accelerator_utils:Accelerator device: 'cpu'
[32m[INFO] 2026-01-17 22:16:38,882 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2026-01-17 22:16:38,885 [R

PDF as Markdown ingested successfully
