In [1]:
!pip install -q einops==0.7.0 langchain==0.1.9 pypdf==4.0.2 pymilvus==2.3.6 sentence-transformers==2.4.0


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import requests
import os
from langchain.document_loaders import PyPDFDirectoryLoader, WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Milvus
from pathlib import Path

In [3]:
def download_and_load_documentation_pdfs(product_name, product_version, sections, language):
    """Download Red Hat documentation PDFs and load them as documents.

    For every entry in ``sections`` a PDF URL on docs.redhat.com is built from
    the product name, version, section and language. Each PDF that downloads
    successfully is written to the local directory
    ``{product_name}-{product_version}-{language}``, and every PDF found in
    that directory is then loaded with ``PyPDFDirectoryLoader``.

    Args:
        product_name: Product identifier as used in the documentation URL.
        product_version: Product version as used in the documentation URL.
        sections: Iterable of documentation section identifiers.
        language: Language code used in the URL and in the file names.

    Returns:
        The documents returned by ``PyPDFDirectoryLoader.load()``. Where a
        document's file stem matches one of the requested sections, its
        ``metadata["source"]`` is replaced by the URL of the section's
        single-page HTML version; otherwise the local path is left untouched.
    """
    base_url = f"https://docs.redhat.com/{language}/documentation/{product_name}/{product_version}"
    docs_dir = f"{product_name}-{product_version}-{language}"

    pdfs = []
    # Keys are lower-cased because the lookup below lower-cases the file stem.
    pdfs_to_urls = {}
    for section in sections:
        stem = f"{product_name}-{product_version}-{section}-{language}"
        pdfs.append(f"{base_url}/pdf/{section}/{stem}.pdf")
        pdfs_to_urls[stem.lower()] = f"{base_url}/html-single/{section}/index"

    os.makedirs(docs_dir, exist_ok=True)

    for pdf in pdfs:
        try:
            # A timeout keeps one stalled connection from hanging the whole cell.
            response = requests.get(pdf, timeout=60)
        except requests.RequestException as error:
            # Best effort: report the failed download and carry on with the rest.
            print(f"Skipped {pdf}: {error}")
            continue
        if response.status_code != 200:
            print(f"Skipped {pdf}: HTTP {response.status_code}")
            continue
        with open(os.path.join(docs_dir, pdf.split('/')[-1]), 'wb') as f:
            f.write(response.content)

    pdf_loader = PyPDFDirectoryLoader(f"./{docs_dir}")
    pdf_docs = pdf_loader.load()

    # Inject document metadata so that we can find out the LLM answers' source later
    for doc in pdf_docs:
        local_source = doc.metadata["source"]
        # The directory may hold PDFs from an earlier run that are not part of
        # `sections`; keep their local path instead of raising KeyError.
        doc.metadata["source"] = pdfs_to_urls.get(Path(local_source).stem.lower(), local_source)

    return pdf_docs

In [4]:
def download_and_load_website_text_contents(websites):
    """Load the given web pages as documents.

    Args:
        websites: The URL or URLs handed to ``WebBaseLoader``.

    Returns:
        The documents returned by ``WebBaseLoader(websites).load()``.
    """
    return WebBaseLoader(websites).load()

In [5]:
def combine_and_text_splitting(pdf_docs, website_docs, chunk_size=1024, chunk_overlap=128):
    """Merge PDF and website documents and split them into chunks.

    Args:
        pdf_docs: Iterable of documents loaded from PDFs.
        website_docs: Iterable of documents loaded from web pages.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``
            (defaults to the previously hard-coded 1024).
        chunk_overlap: ``chunk_overlap`` passed to
            ``RecursiveCharacterTextSplitter`` (defaults to the previously
            hard-coded 128).

    Returns:
        The result of ``split_documents`` on the merged documents, with the
        PDF documents first.
    """
    # Build a new list so neither input is mutated and any iterable is accepted.
    merged_documents = list(pdf_docs) + list(website_docs)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size,
                                                   chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(merged_documents)

In [6]:
def inject_into_vector_db(document_splits, embedding_model_name, vector_db_collection_name,
                          device='cuda', drop_old=True):
    """Embed document chunks and insert them into a Milvus collection.

    The connection settings are read from the module-level names
    ``MILVUS_HOST``, ``MILVUS_PORT``, ``MILVUS_USERNAME`` and
    ``MILVUS_PASSWORD`` when the function is called, so they must be defined
    before the first call.

    Args:
        document_splits: Documents to insert (the chunks produced by the
            text splitter).
        embedding_model_name: Model name passed to ``HuggingFaceEmbeddings``.
        vector_db_collection_name: Name of the Milvus collection to write to.
        device: Device the embedding model runs on. Defaults to ``'cuda'``
            (the previously hard-coded value); pass ``'cpu'`` on machines
            without a GPU.
        drop_old: Forwarded to ``Milvus(drop_old=...)``. Defaults to ``True``
            (the previously hard-coded value). NOTE(review): per the LangChain
            Milvus documentation this drops an existing collection of the same
            name before inserting — confirm before pointing at shared data.

    Returns:
        The ``Milvus`` vector store after the documents have been added.
    """
    # Create embedding
    model_kwargs = {
        'device': device,
        # Lets the model repository supply its own code; only use with models
        # you trust.
        'trust_remote_code': True,
    }
    encode_kwargs = {
        'normalize_embeddings': False
    }
    embeddings = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
        show_progress=True
    )

    # Connect to vector DB
    db = Milvus(
        embedding_function=embeddings,
        connection_args={"host": MILVUS_HOST, "port": MILVUS_PORT, "user": MILVUS_USERNAME, "password": MILVUS_PASSWORD},
        collection_name=vector_db_collection_name,
        metadata_field="metadata",
        text_field="page_content",
        auto_id=True,
        drop_old=drop_old
    )

    # Insert into vector DB
    db.add_documents(document_splits)

    return db

# Program starts here

In [7]:
# Milvus connection settings, read by inject_into_vector_db when it is called.
MILVUS_HOST, MILVUS_PORT = "vectordb-milvus.milvus.svc.cluster.local", 19530

# Credentials come from the environment so they never live in the notebook;
# each is None when the corresponding variable is not set.
MILVUS_USERNAME = os.environ.get('MILVUS_USERNAME')
MILVUS_PASSWORD = os.environ.get('MILVUS_PASSWORD')

### Download Red Hat product documentation PDFs first

In [8]:
# Documentation sections to fetch; each entry becomes one PDF download.
documentation_sections = [
    "release_notes",
    "introduction_to_red_hat_openshift_ai",
    "getting_started_with_red_hat_openshift_ai_self-managed",
    "openshift_ai_tutorial_-_fraud_detection_example",
    "working_on_data_science_projects",
    "serving_models",
    "monitoring_data_science_models",
    "managing_resources",
    "installing_and_uninstalling_openshift_ai_self-managed",
    "installing_and_uninstalling_openshift_ai_self-managed_in_a_disconnected_environment",
    "upgrading_openshift_ai_self-managed",
    "upgrading_openshift_ai_self-managed_in_a_disconnected_environment",
]

pdf_documents = download_and_load_documentation_pdfs(
    product_name="red_hat_openshift_ai_self-managed",
    product_version="2.16",
    sections=documentation_sections,
    language="en",
)

  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


### Also save websites relevant to Red Hat products

In [9]:
# Pages from ai-on-openshift.io that complement the product documentation.
website_urls = [
    "https://ai-on-openshift.io/getting-started/openshift/",
    "https://ai-on-openshift.io/getting-started/opendatahub/",
    "https://ai-on-openshift.io/getting-started/openshift-ai/",
    "https://ai-on-openshift.io/odh-rhoai/configuration/",
    "https://ai-on-openshift.io/odh-rhoai/custom-notebooks/",
    "https://ai-on-openshift.io/odh-rhoai/nvidia-gpus/",
    "https://ai-on-openshift.io/odh-rhoai/custom-runtime-triton/",
    "https://ai-on-openshift.io/odh-rhoai/openshift-group-management/",
    "https://ai-on-openshift.io/tools-and-applications/minio/minio/",
]

website_documents = download_and_load_website_text_contents(websites=website_urls)

### Combine both and split them into chunks that can be saved into the vector DB

In [10]:
# Merge the PDF and website documents, then cut them into chunks for embedding.
split_documents = combine_and_text_splitting(
    pdf_docs=pdf_documents,
    website_docs=website_documents,
)

### Inject all split documents into the vector DB

In [11]:
# Embedding model and target collection for this documentation set.
embedding_model_name = "ibm-granite/granite-embedding-278m-multilingual"
collection_name = "openshift_ai_2_16_en_document"

vector_db = inject_into_vector_db(split_documents, embedding_model_name, collection_name)

Batches:   0%|          | 0/39 [00:00<?, ?it/s]

### Verify that the documents were injected into the vector DB

In [12]:
# Smoke test: run one similarity search and print each hit with its score.
query = "How can I work with GPU and taints in OpenShift AI?"
docs_with_score = vector_db.similarity_search_with_score(query)

separator = "-" * 80
for doc, score in docs_with_score:
    print(separator)
    print("Score: ", score)
    print(doc.page_content)
    print(separator)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

--------------------------------------------------------------------------------
Score:  0.3126489222049713
But don't worry, OpenShift AI and Open Data Hub take care of this part for you when you launch notebooks, workbenches, model servers, or pipeline runtimes!
Installation
Here is the documentation you can follow:

OpenShift AI documentation
NVIDIA documentation (more detailed)

Advanced configuration
Working with taints
In many cases, you will want to restrict access to GPUs, or be able to provide choice between different types of GPUs: simply stating "I want a GPU" is not enough. Also, if you want to make sure that only the Pods requiring GPUs end up on GPU-enabled nodes (and not other Pods that just end up being there at random because that's how Kubernetes works...), you're at the right place!
The only supported method at the moment to achieve this is to taint nodes, then apply tolerations on the Pods depending on where you want them scheduled. If you don't pay close attention t