## Collection Verification

Use this notebook, together with its dependencies, to verify that the documentation was ingested correctly into the corresponding Milvus collections.

In [1]:
import json
import os

import requests
from dotenv import load_dotenv

import collections_loader as cl
import milvus_handler
from classes import Collection, Source, VersionInfo

# Load variables from a .env file (if one is found) into the process
# environment; the cells below read all of their settings with os.getenv.
load_dotenv()

USER_AGENT environment variable not set, consider setting it to identify your requests.


True

In [2]:
# Where the collection definitions come from. Every setting is optional and
# is None when its environment variable is unset; the loading cell below only
# uses a source whose primary setting (path / repo name) is not None.
collections_path = os.environ.get("COLLECTIONS_PATH")
collections_git_repo_name = os.environ.get("COLLECTIONS_GIT_REPO_NAME")
collections_git_repo_path = os.environ.get("COLLECTIONS_GIT_REPO_PATH")
collections_git_repo_branch = os.environ.get("COLLECTIONS_GIT_REPO_BRANCH")

In [3]:
# Load all collection JSON definitions into the `collections` list.
# Two optional sources are consulted, each one only when its environment
# variable (read in the previous cell) is set: a local path and a git repo.
collections = []
collection_loader = cl.CollectionLoader()
if collections_path is not None:
    # The return value is discarded here, so this call presumably fills
    # `collections` in place -- TODO confirm against collections_loader.
    collection_loader.fetch_collections_from_path(collections, collections_path)
if collections_git_repo_name is not None:
    # NOTE(review): unlike the path branch, this calls a module-level function
    # on `cl` (not the `collection_loader` instance) and rebinds `collections`
    # to its result. Confirm this asymmetry is intended.
    collections = cl.fetch_collections_from_git(collections, collections_git_repo_name, collections_git_repo_path, collections_git_repo_branch)

# Print every loaded collection so the parsed definitions can be inspected.
for collection in collections:
    print(collection)

ProductInfo(collection_base_name=red_hat_openshift_ai_self-managed, collection_full_name=Red Hat OpenShift AI Self-Managed, versions=[VersionInfo(version_number=2.7, store_directive=delete, sources=[Source(ingestion_type=redhat_doc, language=en-US, urls=None)], VersionInfo(version_number=2.8, store_directive=update, sources=[Source(ingestion_type=redhat_doc, language=en-US, urls=None)], VersionInfo(version_number=2.9, store_directive=delete, sources=[Source(ingestion_type=redhat_doc, language=en-US, urls=None)], VersionInfo(version_number=2.10, store_directive=delete, sources=[Source(ingestion_type=redhat_doc, language=en-US, urls=None)], VersionInfo(version_number=2.11, store_directive=delete, sources=[Source(ingestion_type=redhat_doc, language=en-US, urls=None)], VersionInfo(version_number=2.12, store_directive=delete, sources=[Source(ingestion_type=redhat_doc, language=en-US, urls=None)], VersionInfo(version_number=2.13, store_directive=create_or_keep, sources=[Source(ingestion_type

In [6]:
# NOTE(review): these two dicts are not referenced by any visible cell; they
# are kept in case a cell outside this view relies on them.
milvus_endpoint = {}
embeddings_endpoint = {}

# Connection settings for Milvus and for the embeddings service, all read
# from the environment. String settings are None when the variable is unset.
milvus_uri = os.getenv("MILVUS_URI")
milvus_username = os.getenv("MILVUS_USERNAME")
milvus_password = os.getenv("MILVUS_PASSWORD")
milvus_db = os.getenv("MILVUS_DB")

# MILVUS_BATCH_SIZE is converted to int. Check for a missing variable first so
# the failure names the variable instead of surfacing as an opaque
# "int() argument must be ... not 'NoneType'" TypeError.
milvus_batch_size_raw = os.getenv("MILVUS_BATCH_SIZE")
if milvus_batch_size_raw is None:
    raise ValueError("MILVUS_BATCH_SIZE environment variable is not set")
milvus_batch_size = int(milvus_batch_size_raw)

embeddings_api_url = os.getenv("EMBEDDINGS_API_URL")
embeddings_api_key = os.getenv("EMBEDDINGS_API_KEY")
embeddings_model_name = os.getenv("EMBEDDINGS_MODEL_NAME")

In [7]:
# Build the handler used by the query cell from the settings read in the
# previous cell.
#
# The instance is deliberately bound to the name `milvus_handler`, because the
# query cell calls it under that name. That binding shadows the imported
# module of the same name, so the class is resolved through a module alias:
# otherwise re-running this cell would look up `MilvusHandler` on the instance
# instead of the module and fail.
import milvus_handler as milvus_handler_module

milvus_handler = milvus_handler_module.MilvusHandler(
    milvus_uri,
    milvus_username,
    milvus_password,
    milvus_db,
    milvus_batch_size,
    embeddings_api_url,
    embeddings_api_key,
    embeddings_model_name
)

In [8]:
# Spot-check every collection/version pair: run one query against the
# matching Milvus collection and print up to top_k=10 scored results.
query_base = "search_query: How do I install "
hit_rule = "-" * 80
name_separators = str.maketrans("-.", "__")

for collection in collections:
    for version in collection.versions:
        print('-----------------------------------')
        # Milvus collection name: "<base name>_<version>" with every "-" and
        # "." replaced by "_".
        raw_name = f"{collection.collection_base_name}_{version.version_number}"
        collection_name = raw_name.translate(name_separators)
        query = query_base + collection.collection_full_name
        # Nothing to query when collection_check returns None for this name.
        if milvus_handler.collection_check(collection_name) is None:
            continue
        print(f'Processing "{collection.collection_full_name}", Version: "{version.version_number}"')
        docs_with_score = milvus_handler.similarity_search_with_score(
            collection, version, query, top_k=10
        )
        for doc, score in docs_with_score:
            print(hit_rule)
            print("Score: ", score)
            print("Source: ", doc.metadata['source'])
            print(doc.page_content)
            print(hit_rule)

-----------------------------------
-----------------------------------
Processing "Red Hat OpenShift AI Self-Managed", Version: "2.8"
--------------------------------------------------------------------------------
Score:  0.2499142587184906
Source:  https://docs.redhat.com/en/documentation/red_hat_openshift_ai_self-managed/2.8/html-single/installing_and_uninstalling_openshift_ai_self-managed_in_a_disconnected_environment
search_document: Section: Red Hat OpenShift AI Self-Managed / Preface

Content:
Learn how to use both the OpenShift command-line interface and web console to
install Red Hat OpenShift AI Self-Managed on your OpenShift Container Platform
cluster in a disconnected environment. To uninstall the product, learn how to
use the recommended command-line interface (CLI) method.  
Note  
Red Hat recommends that you install only one instance of OpenShift AI on your
cluster.  
Installing the Red Hat OpenShift AI Operator on the same cluster as the Red
Hat OpenShift AI Add-on is 