## Red Hat Product Documentation Ingestion

Using this notebook and its dependencies, you can extract the documentation of specific Red Hat products and ingest it into a Milvus database using Nomic AI embeddings.

### Needed packages

In [None]:
!pip install -r requirements.txt

In [None]:
import json
import os

import rh_documentation_ingestion as rhdi

## Product information

In [None]:
class product_info:
    """One product/version/language combination whose documentation is ingested.

    Attributes:
        product: Short product identifier.
        product_full_name: Human-readable product name.
        version: A single product version.
        language: Language of the documentation.
    """

    def __init__(self, product, product_full_name, version, language):
        self.product, self.product_full_name = product, product_full_name
        self.version, self.language = version, language

    def __repr__(self):
        # Render the attributes in declaration order as "name=value" pairs.
        field_names = ("product", "product_full_name", "version", "language")
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in field_names)
        return f"product_info({rendered})"

In [None]:
# Load the collection definitions. JSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open('collections.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Flatten the definitions into one product_info per (product, version) pair:
# each entry lists several versions under its 'version' key.
products = [
    product_info(item['product'], item['product_full_name'], version, item['language'])
    for item in data
    for version in item['version']
]

## Ingestion

In [None]:
# Connection settings for Milvus, read from environment variables of the same
# name. Values are kept exactly as os.getenv returns them: a string, or None
# when the variable is not set.
milvus = {
    name: os.getenv(name)
    for name in ("MILVUS_HOST", "MILVUS_PORT", "MILVUS_USERNAME", "MILVUS_PASSWORD")
}

# Settings for the embeddings API, read the same way.
embeddings_endpoint = {
    name: os.getenv(name)
    for name in ("EMBEDDINGS_API_URL", "EMBEDDINGS_API_KEY")
}

# Report unset variables up front (names only, never values) so a missing
# setting is visible here rather than only as an error during ingestion.
unset_variables = [
    name
    for name, value in {**milvus, **embeddings_endpoint}.items()
    if value is None
]
if unset_variables:
    print(f"Warning: environment variables not set: {', '.join(unset_variables)}")

In [None]:
# Ingest every product/version pair. A failure is reported and recorded, then
# the loop continues so one failing product does not block the others.
failed_products = []
for product in products:
    print('-----------------------------------')
    print(f'Processing "{product.product_full_name}" at version {product.version}, language {product.language}')
    try:
        rhdi.ingest_documentation(product, milvus, embeddings_endpoint)
    except Exception as e:
        failed_products.append(product)
        print(f'Error processing "{product.product_full_name}" at version {product.version}, language {product.language}')
        # Include the exception type: str(e) alone can be empty or ambiguous
        # (for example, a KeyError renders as just the missing key).
        print(f'{type(e).__name__}: {e}')

# Summarize failures at the end so they are not lost in the per-product output.
if failed_products:
    print('-----------------------------------')
    print(f'{len(failed_products)} of {len(products)} ingestions failed:')
    for failed in failed_products:
        print(f'  - "{failed.product_full_name}" at version {failed.version}, language {failed.language}')