In [1]:
import sys, os, time, json

# Put the parent of the current working directory at the front of the import
# search path (once only), so the `utilities` package imported below resolves.
notebook_dir = os.getcwd()
parent_dir = os.path.split(notebook_dir)[0]
if sys.path.count(parent_dir) == 0:
    sys.path.insert(0, parent_dir)

import utilities.environment_config as config
from openai import OpenAI

In [2]:
def embed_in_batches(client, texts, model, batch_size=250):
    """Request embeddings for ``texts`` in consecutive batches.

    A running document count is printed on one line: ``0`` first, then the
    number of texts embedded so far after each batch completes.

    Args:
        client: Object exposing ``embeddings.create(input=..., model=...)`` whose
            response has a ``data`` list of items with an ``embedding`` attribute.
        texts: List of raw text strings to embed.
        model: Model name forwarded unchanged to every request.
        batch_size: Maximum number of texts sent in one request; must be >= 1.

    Returns:
        One list holding the embeddings of every batch, in the order the
        batches were requested and the items appeared in each response.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the loop could never advance).
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    total = len(texts)
    collected = []
    print(0, end=" ")
    for start in range(0, total, batch_size):
        # Clamp the upper bound so the progress counter never exceeds the
        # real number of documents (matters when total < batch_size).
        end = min(start + batch_size, total)
        response = client.embeddings.create(input=texts[start:end], model=model)
        # extend() appends in place instead of rebuilding the list per batch
        collected.extend(item.embedding for item in response.data)
        print(end, end=" ")
    return collected


try:
    # This example uses openai to communicate with OpenShift vllm
    client = OpenAI(api_key=config.embedding_model_api_key, base_url=config.embedding_model_endpoint)

    with open(config.document_file_location, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # NOTE(review): the banner mentions curl, but the requests below go through
    # the OpenAI client. Text kept unchanged so previously recorded output matches.
    test_description = "Starting batch embedding using curl and OpenShift model serving"
    print(test_description)
    print(len(test_description) * "-")
    print("Embeddings computed ...", end= " ")

    # Prepare the raw text for which we want to compute the embeddings;
    # documents without an 'extract' key are skipped.
    extracts = [document['extract'] for document in data if 'extract' in document]
    number_of_documents = len(extracts)

    batch_size = 250 # How many raw text documents are sent to vllm in each request

    # Time only the embedding retrieval, not the validation that follows
    tic = time.perf_counter()
    embeddings = embed_in_batches(client, extracts, config.embedding_model_name, batch_size)
    toc = time.perf_counter()
    print("")

    # Let's make sure we actually got the expected number of embeddings
    # and each embedding is the expected length. The messages matter because
    # the broad handler below prints only str(exception).
    assert len(embeddings) == number_of_documents, (
        f"expected {number_of_documents} embeddings, got {len(embeddings)}"
    )
    for position, embedding in enumerate(embeddings):
        assert len(embedding) == config.embedding_dimensions, (
            f"embedding {position} has {len(embedding)} dimensions, "
            f"expected {config.embedding_dimensions}"
        )

    # Print how long it took
    print(f"Time to compute {number_of_documents} embeddings: {toc - tic:0.4f} seconds")
    print(len(test_description) * "-")
except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
except Exception as e:
    print(f"An exception occurred generating embeddings: {e}")

Starting batch embedding using curl and OpenShift model serving
---------------------------------------------------------------
Embeddings computed ... 0 250 500 750 1000 1121 
Time to compute 1121 embeddings: 30.7613 seconds
---------------------------------------------------------------
