In [1]:
import sys, os, time, json

notebook_dir = os.getcwd()
parent_dir = os.path.dirname(notebook_dir)
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from utilities.curl_utilities import curl_setup, curl_close, curl_get_embedding
import utilities.environment_config as config
from openai import AsyncOpenAI
from typing import Optional
import asyncio

In [2]:
# Instructions sent as the "system" message of every chat completion request.
# The sentences are joined with single spaces into one prompt string.
system_prompt = " ".join(
    [
        "You are a professional and witty movie critic.",
        "Your task is to write a concise movie review based on the provided description.",
        "Give the movie a star rating out of 5 and a clear verdict (e.g., 'Must See', 'A Flop', 'Recommended').",
    ]
)

# Module-level counter; process_all_documents() overwrites it (via `global`)
# with the number of extracts it found.
number_of_documents = 0

In [3]:
async def generate_movie_review(movie_description: str, client: AsyncOpenAI) -> str:
    """Ask the chat model for a review of one movie description.

    Sends ``system_prompt`` as the system message and the description as the
    user message to ``config.generative_model_name`` through ``client``.

    Args:
        movie_description: Text describing the movie to review.
        client: Client used to issue the chat completion request.

    Returns:
        The generated review with surrounding whitespace stripped. This
        function is best-effort and never raises for request failures: on any
        error it returns a message starting with ``"An error occurred:"``
        instead, so callers that need to detect failures must check for that
        prefix.
    """
    try:
        # The user message provides the input data (the movie description).
        user_prompt = f"Movie Description: \"{movie_description}\""

        response = await client.chat.completions.create(
            model=config.generative_model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=300,
        )

        content = response.choices[0].message.content
        if content is None:
            # Without this guard the failure would surface as an opaque
            # "'NoneType' object has no attribute 'strip'" message.
            return "An error occurred: the model returned no text content."

        return content.strip()
    except Exception as e:
        # Deliberately swallow the exception so one failed request does not
        # abort a whole asyncio.gather() batch in the caller.
        return f"An error occurred: {e}."

In [4]:
async def process_all_documents(batch_size: int = 250) -> Optional[list[str]]:
    """Generate a movie review for every document that has an 'extract' field.

    Loads the JSON file named by ``config.document_file_location``, collects
    each document's ``'extract'`` text and passes the extracts to
    ``generate_movie_review`` in batches, waiting for each batch to finish
    before starting the next one. The module-level ``number_of_documents`` is
    set to the number of extracts found. A running total of completed
    requests is printed after every batch.

    Args:
        batch_size: Maximum number of generation requests issued concurrently
            in one batch. Must be positive.

    Returns:
        The generated texts, in the same order as the extracts, or ``None``
        if loading the file or running the batches failed (the error is
        printed rather than raised).

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    global number_of_documents

    if batch_size <= 0:
        # A non-positive step would make the batching loop below invalid.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    try:
        with open(config.document_file_location, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Prepare the raw text prompts; documents without an 'extract' are skipped.
        extracts = [document['extract'] for document in data if 'extract' in document]
        number_of_documents = len(extracts)

        print("Completions generated ...", end=" ")

        generated_text: list[str] = []

        # The context manager closes the client's HTTP connections on exit,
        # including when a batch raises.
        async with AsyncOpenAI(
            api_key=config.generative_model_api_key,
            base_url=config.generative_model_endpoint,
        ) as client:
            for start_offset in range(0, number_of_documents, batch_size):
                batch = extracts[start_offset:start_offset + batch_size]

                # Run the whole batch concurrently; gather() returns the
                # results in the order the awaitables were passed in.
                results = await asyncio.gather(
                    *(generate_movie_review(movie_description, client)
                      for movie_description in batch)
                )
                generated_text.extend(results)

                # Report the true running total (never more than the number
                # of documents, even when the last batch is short).
                print(len(generated_text), end=" ")

        return generated_text
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
    except Exception as e:
        print(f"An exception occurred generating text: {e}")

    # Reached only after one of the handlers above printed an error.
    return None

In [5]:
test_description = "Starting completion task using openai library and OpenShift model server"
print(test_description)
print(len(test_description) * "-")

tic = time.perf_counter()

results = await process_all_documents()

# Let's make sure we actually got the expected number of generated text items
assert len(results) == number_of_documents
for e in results:
    assert(len(e) > 0)

# Print how long it took
toc = time.perf_counter()
print("")
print(f"Time to generate {number_of_documents} completions: {toc - tic:0.4f} seconds")
print(len(test_description) * "-")

Starting completion task using openai library and OpenShift model server
------------------------------------------------------------------------
Completions generated ... 250 500 750 1000 1121 
Time to generate 1121 completions: 220.9043 seconds
------------------------------------------------------------------------


In [9]:
# Preview the first three generated reviews.
results[:3]

['**Movie Review:**\n\n"The Grudge" (2020) is a reboot of the iconic horror series, and while it tries to refresh the franchise, it ultimately falls short of delivering a truly terrifying experience. Nicolas Pesce\'s direction is competent, but the film\'s convoluted plot and lack of genuine scares make it a forgettable entry in the series.\n\nThe cast, featuring Andrea Riseborough, Demi√°n Bichir, and John Cho, delivers solid performances, but their characters are underdeveloped and lack the depth needed to make the story resonate. The film\'s attempts to connect the murders to a single house are intriguing, but the payoff is disappointing.\n\nOverall, "The Grudge" is a mediocre horror film that fails to live up to its predecessors. While it may satisfy fans of the series, newcomers may find it a disappointing entry in the franchise.\n\n**Verdict:** A Flop\n\n**Rating:** 2.5/5 stars\n\nWhile not a complete waste of time, "The Grudge" is a skippable horror film that fails to deliver th