In [None]:
import os, json
import openai
import glob
import base64

from dotenv import load_dotenv
# Read settings from the local .env file (python-dotenv).
load_dotenv(".env")

# Required Azure OpenAI settings; a missing variable raises KeyError right here.
aoai_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"]
aoai_api_key = os.environ["AZURE_OPENAI_API_KEY"]
api_version = os.environ["AZURE_OPENAI_API_VERSION"]
chat_model = os.environ["AZURE_OPENAI_CHAT_MODEL"]

client = openai.AzureOpenAI(
    api_version=api_version,
    api_key=aoai_api_key,
    azure_endpoint=aoai_endpoint,
)

# Assembly step under analysis; its name selects the prompt file read below.
step_name = "5_hexwrench_tight"

# Load the step-specific prompt text.
with open(f"./utils/{step_name}.txt", encoding="utf-8") as prompt_file:
    user_prompt_template = prompt_file.read()

# `content` collects the parts of the first user message and `messages` holds
# the conversation history; both start empty and are filled by later cells.
content = []
messages = []

In [None]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` as UTF-8.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")

In [None]:
# Opening instruction, followed by the labelled reference ("sample") images for
# this step and finally the step-specific prompt text.
content.append({
    "type": "text",
    "text": "You are analyzing the productivity of a chair assembly process. The following images are examples of the assembling process of a chair.",
})
sample_folder = f"./output/{step_name}/"

# Sorted so that the sample numbering is stable from run to run.
image_files = sorted(glob.glob(os.path.join(sample_folder, "*.jpg")))
for idx, image_path in enumerate(image_files, start=1):
    image_name = os.path.basename(image_path)
    base64_encoded = encode_image(image_path)
    content.append({"type": "text", "text": f"### sample{idx}"})
    # "detail" must be a key of the "image_url" object itself; placed next to
    # "image_url" it is not part of the image specification.
    content.append({
        "type": "image_url",
        "image_url": {
            "url": "data:image/jpeg;base64," + base64_encoded,
            "detail": "high",
        },
    })

content.append({"type": "text", "text": user_prompt_template})

In [None]:
# Frames to classify; each one is added to the prompt under a "### testN" label.
test_folder = "./output/"
test_images = [
    "frame_0056_t56.0s.jpg", "frame_0057_t57.0s.jpg", "frame_0058_t58.0s.jpg"
]

for i, img_name in enumerate(test_images, start=1):
    base64_img = encode_image(os.path.join(test_folder, img_name))
    print(f"Processing test image {i}: {img_name}")
    content.append({"type": "text", "text": f"### test{i}"})
    # "detail" must be a key of the "image_url" object itself; placed next to
    # "image_url" it is not part of the image specification.
    content.append({
        "type": "image_url",
        "image_url": {
            "url": "data:image/jpeg;base64," + base64_img,
            "detail": "high",
        },
    })

# Final instruction: what to decide for the test images and how to format the answer.
content.append({"type": "text", "text":f"Classify each of the three images whether it belongs to step {step_name}, followed by overall_result. Return a JSON object with indent matching the given schema."})

# Everything collected so far is sent as one multi-part user message.
messages.append({"role": "user","content":content})

In [None]:
# First turn: send the assembled multi-part user message to the chat model.
# Optional settings left disabled here: temperature=0.0, max_completion_tokens=4000.
response = client.chat.completions.create(
    messages=messages,
    model=chat_model,
    reasoning_effort="high",
)

# Keep the assistant reply in the history so a later turn can build on it.
response_message = response.choices[0].message
answer = response_message.content
messages.append(response_message)
print(answer)

In [None]:
# Second turn: ask the model for the reasoning behind its previous answer.
follow_up = {"role": "user", "content": "Show me your reasoning process."}
messages.append(follow_up)

response = client.chat.completions.create(messages=messages, model=chat_model)

# Record the reply in the history as well, then show its text.
response_message = response.choices[0].message
messages.append(response_message)
print(response_message.content)