In [None]:
import os, json
import base64
import openai
import glob
from typing import List, Literal
from pydantic import BaseModel, Field, RootModel

from dotenv import load_dotenv
# Read variables from a local .env file; with override=True, values from the
# file take precedence over variables already set in the process environment.
load_dotenv(override=True)

# Azure OpenAI connection settings. Indexing (rather than .get) makes a missing
# variable fail immediately with a KeyError naming the variable.
environ = os.environ
aoai_endpoint = environ["AZURE_OPENAI_ENDPOINT_o3"]
aoai_api_key = environ["AZURE_OPENAI_API_KEY_o3"]
api_version = environ["AZURE_OPENAI_API_VERSION"]

In [None]:
# Assembly step under evaluation. Its name selects both the prompt template
# file read below and the sample-image folder used by later cells.
#step_name="4_hexwrench_light"
step_name = "5_hexwrench_tight"

# Azure OpenAI client built from the settings read from the environment.
client = openai.AzureOpenAI(
    api_version=api_version,
    api_key=aoai_api_key,
    azure_endpoint=aoai_endpoint,
)

# Step-specific instructions live in "<step_name>.txt" in the working directory.
prompt_path = f"./{step_name}.txt"
with open(prompt_path, "r", encoding='utf-8') as prompt_file:
    user_prompt_template = prompt_file.read()

# Accumulators filled by later cells: `content` collects the parts of the user
# turn, `messages` is the list passed to the API as `input`.
content = []
messages = []

In [None]:
class StepResponse(BaseModel):
    # Verdict for a single test image.
    # Documented with comments rather than a docstring on purpose: pydantic
    # copies a model's docstring into the generated JSON schema, which would
    # change the schema sent to the model.

    # "yes" / "no" label for the image.
    classification: Literal["yes", "no"] = Field(
        ...,
        description="Classification of whether the images correspond to the step",
    )
    # Integer score constrained to the inclusive range 1..5.
    confidence: int = Field(
        ...,
        ge=1,
        le=5,
        description="Classification Score (1 as not belonging to the step, 5 as belonging to the step)",
    )
    # Free-text justification for the label.
    reasoning: str = Field(
        ...,
        description="Explanation of the decision",
    )

class StepResponseList(BaseModel):
    # Top-level structured output: one StepResponse per test image plus an
    # overall verdict. Comments are used instead of a docstring so the JSON
    # schema generated from this model stays exactly as before.

    # Per-image verdicts.
    responses: List[StepResponse] = Field(
        ...,
        description="List of responses for each test image",
    )
    # Single "yes" / "no" verdict for the whole set of images.
    overall_result: Literal["yes", "no"] = Field(
        ...,
        description="Overall classification of whether the images correspond to the step",
    )
    # Free-text justification for the overall verdict.
    reasoning: str = Field(
        ...,
        description="Overall reasoning for the classification",
    )

In [None]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the bytes are
    base64-encoded and the result is decoded to a ``str`` using UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

In [None]:
# Build the few-shot part of the user turn: an introductory sentence, every
# sample frame stored for the current step (each preceded by a "### sampleN"
# marker), and finally the step-specific instructions from the prompt template.
sample_folder = f"./output/{step_name}/"

image_files = sorted(glob.glob(os.path.join(sample_folder, "*.jpg")))
if not image_files:
    # The introduction below announces example images; sending the request
    # without any (e.g. because step_name points at the wrong folder) would
    # silently produce a misleading prompt.
    raise FileNotFoundError(f"No sample images (*.jpg) found in {sample_folder!r}")

# Start from a fresh list instead of appending to whatever `content` already
# holds, so re-running this cell does not duplicate the samples.
content = [{"type": "input_text",
            "text": "You are analyzing the productivity of a chair assembly process. The following images are examples of the assembling process of a chair."}]

for idx, image_path in enumerate(image_files, start=1):
    base64_encoded = encode_image(image_path)
    content.append({"type": "input_text", "text": f"### sample{idx}"})
    content.append({"type": "input_image", "image_url": f"data:image/jpeg;base64,{base64_encoded}", "detail": "high"})

content.append({"type": "input_text", "text": user_prompt_template})

In [None]:
# Frames to classify, taken from the root of the output folder.
test_folder = "./output/"
test_images = [
    "frame_0056_t56.0s.jpg", "frame_0057_t57.0s.jpg", "frame_0058_t58.0s.jpg"
    ]

# Collect the test part in its own list rather than appending to `content`,
# so re-running this cell neither duplicates the test images nor stacks
# several user messages.
test_content = []
for i, img_name in enumerate(test_images, start=1):
    base64_img = encode_image(os.path.join(test_folder, img_name))
    print(f"Processing test image {i}: {img_name}")
    test_content.append({"type": "input_text", "text": f"### test{i}"})
    test_content.append({"type": "input_image", "image_url": f"data:image/jpeg;base64,{base64_img}", "detail": "high"})

# The number of images in the instruction is derived from `test_images`, so the
# prompt stays correct when the list above is edited.
test_content.append({"type": "input_text", "text":f"Classify each of the {len(test_images)} images whether it belongs to step {step_name}, followed by overall_result. Return a JSON object with indent matching the given schema."})

# One user turn: the previously built sample content followed by the test part.
# `messages` is rebuilt from scratch on every run of this cell.
messages = [{"role": "user", "content": [*content, *test_content]}]

In [None]:
# Reasoning settings for the request.
# NOTE(review): the previous inline comments said these options were only
# supported with o4-mini and o3, while this call targets "gpt-5-mini" - confirm
# against the deployment actually used.
reasoning_options = {
    "effort": "medium",  # low, medium, or high
    "summary": "auto",   # auto, concise, or detailed
}

# Send the assembled conversation and ask for output shaped like StepResponseList.
response = client.responses.parse(
    model="gpt-5-mini",  # replace with model deployment name
    input=messages,
    reasoning=reasoning_options,
    text_format=StepResponseList,
)

print(response.output_text)

In [None]:
# Same conversation sent to a second model for comparison, with temperature 0
# and the same StepResponseList output format.
response2 = client.responses.parse(
    model="gpt-4.1",  # replace with model deployment name
    temperature=0,
    input=messages,
    text_format=StepResponseList,
)

print(response2.output_text)

In [None]:
# Collapse the per-image labels of the first response into one verdict:
# "yes" as soon as any image is classified "yes", otherwise "no".
# Only `response` is inspected here; `overall_result` from the model is not used.
data = json.loads(response.output_text)

if "yes" in (entry["classification"] for entry in data["responses"]):
    result = "yes"
else:
    result = "no"

print(result)