In [1]:
import json
import os
import requests
import torch
from dotenv import load_dotenv
from torchvision import transforms
from torchvision.transforms import Compose, ToTensor
from PIL import Image

from azure.identity import get_bearer_token_provider, DefaultAzureCredential
from openai import AzureOpenAI

from utils import (
    TorchDataset,
    convert_tensor_to_base64,
    convert_to_base64,
    load_local_vision_dataset,
)

import tenacity

load_dotenv()


True

In [2]:
# prompt = """You are an expert visual inspector for a manufacturing company that makes glass bottles. You will be shown a top-view image of a glass bottle and your task is to identify if it is defective or not. Think step-by-step - first identify if there is a defect or not. Second, if there is a defect, identify the type of defect. **IF** present, the defect can only be of the following types: 1.broken 2.contamination. Third, explain your reasoning for the defect if present. Finally, identify where the defect is located in the image and provide the relative coordinates (between 0-1) of the bounding box enclosing the defect in the format [x_top, y_top, x_bottom, y_bottom]. Please return your response **strictly** as a valid JSON object with the following format:
# {"is_defective": "<yes or no>",
# "reason": "<describe the defect. leave empty if is_defective is no>",
# "defect_type": "<type of defect. leave empty if is_defective is no>",
# "bounding_box": "[x_top, y_top, x_bottom, y_bottom]. leave empty if is_defective is no"}

# Note: If  there is no defect or if you are unsure, please return "is_defective": "no" and leave the other fields empty.
# """

# prompt

prompt = "Describe the image in JSON format"

In [3]:
def get_messages_body(prompt, base64_image, mime_type):
    return [
        {
            "role": "system",
            "content": "You are a helpful assistant."
        },
        {
            "role": "user",
            "content": [
                    {
                        "type": "text",
                        "text": f"{prompt}"
                    },
                {
                        "type": "image_url",
                        "image_url": {
                                "url": f"data:{mime_type};base64," + f"{base64_image}",
                        }
                }
            ]
        }
    ]

In [4]:
sample_img_path = "datasets/raw/mvtec-ad/bottle/test/good/000.png"

# base64_image = convert_tensor_to_base64(sample_img)
base64_image, mime_type = convert_to_base64(sample_img_path)
print(mime_type)



image/png


In [24]:
token_provider = get_bearer_token_provider(
    DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
)

client = AzureOpenAI(
    api_version="2024-02-15-preview",
    azure_endpoint="https://customvision-dev-aoai.openai.azure.com/",
    azure_ad_token_provider=token_provider
)

num_trial = 1

@tenacity.retry(
    stop=tenacity.stop_after_attempt(10),
    wait=tenacity.wait_exponential(multiplier=2, min=30, max=128),
    retry=tenacity.retry_if_exception_type(Exception),
)
def get_response():
    try:
        response = client.chat.completions.create(
            model="gpt4-001",
            messages=get_messages_body(prompt, base64_image, mime_type),
            max_tokens=10,
            response_format={"type": "json_object"},  # enforce json o/p
        )
        return response
    except Exception as e:
        print(f"Error: {e}")
        raise e

In [25]:
response = get_response()

Trying to get response...
1
Error: Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}
Trying to get response...
1
Error: Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}
Trying to get response...
1
Error: Error code: 404 - {'error': {'code': 'DeploymentNotFound', 'message': 'The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.'}}


RetryError: RetryError[<Future at 0x7fc450eb53a0 state=finished raised NotFoundError>]

In [9]:
import json

result = json.loads(response.model_dump_json())
result["choices"][0]["message"]["content"]

'{\n  "image": {\n    "description":'

In [None]:
import instructor
from pydantic import BaseModel

In [None]:
prompt = """You are an expert visual inspector for a manufacturing company that makes glass bottles. You will be shown a top-view image of a glass bottle and your task is to identify if it is defective or not. Think step-by-step - first identify if there is a defect or not. Second, if there is a defect, identify the type of defect. **IF** present, the defect can only be of the following types: 1.broken 2.contamination. Third, explain your reasoning for the defect if present. Finally, identify where the defect is located in the image and provide the relative coordinates (between 0-1) of the bounding box enclosing the defect in the format [x_top, y_top, x_bottom, y_bottom]. Please return your response **strictly** as a valid JSON object as defined by the IndustrialDefect Pydantic model.

Note: If  there is no defect or if you are unsure, please return defect_type as 'no_defect' and leave the other fields empty.
"""

In [None]:
from enum import Enum

# Define an Enum class for defect types
class DefectType(str, Enum):
    NO_DEFECT = "no_defect"
    CRACK = "crack"
    DENT = "dent"
    SCRATCH = "scratch"

# Define your Pydantic model
class IndustrialDefect(BaseModel):
    defect_type: DefectType
    reason: str = ""


# Patch the OpenAI client
client = instructor.patch(client)

# Extract structured data from natural language
defect = client.chat.completions.create(
    model="gpt4o-001",
    response_model=IndustrialDefect,
    messages=get_messages_body(prompt, base64_image, mime_type),
)

defect

In [None]:
defect.defect_type