In [4]:
import os
from enum import Enum
import time
from PIL import Image
from gpt_client import ContentType, GPTClient
from torchvision.transforms import Compose, ToTensor

from utils import (
    TorchDataset,
    convert_tensor_to_base64,
    convert_to_base64,
    load_local_vision_dataset,
)


from dotenv import load_dotenv
load_dotenv()

True

### Azure OpenAI Resource

In [5]:
# Azure OpenAI connection settings, looked up in the process environment.
# Each value is None when the corresponding variable is not set.
API_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
DEPLOYMENT_NAME = os.environ.get("DEPLOYMENT_NAME")

### Prepare Dataset

In [6]:
dataset_name = "mppd_ad_metal_plate"

root_dir = "./"
# The dataset config location can be overridden with the DATASET_CONFIG_PATH
# environment variable so the notebook is not tied to one machine's home
# directory. When the variable is unset or empty, the original path is used.
dataset_config_path = (
    os.getenv("DATASET_CONFIG_PATH")
    or "/home/nisyad/projects/industrial-defect-detection/datasets.json"
)

# Load the dataset as configured, under its own name so that `dataset`
# refers to only one kind of object (the TorchDataset built below).
raw_dataset = load_local_vision_dataset(
    dataset_name=dataset_name,
    dataset_config_path=dataset_config_path,
    root_dir=root_dir,
    task_type="object_detection",
)

# Only ToTensor is applied to each image.
transform = Compose([ToTensor()])

dataset = TorchDataset(raw_dataset, transform=transform)

# Sanity Check: dataset size plus the last sample's image shape and target.
print("Number of Samples: ", len(dataset))
sample_img, sample_tgt = dataset[-1]
print("Image shape: ", sample_img.shape)
print("Target: ", sample_tgt) # [class, LTRB]

Number of Samples:  97
Image shape:  torch.Size([3, 1024, 1024])
Target:  tensor([[1.0000, 0.6182, 0.2256, 0.8672, 0.7930]])


### Create Prompt

In [7]:
# Instruction text sent to the model together with each image. It asks for a
# step-by-step defect assessment and a JSON object with the keys
# "is_defective", "reason", "defect_type" and "bounding_box".
# NOTE(review): this prompt describes glass bottles and the defect types
# "broken" / "contamination", while the dataset loaded in this notebook is
# named "mppd_ad_metal_plate" - confirm the prompt matches the dataset.
# NOTE(review): the box format is worded as [x_top, y_top, x_bottom, y_bottom],
# whereas the dataset target is annotated as LTRB - confirm both describe the
# same corner ordering before comparing predictions with targets.
prompt = """You are an expert visual inspector for a manufacturing company that makes glass bottles. You will be shown a top-view image of a glass bottle and your task is to identify if it is defective or not. Think step-by-step - first identify if there is a defect or not. Second, if there is a defect, identify the type of defect. **IF** present, the defect can only be of the following types: 1.broken 2.contamination. Third, explain your reasoning for the defect if present. Finally, identify where the defect is located in the image and provide the relative coordinates (between 0-1) of the bounding box enclosing the defect in the format [x_top, y_top, x_bottom, y_bottom]. Please return your response **strictly** as a valid JSON object with the following format:
{"is_defective": "<yes or no>",
"reason": "<describe the defect. leave empty if is_defective is no>",
"defect_type": "<type of defect. leave empty if is_defective is no>",
"bounding_box": "[x_top, y_top, x_bottom, y_bottom]. leave empty if is_defective is no"}

Note: If  there is no defect or if you are unsure, please return "is_defective": "no" and leave the other fields empty.
"""

prompt

# Sanity Check: uncomment to replace the prompt with a minimal one.
# prompt = "Describe the image in JSON format"

'You are an expert visual inspector for a manufacturing company that makes glass bottles. You will be shown a top-view image of a glass bottle and your task is to identify if it is defective or not. Think step-by-step - first identify if there is a defect or not. Second, if there is a defect, identify the type of defect. **IF** present, the defect can only be of the following types: 1.broken 2.contamination. Third, explain your reasoning for the defect if present. Finally, identify where the defect is located in the image and provide the relative coordinates (between 0-1) of the bounding box enclosing the defect in the format [x_top, y_top, x_bottom, y_bottom]. Please return your response **strictly** as a valid JSON object with the following format:\n{"is_defective": "<yes or no>",\n"reason": "<describe the defect. leave empty if is_defective is no>",\n"defect_type": "<type of defect. leave empty if is_defective is no>",\n"bounding_box": "[x_top, y_top, x_bottom, y_bottom]. leave empt

### GPT Client

In [8]:
# Query the model once per dataset sample and collect the extracted outputs.
# Each entry of `responses` is {"sample_idx": <int>, "response": <extracted output>}.

# Pause between consecutive requests, in seconds (same 20 s as before).
# NOTE(review): presumably this is there to respect service rate limits - confirm.
REQUEST_DELAY_SECONDS = 20

responses = []
num_samples = len(dataset)

for sample_idx in range(num_samples):
    print(f"Processing sample {sample_idx}...")

    # Only the image is sent to the model; the target is unpacked but not used here.
    sample_img, sample_tgt = dataset[sample_idx]
    base64_image, mime_type = convert_tensor_to_base64(sample_img)

    # A new client is constructed for every sample, exactly as before.
    # NOTE(review): presumably this keeps messages from earlier samples out of
    # the current request - confirm against GPTClient.update_messages.
    gpt_client = GPTClient(
        api_base=API_ENDPOINT,
        deployment_name=DEPLOYMENT_NAME,
    )

    # One text part (the prompt) plus the image encoded as a data URL.
    content = [
        {"type": ContentType.TEXT, "text": prompt},
        {"type": ContentType.IMAGE, "url": f"data:{mime_type};base64," + base64_image}
        # Add more content if needed
    ]

    gpt_client.update_messages(content=content)
    raw_response = gpt_client.get_response()
    output = gpt_client.extract_output(raw_response)

    responses.append({
        "sample_idx": sample_idx,
        "response": output})

    # Wait between requests, but not after the final sample: there is no
    # following request to space out, so that last pause was wasted time.
    if sample_idx < num_samples - 1:
        time.sleep(REQUEST_DELAY_SECONDS)


Processing sample 0...


In [9]:
# Display the collected results: one {"sample_idx", "response"} dict per processed sample.
responses

[{'sample_idx': 0,
  'response': '```json\n{\n  "is_defective": "no",\n  "reason": "",\n  "defect_type": "",\n  "bounding_box": ""\n}\n```'}]