In [None]:
import cv2
import numpy as np
from pathlib import Path

In [None]:
# Root of the raw MVTec AD download and the object category to convert.
dataset_path = "./datasets/raw/mvtec-ad"
damage_category = "transistor"

# Test images and ground-truth masks both sit below <root>/<category>.
test_path = Path(dataset_path, damage_category, "test")
gt_mask_path = Path(dataset_path, damage_category, "ground_truth")

In [None]:
# Quick check that both dataset folders are present (one True/False per line).
print(test_path.exists(), gt_mask_path.exists(), sep="\n")

In [None]:
def get_connected_components(gt_mask_path):
    """Return one bounding box per connected foreground region of a mask image.

    Args:
        gt_mask_path: Path (``str`` or ``pathlib.Path``) of the mask image.
            Every non-zero pixel is treated as foreground.

    Returns:
        A list of ``[x, y, w, h]`` lists of ints in pixel coordinates, one per
        connected component with the background excluded. The list is empty
        when the mask has no foreground pixels.
    """
    # Imported locally so the function does not depend on which notebook cells
    # ran before it (the notebook imports PIL only in a later cell).
    from PIL import Image

    # The context manager releases the file handle once the pixels are copied.
    with Image.open(gt_mask_path) as mask:
        mask_array = np.asarray(mask)

    # connectedComponentsWithStats needs an 8-bit single-channel image:
    # collapse colour channels and reduce every pixel to 0 / 1.
    if mask_array.ndim == 3:
        mask_array = mask_array.any(axis=2)
    mask_array = (mask_array != 0).astype(np.uint8)

    # connectivity / ltype are passed by keyword: in the Python binding the
    # positional slots after `image` are the optional output arrays, so
    # positional values are not guaranteed to reach these two parameters.
    _, _, stats, _ = cv2.connectedComponentsWithStats(
        mask_array, connectivity=4, ltype=cv2.CV_32S)

    # Row 0 describes the background label; the first four columns of each
    # row are left, top, width and height.
    return stats[1:, :4].tolist()  # x, y, w, h

In [None]:
# Sanity check: run the helper on a single mask stored next to the notebook
# and display the boxes it returns.

tmp = Path(".") / "000_mask.png"
tmp2 = get_connected_components(gt_mask_path=tmp)
tmp2

In [None]:
# Every sub-folder of the test split is one class. Path.iterdir() yields
# entries in arbitrary order, so the names are sorted to keep the category ids
# derived from this list identical from run to run.
class_names = sorted(
    entry.name for entry in test_path.iterdir() if entry.is_dir())

print(len(class_names))
print(class_names)

In [None]:
# Category ids are 1-based and follow the order of `class_names`.
class_names_to_id = dict(zip(class_names, range(1, len(class_names) + 1)))
class_names_to_id

In [None]:
# Bounding boxes per defect class:
# {class name: {mask file name: [[x, y, w, h], ...]}}
damage_type_to_annotations = {}

for class_name in class_names:
    # "good" is skipped: no ground-truth folder is read for it.
    if class_name == "good":
        continue

    class_path = gt_mask_path / class_name

    # Build a fresh mapping for every class. Mask file names are not
    # class-specific (e.g. "000_mask.png"), so a dict shared between
    # iterations would carry one class's boxes over into the next class.
    file_name_to_boxes = {
        entry.name: get_connected_components(entry)
        for entry in sorted(class_path.iterdir())
        if entry.is_file()
    }

    damage_type_to_annotations[class_name] = file_name_to_boxes

In [None]:
# Display the nested mapping: class name -> mask file name -> list of boxes.
damage_type_to_annotations

In [None]:
import os

import PIL.Image as Image

# COCO-style "images" section: one record per PNG found below the test split.
images = []

# Sort the paths: directory traversal order is not guaranteed, and the ids
# assigned below should be identical on every run.
png_paths = sorted(
    path for path in test_path.rglob("*.png") if path.is_file())

for image_id, png_path in enumerate(png_paths, start=1):
    # The context manager releases the file handle after the size is read.
    with Image.open(png_path) as image:
        width, height = image.size

    images.append({
        "id": image_id,
        "width": width,
        "height": height,
        # Always forward slashes, independent of the operating system, so the
        # value can be split into "<class>/<file>" later on.
        "file_name": png_path.relative_to(test_path).as_posix(),
        "zip_file": "test_images.zip"
    })

print(len(images))
# Guarded so an empty test split reports 0 instead of raising IndexError.
if images:
    print(images[0])
    print(images[-1])

In [None]:
# COCO-style "annotations" section: one record per bounding box.
annotations = []

annotation_id = 0
for image in images:
    # file_name is expected to be "<damage type>/<image name>". Path.parts
    # splits on whichever separator the platform used to build it.
    path_parts = Path(image['file_name']).parts
    if len(path_parts) != 2:
        raise ValueError(
            "expected '<damage type>/<image name>', "
            f"got {image['file_name']!r}")
    damage_type, img_name = path_parts

    # Images in the "good" folder carry no boxes.
    if damage_type == "good":
        continue

    # The mask of "<name>.png" is stored as "<name>_mask.png".
    image_name = img_name.split(".")[0]
    mask_name = f"{image_name}_mask.png"

    for bbox in damage_type_to_annotations[damage_type][mask_name]:
        annotation_id += 1
        annotations.append({
            "id": annotation_id,
            "category_id": class_names_to_id[damage_type],
            "image_id": image['id'],
            "bbox": bbox
        })

print(len(annotations))
# Guarded so a run without any defect boxes reports 0 instead of raising.
if annotations:
    print(annotations[0])
    print(annotations[-1])

In [None]:
# COCO-style "categories" section, in the iteration order of
# `class_names_to_id`.
categories = [
    {"id": class_id, "name": class_name}
    for class_name, class_id in class_names_to_id.items()
]

categories

In [None]:
import json
# Bundle the three sections into a single COCO-style document.
coco_json = dict(
    images=images,
    annotations=annotations,
    categories=categories,
)

# Serialise into the working directory; the file name carries the category.
with open(f"mvtec-ad_{damage_category}_test.json", "w") as out_file:
    out_file.write(json.dumps(coco_json))

In [None]:
import PIL.Image as Image
import numpy as np

# Mask used for the manual walk-through in the cells below.
# NOTE(review): this repeats the dataset root and category that are configured
# near the top of the notebook; the two have to be kept in sync by hand.
img_path = "datasets/raw/mvtec-ad/transistor/ground_truth/bent_lead/004_mask.png"

# Alternative input kept for reference: the mask stored next to the notebook.
# thresh = Image.open("000_mask.png")
thresh = Image.open(img_path)
# Last expression of the cell, so the opened image is shown as the output.
thresh

In [None]:
# Rebind `thresh` from the PIL image to its NumPy array form.
thresh = np.array(thresh)
# Shown as the cell output.
thresh.shape

In [None]:
import cv2

# Label the connected foreground regions of the example mask.
# connectivity / ltype are passed by keyword: in the Python binding the
# positional slots after `image` are the optional output arrays, so positional
# values are not guaranteed to reach these two parameters.
output = cv2.connectedComponentsWithStats(
    thresh, connectivity=4, ltype=cv2.CV_32S)
# Number of labels, per-pixel label image, per-label statistics, and
# per-label centroids.
(numLabels, labels, stats, centroids) = output

In [None]:
# Display the per-label statistics returned by connectedComponentsWithStats.
stats

In [None]:
import cv2

import matplotlib.pyplot as plt

# Load the image
image = cv2.imread(img_path)
# cv2.imread reports an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Index of the component to draw (the COCO conversion above treats row 0 of
# `stats` as the background).
i = 1
# Define the bounding box coordinates; cast the NumPy scalars to plain ints
# before handing them to cv2.rectangle.
x, y, w, h = (int(value) for value in stats[i][:4])

# Draw the bounding box on the image
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Convert the image from BGR to RGB
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display the image with bounding box
plt.imshow(image_rgb)
plt.axis('off')
plt.show()

In [None]:
from utils import load_local_vision_dataset, TorchDataset
from torchvision import transforms
from torchvision.transforms import ToTensor, Compose

# Load the dataset through the project helper, using the "mvtec_ad" entry of
# ./datasets.json as an object-detection dataset.
dataset = load_local_vision_dataset(dataset_name="mvtec_ad",
                                    dataset_config_path="./datasets.json",
                                    root_dir="./",
                                    task_type="object_detection",
                                    )


# NOTE(review): `transform` is built here but never used; the TorchDataset
# below is created with transform=None. Confirm which of the two is intended.
transform = Compose([ToTensor()])

# Rebinds `dataset` from the loaded dataset to its TorchDataset wrapper.
dataset = TorchDataset(dataset, transform=None)
# Each item unpacks into an (image, target) pair; index 70 is an arbitrary
# example.
sample_img, sample_tgt = dataset[70]
print(sample_img)
print(sample_tgt)

In [None]:
from torch.utils.data import DataLoader

# Two shuffled loaders over the same dataset: one sample per batch, and two
# samples per batch.
# NOTE(review): shuffle=True without a fixed seed, so batch order differs
# between runs.
data_loader = DataLoader(dataset, batch_size=1, shuffle=True)
data_loader_context = DataLoader(dataset, batch_size=2, shuffle=True)

In [None]:
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Turn the sample back into a PIL image for plotting.
to_pil = ToPILImage()
img = to_pil(sample_img.squeeze(0))

# Skip the first target entry; the remaining four values are unpacked as
# left, top, right, bottom.
bbox_ltrb = sample_tgt.squeeze(0).tolist()[1:]
left, top, right, bottom = bbox_ltrb

# ltrb -> xywh, scaled from relative to absolute pixel coordinates.
x, y = left * img.width, top * img.height
w, h = (right - left) * img.width, (bottom - top) * img.height

# Show the image and overlay the box as an unfilled red rectangle.
fig, ax = plt.subplots()
ax.imshow(img)
ax.add_patch(
    patches.Rectangle(
        (x, y), w, h,
        linewidth=2, edgecolor='r', facecolor='none',
    )
)
plt.show()

In [None]:
from mimetypes import guess_type
import io

# Encode the PIL image into an in-memory buffer.
# You can change JPEG to PNG if you prefer
image_format = "JPEG"
buffer = io.BytesIO()
img.save(buffer, format=image_format)

# guess_type() maps a file name or URL to a MIME type by its extension; it
# does not inspect file contents. Give it a name with the extension matching
# the format used above instead of the encoded bytes.
guess_type(f"image.{image_format.lower()}")

In [None]:
# Target values of the sample as a plain Python list.
sample_tgt.squeeze(0).tolist()

In [None]:
from utils import conert_tensor_to_base64

# Presumably encodes the sample image as a base64 string — confirm in utils.
# NOTE(review): `conert_tensor_to_base64` looks like a misspelling of
# "convert"; the name has to match what `utils` exports, so it can only be
# corrected there.
img_base64 = conert_tensor_to_base64(sample_img)
# Shown as the cell output.
img_base64

In [None]:
from gpt_client import GPTClient
from dotenv import load_dotenv
import os

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Endpoint and key come from the environment; API version and deployment are
# fixed here.
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")

gpt_client = GPTClient(
    api_base=azure_endpoint,
    api_key=azure_api_key,
    api_version="2023-12-01-preview",
    deployment_name="gpt4o-001",
)

In [None]:
# Smoke test of the client.
# NOTE(review): the first argument appears to be the base64 image (a later
# cell passes `base64_image` in that position); here it is an empty string —
# confirm the client accepts that.
response = gpt_client.get_response("", "describe the image")
print(response)

In [None]:
from utils import convert_to_base64

# The helper returns two values; only the first is kept. Presumably it is the
# base64 text of ./sample.png — confirm in utils.
base64_image, _ = convert_to_base64("./sample.png")
print(base64_image)

In [None]:
# Send the base64 image together with a prompt; the return value is shown as
# the cell output.
gpt_client.get_response(base64_image, "use the image to tell a joke")

In [None]:
import requests

In [None]:
import json

# Chat-completions payload: a system prompt followed by one user message that
# carries a text part and the image as a base64 data URL.
system_message = {
    "role": "system",
    "content": "You are a helpful assistant.",
}

text_part = {
    "type": "text",
    "text": "Describe this picture:",
}
image_part = {
    "type": "image_url",
    "image_url": {
        "url": "data:image/png;base64," + base64_image,
    },
}
user_message = {
    "role": "user",
    "content": [text_part, image_part],
}

request_body = {
    "messages": [system_message, user_message],
    "max_tokens": 200,
}

print(request_body)

In [None]:
# Fail early when the key is missing instead of sending a request without it.
api_key = os.getenv("AZURE_OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("AZURE_OPENAI_API_KEY is not set")

headers = {'api-key': api_key,
           'Content-Type': 'application/json'}

# Prefer the endpoint the GPTClient cell reads from the environment, so both
# code paths talk to the same resource; fall back to the previously
# hard-coded one. A trailing slash is dropped to avoid "//" in the URL.
endpoint = (os.getenv("AZURE_OPENAI_ENDPOINT")
            or "https://customvision-dev-aoai.openai.azure.com").rstrip("/")
deployment_name = "gpt4o-001"

# NOTE(review): this api-version differs from the one passed to GPTClient
# earlier in the notebook ("2023-12-01-preview") — confirm that is intended.
url = f'{endpoint}/openai/deployments/{deployment_name}/chat/completions?api-version=2024-02-15-preview'


# The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(url, headers=headers, json=request_body, timeout=120)

In [None]:
# Decode the JSON body of the HTTP response and show it as the cell output.
result = response.json()
result

In [None]:
# Top-level keys of the decoded response.
result.keys()