In [None]:
import cv2
import numpy as np
from pathlib import Path

In [None]:
dataset_path = "./datasets/raw/mvtec-ad"
damage_category = "transistor"
test_path = Path(dataset_path) / damage_category / "test"
gt_mask_path = Path(dataset_path) / damage_category / "ground_truth"

In [None]:
print(test_path.exists())
print(gt_mask_path.exists())


In [None]:
def get_connected_components(gt_mask_path):
    # mask = cv2.imread(gt_mask_path, cv2.IMREAD_GRAYSCALE)
    mask = Image.open(gt_mask_path)
    mask_array = np.array(mask)
    
    output = cv2.connectedComponentsWithStats(mask_array, 4 , cv2.CV_32S)
    numLabels, labels, stats, centroids = output
    boxes = stats[1:, :4] # exclude background and take only the bounding boxes
    return boxes.tolist() # x, y, w, h
    

In [None]:
# Sanity check

tmp = Path("./000_mask.png")
tmp2 = get_connected_components(tmp)
tmp2

In [None]:
class_names = [entry.name for entry in test_path.iterdir() if entry.is_dir()]

print(len(class_names))
print(class_names)

In [None]:
class_names_to_id = {class_name: i+1 for i, class_name in enumerate(class_names)}
class_names_to_id   

In [None]:
damage_type_to_annotations = {}
file_name_to_boxes = {}

for class_name in class_names:
    if class_name == "good":
        continue
    
    class_path = gt_mask_path / class_name
    gt_mask_paths = [(entry.name, entry) for entry in class_path.iterdir()]
    for file_name, file_path in gt_mask_paths:
        file_name_to_boxes[file_name] = get_connected_components(file_path)
    
    damage_type_to_annotations[class_name] = file_name_to_boxes.copy()
    

In [None]:
damage_type_to_annotations

In [None]:
import os

import PIL.Image as Image

images = []

id = 0
for root, dirs, files in os.walk(test_path):
    for file in files:
        if file.endswith(".png"):
            file_path = os.path.join(root, file)
            image = Image.open(file_path)
            width, height = image.size
            
            relative_file_path = os.path.relpath(file_path, test_path)
            
            id += 1
            single_image = {
                "id": id,
                "width": width,
                "height": height,
                "file_name": relative_file_path,
                "zip_file": "test_images.zip"
            }
            
            images.append(single_image.copy())

print(len(images))
print(images[0])
print(images[-1])

In [None]:
annotations = []

id = 0
for image in images:
    damage_type, img_name = image['file_name'].split("/")
    
    if damage_type == "good":
        continue
    
    image_name = img_name.split(".")[0]
    mask_name = f"{image_name}_mask.png"
    
    for bbox in damage_type_to_annotations[damage_type][mask_name]:
        id += 1
        single_annotation = {
            "id": id,
            "category_id": class_names_to_id[damage_type],
            "image_id": image['id'],
            "bbox": bbox
        }
        
        annotations.append(single_annotation.copy())
    
print(len(annotations))
print(annotations[0])
print(annotations[-1])
    

In [None]:
categories = []

for class_name, class_id in class_names_to_id.items():
    category = {
        "id": class_id,
        "name": class_name}
    categories.append(category.copy())

categories

In [None]:
coco_json = {
    "images": images,
    "annotations": annotations,
    "categories": categories
}

import json

with open(f"mvtec-ad_{damage_category}_test.json", "w") as f:
    json.dump(coco_json, f)

In [None]:
import PIL.Image as Image
import numpy as np

img_path = "datasets/raw/mvtec-ad/transistor/ground_truth/bent_lead/004_mask.png"

# thresh = Image.open("000_mask.png")
thresh = Image.open(img_path)
thresh

In [None]:
thresh = np.array(thresh)
thresh.shape

In [None]:
import cv2

output = cv2.connectedComponentsWithStats(
	thresh, 4 , cv2.CV_32S)
(numLabels, labels, stats, centroids) = output


In [None]:
stats

In [None]:
import cv2

import matplotlib.pyplot as plt

# Load the image
image = cv2.imread(img_path)

i = 1
# Define the bounding box coordinates
x, y, w, h = stats[i][0], stats[i][1], stats[i][2], stats[i][3]

# Draw the bounding box on the image
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Convert the image from BGR to RGB
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Display the image with bounding box
plt.imshow(image_rgb)
plt.axis('off')
plt.show()

In [2]:
import pathlib
from vision_datasets.common import Usages, DatasetHub

dataset_infos_json_path = 'datasets.json'
dataset_hub = DatasetHub(pathlib.Path(dataset_infos_json_path).read_text(), container_url=None, local_dir="./")
stanford_cars = dataset_hub.create_vision_dataset('mvtec_ad', version=1, usage=Usages.TEST)

# note that you can pass multiple datasets.json to DatasetHub, it can combine them all
# example: DatasetHub([ds_json1, ds_json2, ...])
# note that you can specify multiple usages in create_manifest_dataset call
# example dataset_hub.create_manifest_dataset('stanford-cars', version=1, usage=[Usages.TRAIN, Usages.VAL])

for img, targets, sample_idx_str in stanford_cars:
    # img.show()
    # img.close()
    print(targets)

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[<vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cbbbcb0>]
[<vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cbbbc80>, <vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cbbb9e0>]
[<vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cbbb4d0>]
[<vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cbbbe30>]
[<vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cb46a50>]
[<vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest object at 0x7f1f6cb47590>]
[<vision_datasets.image_object_detection.man

In [7]:
vars(targets[0])

{'additional_info': {},
 '_label_data': [4, 0.310546875, 0.3037109375, 0.396484375, 0.41015625],
 'label_path': None}

In [52]:
dataset_info = {
        "name": "mvtec_ad",
        "version": 1,
        "description": "A sampled ms-coco dataset.",
        "type": "object_detection",
        "format": "coco",
        "root_folder": "datasets/processed/mvtect_ad_transistor",
        "test": {
            "index_path": "test.json",
            "files_for_local_usage": [
                "test.zip"
            ]
        }
    }

In [53]:
from vision_datasets.torch import TorchDataset

In [54]:
from torchvision import transforms
from torchvision.transforms import ToTensor

transform = transforms.Compose([ToTensor()])

In [55]:
dataset = TorchDataset(stanford_cars, transform=transform)

In [57]:
dataset[70]
   

(tensor([[[0.4863, 0.4667, 0.4314,  ..., 0.5098, 0.5059, 0.5098],
          [0.4784, 0.4824, 0.4745,  ..., 0.5490, 0.5333, 0.5176],
          [0.4902, 0.4902, 0.4941,  ..., 0.5608, 0.5490, 0.5373],
          ...,
          [0.5294, 0.5412, 0.5451,  ..., 0.5569, 0.5608, 0.5725],
          [0.5529, 0.5490, 0.5529,  ..., 0.5804, 0.5882, 0.5804],
          [0.5176, 0.5216, 0.5216,  ..., 0.5569, 0.5529, 0.5569]],
 
         [[0.3529, 0.3451, 0.3294,  ..., 0.4039, 0.3961, 0.3922],
          [0.3569, 0.3490, 0.3490,  ..., 0.4039, 0.3961, 0.3961],
          [0.3529, 0.3608, 0.3647,  ..., 0.4157, 0.4039, 0.4000],
          ...,
          [0.3804, 0.3961, 0.3922,  ..., 0.4118, 0.4196, 0.4235],
          [0.3961, 0.4000, 0.4078,  ..., 0.4235, 0.4235, 0.4275],
          [0.3765, 0.3804, 0.3725,  ..., 0.3882, 0.3961, 0.4000]],
 
         [[0.3098, 0.3176, 0.2824,  ..., 0.3608, 0.3529, 0.3569],
          [0.3098, 0.3176, 0.2902,  ..., 0.3529, 0.3490, 0.3529],
          [0.3059, 0.3059, 0.3020,  ...,

In [73]:
_, targets, _ = dataset[0]
targets

[]

In [75]:
import torch
tmp = torch.tensor([t.label_data for t in targets], dtype=torch.float32).reshape(-1, 5)

tensor([], size=(0, 5))

0

In [33]:
import torch
from torch.utils.data import DataLoader

batch_size = 1
num_workers = 1

dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)


In [34]:
for batch in dataloader:
    imgs, targets, sample_idx_str = batch
    print(imgs.shape)
    print(targets)
    print(sample_idx_str)
    

torch.Size([1, 3, 1024, 1024])
[]
('0',)
torch.Size([1, 3, 1024, 1024])
[]
('1',)
torch.Size([1, 3, 1024, 1024])
[]
('2',)
torch.Size([1, 3, 1024, 1024])
[]
('3',)
torch.Size([1, 3, 1024, 1024])
[]
('4',)
torch.Size([1, 3, 1024, 1024])
[]
('5',)
torch.Size([1, 3, 1024, 1024])
[]
('6',)
torch.Size([1, 3, 1024, 1024])
[]
('7',)
torch.Size([1, 3, 1024, 1024])
[]
('8',)
torch.Size([1, 3, 1024, 1024])
[]
('9',)
torch.Size([1, 3, 1024, 1024])
[]
('10',)
torch.Size([1, 3, 1024, 1024])
[]
('11',)
torch.Size([1, 3, 1024, 1024])
[]
('12',)
torch.Size([1, 3, 1024, 1024])
[]
('13',)
torch.Size([1, 3, 1024, 1024])
[]
('14',)
torch.Size([1, 3, 1024, 1024])
[]
('15',)
torch.Size([1, 3, 1024, 1024])
[]
('16',)
torch.Size([1, 3, 1024, 1024])
[]
('17',)
torch.Size([1, 3, 1024, 1024])
[]
('18',)
torch.Size([1, 3, 1024, 1024])
[]
('19',)
torch.Size([1, 3, 1024, 1024])
[]
('20',)
torch.Size([1, 3, 1024, 1024])
[]
('21',)
torch.Size([1, 3, 1024, 1024])
[]
('22',)
torch.Size([1, 3, 1024, 1024])
[]
('23',)
to

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/collate.py", line 182, in collate
    clone[i] = collate(samples, collate_fn_map=collate_fn_map)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/collate.py", line 191, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest'>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/collate.py", line 316, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/collate.py", line 173, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/collate.py", line 189, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/nisyad/miniconda3/envs/myenv/lib/python3.12/site-packages/torch/utils/data/_utils/collate.py", line 191, in collate
    raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'vision_datasets.image_object_detection.manifest.ImageObjectDetectionLabelManifest'>
