In [4]:
import cv2
import json
import numpy as np
import pandas as pd
from pathlib import Path

In [5]:
SOURCE_PATH = 'datasets/pills/data/train'
TARGET_PATH = 'datasets/pills/data/masked'
LABEL_PATH = 'datasets/pills/class_label.csv'

In [6]:
dataset = {
    "info": {"description": "Pill Segmentation Dataset", "version": "1.0"},
    "images": [],
    "annotations": [],
    "shape_categories": [
        {"id": 1, "name": "circle"},
        {"id": 2, "name": "ellipse"}
    ]
}

In [7]:
class_label = pd.read_csv(LABEL_PATH, header=0)

In [8]:
class_dict = {}
for i, row in class_label.iterrows():
  class_dict[row['class']] = row['label'].split('#')

In [9]:
source_path = Path(SOURCE_PATH)

In [10]:
annotation_id = 1
image_id = 1
for folder in source_path.iterdir():
  for path in folder.iterdir():
    try: 
      pill_types = []

      kind_of_pills = class_dict[folder.stem]
      
      bgra_image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
      mask = (bgra_image[:,:,3] == 0)
      bgra_image[mask, :] = 0

      gray_image = cv2.cvtColor(bgra_image, cv2.COLOR_BGRA2GRAY)
      
      contours = cv2.findContours(gray_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
      for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        kind_of_pills = class_dict[folder.stem]

        perimeter = cv2.arcLength(cnt, True)
        area = cv2.contourArea(cnt)

        if perimeter == 0:
          continue
        circularity = 4 * np.pi * area / perimeter**2
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = w / float(h)
        if circularity > 0.85 and 1.05 > aspect_ratio > 0.95:
          pill_types.append(1)
          pill_type = 1
        else:
          pill_types.append(2)
          pill_type = 2
          
        M = cv2.moments(cnt)
        if M['m00'] == 0:
          continue
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
          
        dataset["annotations"].append({
          "id": annotation_id,
          "image_id": image_id,
          "shape": pill_type,
          "centroid": [cx, cy],
          "bbox": [x, y, w, h],
          "segmentation": cnt.flatten().tolist(),
        })

        annotation_id += 1

      dataset["images"].append({
        "id": image_id,
        "file_path": "data/train/" + folder.stem + "/" + path.name,
        "height": bgra_image.shape[0],
        "width": bgra_image.shape[1],
        "pill_count": len(kind_of_pills),
        "kind_of_pills": kind_of_pills,
        "pill_shapes": list(set(pill_types))
      })
    
      image_id += 1
    except Exception as e:
      print(e, path)

'NoneType' object is not subscriptable datasets/pills/data/train/C0694872/D1272727.png
'NoneType' object is not subscriptable datasets/pills/data/train/C0765158/D1381935.png
'NoneType' object is not subscriptable datasets/pills/data/train/C0779312/D1407143.png


In [12]:
with open('datasets/pills/data/dataset.json', 'w') as f:
    json.dump(dataset, f, indent=4)