In [1]:
import glob

In [2]:
from PIL import Image                                      # (pip install Pillow)
import numpy as np                                         # (pip install numpy)
from skimage import measure                                # (pip install scikit-image)
from shapely.geometry import Polygon, MultiPolygon         # (pip install Shapely)
import os
import json

def create_sub_masks(mask_image, width, height):
    """Split a label mask into one binary sub-mask per distinct pixel value.

    Args:
        mask_image: object exposing ``getpixel((x, y))``; in this notebook a
            PIL image opened from a mask file.
        width: number of columns of ``mask_image`` to scan.
        height: number of rows of ``mask_image`` to scan.

    Returns:
        dict mapping ``str(pixel_value)`` to a mode-"1" PIL image of size
        ``(width + 2, height + 2)``. In each sub-mask, every pixel that
        carried that value in ``mask_image`` is set to 1, shifted by the
        one-pixel padding.
    """
    # Sub-masks indexed by the string form of the pixel value
    sub_masks = {}
    for x in range(width):
        for y in range(height):
            pixel = mask_image.getpixel((x, y))

            # Snap the noise values 1 and 254 to 0 and 255
            if pixel == 1:
                pixel = 0
            elif pixel == 254:
                pixel = 255

            pixel_str = str(pixel)
            if pixel_str not in sub_masks:
                # Create a sub-mask (one bit per pixel) and add to the dictionary.
                # Note: we add 1 pixel of padding in each direction
                # because the contours module doesn't handle cases
                # where pixels bleed to the edge of the image
                sub_masks[pixel_str] = Image.new("1", (width + 2, height + 2))

            # Set the pixel value to 1 (default is 0), accounting for padding
            sub_masks[pixel_str].putpixel((x + 1, y + 1), 1)

    return sub_masks

def create_sub_mask_annotation(sub_mask):
    """Trace the outlines of a binary sub-mask as simplified polygons.

    Args:
        sub_mask: binary mask (anything ``np.array`` accepts) padded by one
            pixel on every side, as produced by ``create_sub_masks``.

    Returns:
        ``(polygons, segmentations)``: two parallel lists. ``polygons`` holds
        the simplified polygon of each contour; ``segmentations`` holds the
        matching flat ``[x0, y0, x1, y1, ...]`` list of exterior coordinates.
        Contours whose simplified geometry is empty, or has no single
        exterior ring, are left out of both lists.
    """
    # Find contours (boundary lines) around each sub-mask
    # Note: there could be multiple contours if the object
    # is partially occluded. (E.g. an elephant behind a tree)
    contours = measure.find_contours(np.array(sub_mask), 0.5, positive_orientation="low")

    polygons = []
    segmentations = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        xy = contour[:, ::-1] - 1

        # Make a polygon and simplify it
        poly = Polygon(xy).simplify(1.0, preserve_topology=False)

        if poly.is_empty:
            # Nothing left after simplification; do not record it
            continue

        try:
            exterior = poly.exterior
        except AttributeError:
            # Simplifying without topology preservation can yield a geometry
            # that has no single exterior ring; skip it as before, but let
            # any other, unexpected error surface.
            continue

        polygons.append(poly)
        segmentations.append(np.array(exterior.coords).ravel().tolist())

    return polygons, segmentations

def create_category_annotation(category_dict):
    """Build the "categories" section from a name -> id mapping.

    Args:
        category_dict: mapping of category name to numeric category id.

    Returns:
        A list with one dict per category, in the mapping's iteration order.
        The name is used for both "name" and "supercategory"; "isthing" is 0
        for the category named 'outlier' and 1 for every other category.
    """
    return [
        {
            "supercategory": name,
            "id": label_id,
            "name": name,
            "isthing": 0 if name == 'outlier' else 1,
        }
        for name, label_id in category_dict.items()
    ]

def create_image_annotation(file_name, width, height, image_id):
    """Build one entry of the "images" section.

    Args:
        file_name: file name to record for the image.
        width: image width.
        height: image height.
        image_id: numeric id of the image.

    Returns:
        dict with the keys "file_name", "height", "width" and "id".
    """
    return dict(file_name=file_name, height=height, width=width, id=image_id)

def create_annotation_format(polygon, segmentation, image_id, category_id, annotation_id):
    """Build one segment record from a geometry.

    Args:
        polygon: geometry exposing ``bounds`` as (min_x, min_y, max_x, max_y)
            and ``area``.
        segmentation: accepted for interface compatibility; not written to
            the record.
        image_id: accepted for interface compatibility; not written to the
            record.
        category_id: category id stored in the record.
        annotation_id: id stored in the record.

    Returns:
        dict with "area", "iscrowd" (always 0), "bbox" as the tuple
        (min_x, min_y, width, height), "category_id" and "id".
    """
    left, top, right, bottom = polygon.bounds
    box = (left, top, right - left, bottom - top)

    record = {}
    record["area"] = polygon.area
    record["iscrowd"] = 0
    record["bbox"] = box
    record["category_id"] = category_id
    record["id"] = annotation_id
    return record

def get_coco_json_format():
    """Return a fresh, empty skeleton of the COCO JSON structure.

    Returns:
        dict with "info" (empty dict), "licenses" (empty list) and the
        sections "images", "categories" and "annotations", each initialised
        to a list holding a single empty dict as a placeholder.
    """
    # Standard COCO format
    skeleton = {"info": {}, "licenses": []}
    for section in ("images", "categories", "annotations"):
        skeleton[section] = [{}]
    return skeleton

In [3]:
# Label ids of the dataset
category_ids = {
    "outlier": 0,
    "rebar": 1,
    "spall": 2,
    "crack": 3,
}

# Define which mask pixel values match which label ids.
# Keys are compared against str(pixel), so they must be written exactly as
# str() renders an int: no leading zeros ("55", never "055").
category_colors = {
    "0": 0,  # stuff
    "255": 1,  # defect
    "155": 2,
    "55": 3,
}

# Defect classes only: every label except the "outlier" background.
# Derived from category_ids so the two tables cannot drift apart.
category_defect = {
    name: label_id for name, label_id in category_ids.items() if name != "outlier"
}

# Label ids whose contours are merged into one MultiPolygon annotation.
# Empty here, so every contour becomes its own annotation.
multipolygon_ids = []

In [10]:
# Get "images" and "annotations" info 
def images_annotations_info(maskpath, imagepath, min_bbox_size=3):
    """Build the "images" and "annotations" sections for one folder of masks.

    Every ``*.jpg`` file matched by ``maskpath + "*.jpg"`` is treated as a
    mask. The last five characters of the file stem (the part before the
    first ".") name the defect class and are looked up in
    ``category_defect``. Only pixels with value 255 are annotated.

    Args:
        maskpath: folder prefix of the mask files; it is concatenated
            directly with "*.jpg", so it must end with a path separator.
        imagepath: folder of the original images. Currently unused; kept so
            existing callers keep working.
        min_bbox_size: a segment is kept only when both its bounding-box
            width and height are strictly greater than this value.

    Returns:
        ``(images, annotations, annotation_id)`` where ``images`` has one
        entry per mask file (recorded under the mask's own file name),
        ``annotations`` has one ``{"segments_info", "file_name", "image_id"}``
        dict per mask file, and ``annotation_id`` is the total number of
        segments kept.

    Raises:
        KeyError: if a mask containing 255-valued pixels has a file-name
            suffix that is not a key of ``category_defect``.
    """
    # This id will be automatically increased as we go
    annotation_id = 0
    images = []
    annotations = []

    # glob returns matches in arbitrary order; sort so that image and
    # annotation ids are reproducible from run to run.
    mask_files = sorted(glob.glob(maskpath + "*.jpg"))

    for image_id, mask_file in enumerate(mask_files):
        fname = os.path.basename(mask_file)
        defect = fname.split('.', 1)[0][-5:]

        # Close the file handle as soon as the pixels have been read; the
        # sub-masks are freshly created images and do not depend on it.
        with Image.open(mask_file) as mask_image_open:
            img_w, img_h = mask_image_open.size
            sub_masks = create_sub_masks(mask_image_open, img_w, img_h)

        # "images" info
        images.append(create_image_annotation(fname, img_w, img_h, image_id))

        segments_info = []
        for color, sub_mask in sub_masks.items():
            # Keys are str(pixel); only the defect value 255 is annotated
            if color != '255':
                continue
            category_id = category_defect[defect]

            # "annotations" info
            polygons, segmentations = create_sub_mask_annotation(sub_mask)

            if category_id in multipolygon_ids:
                # Combine the polygons to calculate one bounding box and area
                shapes = [(MultiPolygon(polygons), segmentations)]
            else:
                shapes = [(poly, [seg]) for poly, seg in zip(polygons, segmentations)]

            for shape, segmentation in shapes:
                annotation = create_annotation_format(shape, segmentation, image_id, category_id, annotation_id)

                # Distinct names: the image width/height must not be clobbered
                _, _, box_w, box_h = annotation['bbox']
                if box_w > min_bbox_size and box_h > min_bbox_size:
                    segments_info.append(annotation)
                    annotation_id += 1

        annotations.append({"segments_info": segments_info,
                            "file_name": fname,
                            "image_id": image_id})

    return images, annotations, annotation_id

In [11]:
if __name__ == "__main__":
    # Single place to point the conversion at a dataset checkout
    dataset_root = "/home/sn/EVA7/capstone/dataset"
    annotation_dir = "{}/annotations".format(dataset_root)
    # open(..., "w") does not create missing folders
    os.makedirs(annotation_dir, exist_ok=True)

    # The category section is the same for every split
    categories = create_category_annotation(category_ids)

    for keyword in ["train", "test"]:
        mask_path = "{}/{}_mask/".format(dataset_root, keyword)
        imagepath = "{}/{}_images/".format(dataset_root, keyword)

        # Start from a fresh skeleton so nothing carries over between splits
        coco_format = get_coco_json_format()
        coco_format["categories"] = categories

        # Create images and annotations sections
        coco_format["images"], coco_format["annotations"], annotation_cnt = images_annotations_info(mask_path, imagepath)

        out_file = "{}/panoptic_{}2017.json".format(annotation_dir, keyword)
        with open(out_file, "w") as outfile:
            json.dump(coco_format, outfile)

        print("Created %d annotations for images in folder: %s" % (annotation_cnt, mask_path))


Created 1750 annotations for images in folder: /home/sn/EVA7/capstone/dataset/train_mask/
Created 443 annotations for images in folder: /home/sn/EVA7/capstone/dataset/test_mask/


In [8]:
# multipolygon_ids

[]

In [66]:
# imag1 = Image.open('/home/sn/EVA7/capstone/dataset/train_mask/00367rebar.jpg')
# width, height = imag1.size

# sub_masks = create_sub_masks(imag1, width, height )

In [67]:
# sub_masks

{'0': <PIL.Image.Image image mode=1 size=1026x892 at 0x7F467DA35EE0>,
 '255': <PIL.Image.Image image mode=1 size=1026x892 at 0x7F467DE55F10>}

In [71]:
# image_id = 0
# annotation_id = 0
# annotations = []
# for color, sub_mask in sub_masks.items():

#     if color not in ['255']:
#         continue

# #     if color == 0:
#     category_id = category_colors[color]
# #     else:
# #         category_id = category_defect[defect]

#     # "annotations" info
#     polygons, segmentations = create_sub_mask_annotation(sub_mask)

#     # Check if we have classes that are a multipolygon
#     if category_id in multipolygon_ids:
#         # Combine the polygons to calculate the bounding box and area
#         multi_poly = MultiPolygon(polygons)

#         annotation = create_annotation_format(multi_poly, segmentations, image_id, category_id, annotation_id)

#         [x,y,w,h] = annotation['bbox']
#         if w > 3 and h > 3:                     
#             annotations.append(annotation)
#             annotation_id += 1
#     else:
#         for i in range(len(polygons)):
#             # Cleaner to recalculate this variable
#             segmentation = [np.array(polygons[i].exterior.coords).ravel().tolist()]

#             annotation = create_annotation_format(polygons[i], segmentation, image_id, category_id, annotation_id)

#             [x,y,w,h] = annotation['bbox']
#             if w > 3 and h > 3:                     
#                 annotations.append(annotation)
#                 annotation_id += 1
# image_id += 1


In [72]:
# annotations

[{'segmentation': [[393.0,
    801.5,
    389.0,
    801.5,
    377.0,
    793.5,
    349.0,
    793.5,
    345.0,
    795.5,
    339.0,
    793.5,
    326.0,
    793.5,
    314.0,
    788.5,
    309.0,
    783.5,
    303.0,
    781.5,
    292.0,
    773.5,
    276.0,
    773.5,
    269.0,
    769.5,
    257.0,
    770.5,
    245.0,
    763.5,
    230.0,
    763.5,
    217.0,
    759.5,
    211.0,
    755.5,
    191.0,
    750.5,
    176.0,
    739.5,
    168.0,
    736.5,
    155.0,
    727.5,
    144.5,
    716.0,
    137.5,
    700.0,
    128.5,
    691.0,
    126.5,
    684.0,
    125.5,
    665.0,
    116.5,
    637.0,
    117.5,
    603.0,
    119.5,
    598.0,
    127.0,
    588.5,
    142.5,
    579.0,
    147.5,
    566.0,
    147.5,
    536.0,
    138.5,
    513.0,
    137.5,
    504.0,
    134.5,
    500.0,
    134.5,
    493.0,
    129.5,
    481.0,
    128.5,
    464.0,
    131.5,
    459.0,
    131.5,
    448.0,
    141.5,
    432.0,
    139.5,
    422.0,
    146.5,
    4