In [16]:
import glob
import json
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import skimage.color
import skimage.draw
import skimage.io
from PIL import Image

# Helper classes

In [17]:
class Dataset(object):
    """In-memory registry of the classes and images that make up a dataset.

    Typical use: a subclass registers entries through add_class() and
    add_image(), then prepare() is called once to build the derived lookup
    tables (class_ids, class_names, image_ids, source maps).

    Subclasses are expected to override load_mask() and image_reference()
    for the sources they know about; the versions defined here are neutral
    fallbacks.
    """

    def __init__(self, class_map=None):
        """Create an empty dataset that only knows the background class.

        class_map is accepted but not used by this implementation.
        """
        self._image_ids = []
        self.image_info = []
        # Internal class 0 is always the background ("BG").
        self.class_info = [{"source": "", "id": 0, "name": "BG"}]
        self.source_class_ids = {}

    def add_class(self, source, class_id, class_name):
        """Register a class; a (source, class_id) pair that already exists is ignored.

        source must not contain a dot because prepare() builds lookup keys
        of the form "<source>.<id>".
        """
        assert "." not in source, "Source name cannot contain a dot"
        # Does the class exist already?
        for info in self.class_info:
            if info['source'] == source and info["id"] == class_id:
                return
        # Add the class
        self.class_info.append({"source": source, "id": class_id, "name": class_name})

    def add_image(self, source, image_id, path, **kwargs):
        """Register an image; extra keyword arguments are stored alongside id/source/path."""
        image_info = {"id": image_id, "source": source, "path": path}
        image_info.update(kwargs)
        self.image_info.append(image_info)

    def prepare(self, class_map=None):
        """Build (or rebuild) the derived attributes from class_info and image_info.

        Must be called after the last add_class()/add_image() and before
        image_ids or any of the lookup tables are read.
        class_map is accepted but not used by this implementation.
        """

        def clean_name(name):
            # Keep only the text before the first comma.
            return name.split(",")[0]

        # Build (or rebuild) everything else from the info dicts.
        self.num_classes = len(self.class_info)
        self.class_ids = np.arange(self.num_classes)
        self.class_names = [clean_name(c["name"]) for c in self.class_info]
        self.num_images = len(self.image_info)
        self._image_ids = np.arange(self.num_images)

        # Mapping from "<source>.<id>" keys to internal (positional) IDs
        self.class_from_source_map = {
            "{}.{}".format(info['source'], info['id']): internal_id
            for info, internal_id in zip(self.class_info, self.class_ids)
        }
        self.image_from_source_map = {
            "{}.{}".format(info['source'], info['id']): internal_id
            for info, internal_id in zip(self.image_info, self.image_ids)
        }

        # Map sources to class_ids they support
        self.sources = list(set([i['source'] for i in self.class_info]))
        self.source_class_ids = {}
        # Loop over datasets
        for source in self.sources:
            self.source_class_ids[source] = []
            # Find classes that belong to this dataset
            for i, info in enumerate(self.class_info):
                # Include BG class in all datasets
                if i == 0 or source == info['source']:
                    self.source_class_ids[source].append(i)

    @property
    def image_ids(self):
        """Internal image IDs (0..num_images-1 once prepare() has run)."""
        return self._image_ids

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,3] Numpy array.
        """
        # Load image
        image = skimage.io.imread(self.image_info[image_id]['path'])
        # If grayscale. Convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If has an alpha channel, remove it for consistency
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image

    def load_mask(self, image_id):
        """Fallback mask loader: returns no instances.

        Subclasses override this for their own sources and delegate here
        for images they do not recognise.

        Returns:
        masks     : an empty bool array of shape [0, 0, 0].
        class_ids : an empty 1D int32 array.
        """
        mask = np.zeros([0, 0, 0], dtype=bool)
        class_ids = np.zeros([0], dtype=np.int32)
        return mask, class_ids

    def image_reference(self, image_id):
        """Fallback image reference: an empty string.

        Subclasses override this to return something that identifies the image.
        """
        return ""

In [18]:
class CustomDataset(Dataset):
    """Dataset of "eyeglasses" polygon annotations read from a VIA JSON export."""

    def load_custom(self, dataset_dir, label_file):
        """Register the "eyeglasses" class and every annotated image of a VIA export.

        dataset_dir : directory containing the annotation file and the images it lists.
        label_file  : name of the VIA JSON file inside dataset_dir.

        Each image is opened once here to record its height and width, which
        load_mask() needs to rasterise the polygons.
        """
        # Add classes. Binary segmentation: only one foreground class to add.
        self.add_class("eyeglasses", 1, "eyeglasses")
        # Use a context manager so the file handle is closed right after parsing.
        annotation_path = os.path.join(dataset_dir, label_file)
        with open(annotation_path, 'r', encoding="utf8", errors='ignore') as f:
            annotations1 = json.load(f)
        # don't need the dict keys
        annotations = list(annotations1.values())
        # The VIA tool saves images in the JSON even if they don't have any annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]
        # Add images
        for a in annotations:
            # Get the x, y coordinates of the points of the polygons
            polygons = [r['shape_attributes'] for r in a['regions']]
            # load_mask() needs the image size to convert polygons to masks.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]
            self.add_image("eyeglasses", image_id=a['filename'], path=image_path, width=width, height=height, polygons=polygons)

    def load_mask(self, image_id):
        """Generate instance masks for an image.
        masks     : A bool array of shape [height, width, instance count] with one mask per instance.
        class_ids : a 1D array of class IDs of the instance masks.
        """
        info = self.image_info[image_id]
        # If not an eyeglasses dataset image, delegate to parent class.
        # Zero-argument super() is used because super(self.__class__, self)
        # recurses forever as soon as this class is subclassed.
        if info["source"] != "eyeglasses":
            return super().load_mask(image_id)
        # Convert polygons to a bitmap mask of shape [height, width, instance_count]
        mask = np.zeros([info["height"], info["width"], len(info["polygons"])], dtype=np.uint8)
        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them to 1.
            # shape= clips the polygon to the image, so vertices lying
            # outside the image cannot produce out-of-range indices.
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'], shape=mask.shape[:2])
            mask[rr, cc, i] = 1
        # Return mask, and array of class IDs of each instance. Since we have one class ID only, we return an array of 1s.
        # The builtin bool is used because the np.bool alias was removed from NumPy.
        return mask.astype(bool), np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "eyeglasses":
            return info["path"]
        # Propagate the parent's answer instead of silently returning None.
        return super().image_reference(image_id)

# Read the JSON annotations

In [19]:
# Load dataset
# The annotation file and the images it references are both read from data_dir.
# Defaults are the original local paths; set EYEGLASSES_DATA_DIR and/or
# EYEGLASSES_LABEL_FILE in the environment to run on another machine
# without editing this cell.
data_dir   = os.environ.get("EYEGLASSES_DATA_DIR", "C:\\Users\\gueganj\\Downloads\\")
label_file = os.environ.get("EYEGLASSES_LABEL_FILE", "via_project_3Feb2021_14h1m_json.json")
dataset    = CustomDataset()
dataset.load_custom(data_dir, label_file)
# Must call before using the dataset
dataset.prepare()

# Format and Save

In [20]:
def _top_class_binary_mask(mask, class_ids):
    """Merge the instance masks of the largest class into one 0/255 uint8 image.

    mask      : array of shape [height, width, instance count], one mask per instance.
    class_ids : 1D array giving the class ID of each instance.

    The class whose instances cover the most pixels is selected; a pixel is
    255 if any instance of that class covers it and 0 otherwise. If no class
    has a non-zero area the result is all zeros.
    """
    unique_class_ids = np.unique(class_ids)
    mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) for i in unique_class_ids]
    # Classes ordered by covered area, largest first; empty classes are dropped.
    top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), key=lambda r: r[1], reverse=True) if v[1] > 0]
    # -1 matches no instance, which selects an empty stack below.
    class_id = top_ids[0] if top_ids else -1
    # Pull masks of instances belonging to the same class and take their union.
    instances = mask[:, :, np.where(class_ids == class_id)[0]]
    return np.any(instances, axis=-1).astype(np.uint8) * 255


index = dataset.image_ids
for image_id in index:
    # Only the masks are needed here, so the image itself is not loaded.
    mask, class_ids = dataset.load_mask(image_id)
    img = _top_class_binary_mask(mask, class_ids)

    info = dataset.image_info[image_id]
    # splitext removes whatever extension the file has, and only at the end of the name.
    name_image = os.path.splitext(os.path.basename(info['id']))[0]
    out_path = os.path.join(data_dir, name_image + '.png')
    # Masks are written next to the source images: never overwrite a source
    # image that is itself a PNG with the same name.
    if os.path.normcase(os.path.abspath(out_path)) == os.path.normcase(os.path.abspath(info['path'])):
        out_path = os.path.join(data_dir, name_image + '_mask.png')
    # Fixed vmin/vmax keep 0 -> black and 255 -> white even when the mask is
    # uniform; with automatic scaling a mask covering the whole image would
    # be saved as all black.
    plt.imsave(out_path, img, cmap='binary_r', vmin=0, vmax=255)