In [1]:
import pathlib
import json
import os

import skimage.io
import numpy as np
import json
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog

In [2]:
def get_data_dicts(json_path):
    """
    Loads VIA polygon annotations in a format consistent with detectron2.
    Adapted from balloon example here:
    https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5
    
    Inputs: 
      json_path: string or pathlib path to json file containing relevant annotations.
                 The file must provide '_via_settings' -> 'core' -> 'default_filepath'
                 (image directory, relative to the json file) and '_via_img_metadata'.
    
    Outputs:
      dataset_dicts: list(dic) of datasets compatible for detectron 2
                     More information can be found at:
                     https://detectron2.readthedocs.io/tutorials/datasets.html#
                     Every dict holds 'file_name' (str), 'image_id', 'height', 'width',
                     'dataset_class' (the 'Image Class' file attribute) and 'annotations'.

    Raises:
      KeyError: if one of the keys read below is missing, e.g. a region whose
                shape_attributes has no 'all_points_x'/'all_points_y'.
    """
    json_path = pathlib.Path(json_path)  # needed for path manipulations
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(json_path, encoding="utf-8") as f:
        via_data = json.load(f)

    # root directory of images is given by relative path in json file
    img_root = pathlib.Path(json_path.parent, via_data['_via_settings']['core']['default_filepath'])
    imgs_anns = via_data['_via_img_metadata']

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns.values()):
        filename = pathlib.Path(img_root, v["filename"])

        # inefficient for large sets of images (the whole image is decoded just to
        # learn its size), read from json?
        height, width = skimage.io.imread(filename).shape[:2]

        # The balloon example this was adapted from stores regions as a dict keyed
        # by index, while this project's json stores a list; accept both layouts.
        regions = v["regions"]
        if isinstance(regions, dict):
            regions = regions.values()

        objs = []
        for region in regions:
            # region["region_attributes"] is intentionally ignored: every region is
            # treated as an instance of the single class 0.
            shape = region["shape_attributes"]
            px = shape["all_points_x"]
            py = shape["all_points_y"]

            # polygon masks is list of polygon coordinates in format ([x0,y0,x1,y1...xn,yn]) as specified in
            # https://detectron2.readthedocs.io/modules/structures.html#detectron2.structures.PolygonMasks
            # Each coordinate is shifted by 0.5, as in the balloon example.
            poly = [coord + 0.5 for point in zip(px, py) for coord in point]

            objs.append({
                # built-in min/max keep the box as plain Python numbers rather than numpy scalars
                "bbox": [min(px), min(py), max(px), max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,  # boxes are given in absolute coordinates (ie not corner+width+height)
                "segmentation": [poly],
                "category_id": 0,
            })

        dataset_dicts.append({
            # detectron2 documents file_name as a string path, so do not hand it a Path object
            "file_name": str(filename),
            "image_id": idx,
            "height": height,
            "width": width,
            "dataset_class": v['file_attributes']['Image Class'],
            "annotations": objs,
        })
    return dataset_dicts

def split_data_dict(dataset_dicts, get_subset=None):
    """
    Splits data from json into subsets (ie training/validation/testing)
    
    inputs 
      dataset_dicts- list(dic) from get_data_dicts()
      get_subset- function that identifies 
                  class of each item  in dataset_dict.
                  For example, get_subset(dataset_dicts[0])
                  returns 'Training', 'Validation', 'Test', etc
                  If None, the item's 'dataset_class' entry is used.
                  The returned label must be hashable.
    
    returns
      subs- dictionary where each key is the class of data
            determined from get_subset, and value is a list
            of dicts (same format of output of get_data_dicts())
            with data of that class. Keys are the labels exactly as
            returned by get_subset, in sorted order when the labels
            can be compared and in first-seen order otherwise.
            An empty input gives an empty dictionary.
    """
    if get_subset is None:
        def get_subset(item):
            return item['dataset_class']

    # Group in a single pass, calling get_subset once per item. Using the label
    # itself as the key avoids the type coercion of going through a numpy array
    # (numpy scalar keys, mixed-type labels turned into strings).
    grouped = {}
    for item in dataset_dicts:
        grouped.setdefault(get_subset(item), []).append(item)

    # Keep the sorted key order callers previously got; labels that cannot be
    # ordered against each other fall back to first-seen order.
    try:
        labels = sorted(grouped)
    except TypeError:
        labels = list(grouped)

    return {label: grouped[label] for label in labels}
    

# TODO setup 'thing_classes' to read from data-- later, this requires a lot of changes

# Load every annotated image listed in the VIA project file.
json_path = '../data/raw/via_2.0.8/via_powder_particle_masks.json'
ddicts = get_data_dicts(json_path)

# Group the records by their 'dataset_class' label (e.g. 'Training', 'Validation').
subs = split_data_dict(ddicts)

# Register one detectron2 dataset per subset under the name "powder_<subset>".
already_registered = set(DatasetCatalog.list())
for key in subs:
    name = "powder_" + key
    # DatasetCatalog.register rejects a name that is already registered, so skip
    # it when this cell is re-run in the same kernel.
    if name not in already_registered:
        # `key` is frozen through the default argument; `subs` is looked up when the
        # loader is called, so a loader registered by an earlier run of this cell
        # still returns the most recent split.
        DatasetCatalog.register(name, lambda key=key: subs.get(key))
    MetadataCatalog.get(name).set(thing_classes=["Powder"])



In [3]:
# Number of records (one per image entry in the annotation file) labelled 'Training'.
len(subs['Training'])

4

In [4]:
# Going through the catalog calls the registered loader, so this checks that the
# 'powder_Validation' registration returns the validation records.
len(DatasetCatalog.get('powder_Validation'))

1

In [5]:
# Display the metadata object attached to the 'powder_Training' dataset.
MetadataCatalog.get('powder_Training')

Metadata(name='powder_Training', thing_classes=['Powder'])

In [6]:
# Class names stored for the dataset; the registration cell sets a single class.
MetadataCatalog.get('powder_Training').get('thing_classes')

['Powder']

In [7]:
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import cv2
import matplotlib.pyplot as plt

######### uncomment to visualize

# print('training')
# for d in DatasetCatalog.get('powder_Training'):
#     img = cv2.imread(str(d["file_name"]))
#     visualizer = Visualizer(img, metadata=MetadataCatalog.get('powder_Training'), scale=1)
#     vis = visualizer.draw_dataset_dict(d)
#     fig, ax = plt.subplots(figsize=(10,5), dpi=300)
#     plt.imshow(vis.get_image()[:, :, ::-1])
#     plt.show()
# print('validation')
# for d in DatasetCatalog.get('powder_Validation'):
#     img = cv2.imread(str(d["file_name"]))
#     visualizer = Visualizer(img, metadata=MetadataCatalog.get('powder_Training'), scale=1)
#     vis = visualizer.draw_dataset_dict(d)
#     fig, ax = plt.subplots(figsize=(10,5), dpi=300)
#     plt.imshow(vis.get_image()[:, :, ::-1])
#     plt.show()