In [1]:
import pathlib
import json
import os

import skimage.io
import numpy as np
import json
from detectron2.config import get_cfg
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.engine import DefaultTrainer
from detectron2.structures import BoxMode

# Sanity check: print whether torch reports an available CUDA device, together
# with the CUDA_HOME value exposed by torch.utils.cpp_extension.
import torch
from torch.utils.cpp_extension import CUDA_HOME
print(torch.cuda.is_available(), CUDA_HOME)


True None


In [2]:
import detectron2.utils.comm as comm
from detectron2.checkpoint import DetectionCheckpointer, PeriodicCheckpointer
from detectron2.config import get_cfg

from detectron2.engine import default_argument_parser, default_setup, launch
from detectron2.evaluation import (
    CityscapesEvaluator,
    COCOEvaluator,
    COCOPanopticEvaluator,
    DatasetEvaluators,
    LVISEvaluator,
    PascalVOCDetectionEvaluator,
    SemSegEvaluator,
    inference_on_dataset,
    print_csv_format,
)
from detectron2.modeling import build_model
from detectron2.solver import build_lr_scheduler, build_optimizer
from detectron2.utils.events import (
    CommonMetricPrinter,
    EventStorage,
    JSONWriter,
    TensorboardXWriter,)

from detectron2.data import (
    MetadataCatalog,
    build_detection_test_loader,
    build_detection_train_loader,
)


In [5]:
# Parse the VIA annotation file once so its contents can be explored in the cells below.
json_path = pathlib.Path('../data/raw/via_2.0.8/via_powder_particle_masks.json')
data = json.loads(json_path.read_bytes())

In [45]:
# Scratch check of the Metadata API: attach placeholder class ids to the
# 'powder_Training' metadata entry and display the resulting object.
# NOTE(review): the registration cell further down sets thing_classes=["Powder"]
# on this same entry; detectron2's Metadata is understood to refuse changing an
# attribute to a different value, so running both in one kernel may fail -- confirm.
MetadataCatalog.get('powder_Training').set(thing_classes=[1, 2])

Metadata(name='powder_Training', thing_classes=[1, 2])

In [47]:
# Read back the class list stored on the 'powder_Training' metadata entry.
MetadataCatalog.get('powder_Training').thing_classes

[1, 2]

In [39]:
# Case-insensitive ordering of the collected label names.
# NOTE(review): `labels` is only created in a later cell of this notebook, so this
# cell fails on a fresh kernel when the cells are run top to bottom.
sorted(labels, key=lambda x: x.upper())

['A', 'a', 'Powder']

In [26]:
# Gather every distinct 'Label' value used by the annotated regions, printing each
# image's filename and its number of regions along the way.
labels = set()
for img_meta in data['_via_img_metadata'].values():
    print(img_meta['filename'])
    regions = img_meta['regions']
    print(len(regions))
    labels.update(region['region_attributes']['Label'] for region in regions)

labels
# single-region lookup, for reference:
#data['_via_img_metadata']['image.png1371223']['regions'][0]['region_attributes']['Label']

Sc1Tile_001-001-000_0-000.png
219
Sc1Tile_001-002-000_0-000.png
351
Sc2Tile_001-001-000_0-000.png
259
Sc2Tile_010-006-000_0-000.png
238
Sc3Tile_008-007-000_0-000.png
293


{'Powder'}

In [34]:
# Default string sorting is case-sensitive: the uppercase 'Z' sorts ahead of the lowercase letters.
sorted(['b','c','a','Z'])

['Z', 'a', 'b', 'c']

In [37]:
# Check how a case-insensitive sort orders mixed-case names. The extra 'A' is
# merged into a temporary union so the real `labels` set is not permanently
# polluted with a made-up class name.
sorted(labels | {'A'}, key=lambda name: name.upper())

['A', 'a', 'Powder']

In [None]:
def _region_to_annotation(region):
    """Converts a single VIA polygon region into a detectron2 annotation dict."""
    shape = region["shape_attributes"]
    px = shape["all_points_x"]
    py = shape["all_points_y"]

    # polygon masks is a flat list of coordinates ([x0,y0,x1,y1...xn,yn]) as specified in
    # https://detectron2.readthedocs.io/modules/structures.html#detectron2.structures.PolygonMasks
    # every coordinate is shifted by 0.5, as in the balloon example this was adapted from
    poly = [coord + 0.5 for point in zip(px, py) for coord in point]

    return {
        # built-in min/max keep the box values as plain Python numbers (np.min/np.max
        # return numpy scalars, which json cannot serialize)
        "bbox": [min(px), min(py), max(px), max(py)],
        "bbox_mode": BoxMode.XYXY_ABS,  # boxes are given in absolute coordinates (ie not corner+width+height)
        "segmentation": [poly],
        "category_id": 0,  # every region is assigned class index 0
    }


def get_data_dicts(json_path):
    """
    Loads data in format consistent with detectron2.
    Adapted from balloon example here:
    https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5

    Inputs:
      json_path: string or pathlib path to json file containing relevant annotations.
                 Image paths are resolved relative to the directory of this file, using
                 the 'default_filepath' entry of the file's '_via_settings'/'core' section.

    Outputs:
      dataset_dicts: list(dic) of datasets compatible for detectron 2
                     More information can be found at:
                     https://detectron2.readthedocs.io/tutorials/datasets.html#
                     Each dict has the keys 'file_name', 'image_id', 'height', 'width',
                     'dataset_class' (the image's 'Image Class' file attribute) and
                     'annotations' (one dict per region, see _region_to_annotation).

    Raises:
      KeyError: if an expected key is missing from the json file, e.g. a region
                without 'all_points_x'/'all_points_y'.
    """
    json_path = os.fspath(json_path)  # plain string needed for path manipulations
    with open(json_path, encoding="utf-8") as f:  # do not depend on the locale's default encoding
        via_data = json.load(f)

    # root directory of images is given by relative path in json file
    img_root = os.path.join(os.path.dirname(json_path), via_data['_via_settings']['core']['default_filepath'])

    dataset_dicts = []
    for idx, img_meta in enumerate(via_data['_via_img_metadata'].values()):
        filename = os.path.join(img_root, img_meta["filename"])

        # inefficient for large sets of images (the whole image is read just for its shape)
        height, width = skimage.io.imread(filename).shape[:2]

        dataset_dicts.append({
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "dataset_class": img_meta['file_attributes']['Image Class'],
            "annotations": [_region_to_annotation(region) for region in img_meta["regions"]],
        })
    return dataset_dicts

def split_data_dict(dataset_dicts, get_subset=None):
    """
    Splits data from json into subsets (ie training/validation/testing)

    inputs
      dataset_dicts- list(dic) from get_data_dicts()
      get_subset- function that identifies
                  class of each item  in dataset_dict.
                  For example, get_subset(dataset_dicts[0])
                  returns 'Training', 'Validation', 'Test', etc
                  Any hashable value may be returned.
                  If None, the item's 'dataset_class' entry is used

    returns
      subs- dictionary where each key is the class of data
            determined from get_subset, and value is a list
            of dicts (same format of output of get_data_dicts())
            with data of that class, in their original order.
            Keys are the exact values returned by get_subset and
            are sorted when they can be compared with each other,
            otherwise they appear in order of first occurrence.
    """

    if get_subset is None:
        def get_subset(record):
            return record['dataset_class']

    # single pass: get_subset is evaluated exactly once per record
    grouped = {}
    for record in dataset_dicts:
        grouped.setdefault(get_subset(record), []).append(record)

    try:
        ordered_keys = sorted(grouped)
    except TypeError:
        # keys that cannot be ordered (e.g. None mixed with str): keep first-seen order
        ordered_keys = list(grouped)

    return {key: grouped[key] for key in ordered_keys}
    

# TODO setup 'thing_classes' to read from data-- later, this requires a lot of changes

# Load the annotations, split them by their 'dataset_class', and register every
# subset with detectron2 under the name "powder_<subset>".
json_path = '../data/raw/via_2.0.8/via_powder_particle_masks.json'
ddicts = get_data_dicts(json_path)

subs = split_data_dict(ddicts)

for key in subs:
    dataset_name = "powder_" + key
    # Registering a name twice raises, so skip names that already exist; this keeps the
    # cell re-runnable within one kernel. The loader looks `subs` up when it is called
    # (only `key` is bound at definition time), so a loader registered by an earlier
    # run of this cell still returns the freshly loaded data.
    if dataset_name not in DatasetCatalog.list():
        DatasetCatalog.register(dataset_name, lambda key=key: subs.get(key))
    MetadataCatalog.get(dataset_name).set(thing_classes=["Powder"])



In [None]:
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
import cv2
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import matplotlib.pyplot as plt

######### visualize the annotations of the registered datasets

def show_dataset_annotations(dataset_name, metadata_name=None, figsize=(10, 5), dpi=300):
    """
    Draws the ground-truth annotations of every image in a registered dataset.

    Inputs:
      dataset_name: name the dataset was registered under in DatasetCatalog
      metadata_name: name of the MetadataCatalog entry used for drawing;
                     if None, the entry of dataset_name is used
      figsize, dpi: passed on to plt.subplots for each figure

    Raises:
      FileNotFoundError: if an image listed in the dataset cannot be read
    """
    metadata = MetadataCatalog.get(dataset_name if metadata_name is None else metadata_name)
    for record in DatasetCatalog.get(dataset_name):
        img_bgr = cv2.imread(str(record["file_name"]))
        if img_bgr is None:
            # cv2.imread signals an unreadable/missing file by returning None
            raise FileNotFoundError("could not read image: {}".format(record["file_name"]))

        # cv2 loads images as BGR while Visualizer works on RGB, so flip the channel
        # axis once on the way in; the drawn result is then already RGB for matplotlib
        visualizer = Visualizer(img_bgr[:, :, ::-1], metadata=metadata, scale=1)
        vis = visualizer.draw_dataset_dict(record)

        fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
        ax.imshow(vis.get_image())
        plt.show()
        plt.close(fig)  # release the figure; one is created per image


print('training')
show_dataset_annotations('powder_Training')
print('validation')
show_dataset_annotations('powder_Validation')

In [None]:
# Start from the model-zoo Mask R-CNN (R50-FPN, 3x schedule) config and adapt it
# to the single-class powder particle datasets registered above.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("powder_Training",)
# the trailing comma matters: without it this is a plain string rather than a tuple of dataset names
cfg.DATASETS.TEST = ("powder_Validation",)
cfg.DATALOADER.NUM_WORKERS = 2
#cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
# initial weights are read from cfg.MODEL.WEIGHTS; assigning cfg.MODEL_WEIGHTS only adds
# an unrelated top-level key and leaves the weights from the merged config untouched
cfg.MODEL.WEIGHTS = '../models/model_final_f10217.pkl'
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300    # 300 iterations seems good enough for this toy dataset; you may need to train longer for a practical dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (powder particle)

In [None]:
# Take the first record of the 'Training' subset and list the fields it carries.
ddict = subs['Training'][0]
ddict.keys()

In [None]:
# Run one record through a DatasetMapper built from the current cfg, to inspect
# what the mapper produces for it.
from detectron2.data.dataset_mapper import DatasetMapper
mapper = DatasetMapper(cfg)
mapped_ddict = mapper(ddict)

In [None]:
# Inspect the 'instances' entry of the mapped record.
mapped_ddict['instances']

In [None]:
# Fields present on the mapped record.
mapped_ddict.keys()

In [None]:
# Show which dataset name(s) the config is set to train on.
cfg.DATASETS.TRAIN

In [None]:
# Build a validation loader for 'powder_Validation' through the `dataval` helper.
# NOTE(review): `dataval` is imported as a top-level module and is not defined in this
# notebook -- presumably a project-local file that must be importable from the working
# directory; the meaning of the `None` argument cannot be told from here. Confirm.
import dataval
data_val = dataval.build_detection_val_loader(cfg, None, ['powder_Validation'])

In [None]:
# Build the training and validation loaders, then peek at the first training batch:
# print the keys of its first sample plus the type and attributes of its 'instances'.
data_loader = build_detection_train_loader(cfg)
data_val_loader = build_detection_test_loader(cfg, 'powder_Validation')

first_batch = next(iter(data_loader), None)
if first_batch is not None:
    sample = first_batch[0]
    print(sample.keys())
    print(type(sample['instances']))
    print(dir(sample['instances']))
    

In [None]:
# NOTE(review): bare undefined name -- appears to be a deliberate NameError used to stop
# "Run All" before the training cell below; confirm, or remove once no longer needed.
error

In [None]:
# Make sure the configured output directory exists, then build a DefaultTrainer
# from cfg and run training. resume=False is passed to resume_or_load, i.e. a
# previous run is not resumed.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
# Leftover IPython introspection (`??` shows the function's source); safe to delete once exploration is done.
??model_zoo.get_checkpoint_url

In [None]:
# Display the model-zoo checkpoint URL that belongs to this config file.
model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo

In [None]:
# Show which weights file/URL the config currently points at.
cfg.MODEL.WEIGHTS