In [None]:
# default_exp json_file

In [None]:
# export

import logging
from dataclasses import dataclass
from typing import *
from pathlib import Path
import json

from cococleaner.utils import sort_dict
from cococleaner.coco import *

# Shared logger for this module. getLogger() without a name returns the root
# logger, which is why records from this module are emitted as "root".
logger = logging.getLogger()

In [None]:
#export

def load_json_file(annotations_json: Union[str, Path], *, kind: str = "object_detection") -> CocoDataset:
    """Read a COCO-style annotations JSON file and build a dataset object from it.

    Args:
        annotations_json: Location of the JSON file to read.
        kind: Name handed to `get_dataset_class` to pick the dataset class
            whose `from_dict` constructs the result.

    Returns:
        Whatever `from_dict` of the selected dataset class returns for the
        parsed JSON document.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content cannot be decoded or is not valid JSON
            (`json.JSONDecodeError` is a `ValueError` subclass).
    """
    from_dict_function = get_dataset_class(kind).from_dict

    annotations_json = Path(annotations_json)
    logger.info(f"Loading json file from file: {annotations_json}")
    # Parse from raw bytes: json detects UTF-8/16/32 (including a BOM) itself,
    # instead of decoding with the platform's locale-dependent default encoding.
    D = json.loads(annotations_json.read_bytes())
    # The summary is purely diagnostic, so a missing section is reported as 0
    # here and left for `from_dict` to accept or reject.
    logger.info(
        f"Loaded: images={len(D.get('images', ()))}, "
        f"annotations={len(D.get('annotations', ()))}, "
        f"categories={len(D.get('categories', ()))}"
    )
    return from_dict_function(D)


In [None]:
# hide
PATH = '../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'
d = load_json_file(PATH)

# Show the info record and the first entry of each list in the loaded dataset.
display(d.info, d.annotations[0], d.images[0], d.categories[0])

# Every displayed sample must be an instance of its object-detection class.
assert all(
    isinstance(sample, expected_type)
    for sample, expected_type in (
        (d.info, CocoInfo),
        (d.annotations[0], CocoObjectDetectionAnnotation),
        (d.images[0], CocoImage),
        (d.categories[0], CocoObjectDetectionCategory),
    )
)

INFO:root:Loading json file from file: ../examples/coco-dataset/json.file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3


CocoInfo(year=2017, version='1.0', description='COCO 2017 Dataset', contributor='COCO Consortium', url='http://cocodataset.org', date_created='2017/09/01')

CocoObjectDetectionAnnotation(id=124710, image_id=140006, category_id=2, bbox=(496.52, 125.94, 143.48, 113.54), supercategory=None, area=9387.706200000002, iscrowd=0)

CocoImage(id=49428, coco_url='http://images.cocodataset.org/train2017/000000049428.jpg', width=640, height=427, license=6, file_name='000000049428.jpg', flickr_url='http://farm7.staticflickr.com/6014/5923365195_bee5603371_z.jpg', date_captured='2013-11-15 04:30:29')

CocoObjectDetectionCategory(id=1, name='person', supercategory='person')

In [None]:
# export

def dump_json_file(
    coco: CocoDataset,
    annotations_json: Union[str, Path],
    *,
    kind: str = "object_detection",
    skip_nulls: bool = False,
    overwrite: bool = False,
    indent: Optional[int] = 4,
) -> None:
    """Serialize a dataset object to a JSON file.

    Args:
        coco: Dataset object to serialize.
        annotations_json: Destination path of the JSON file.
        kind: Name handed to `get_dataset_class` to pick the dataset class
            whose serializer is used.
        skip_nulls: When true, serialize with the class's `to_dict_skip_nulls`
            instead of `to_dict`.
        overwrite: When false, refuse to write if the destination is already
            an existing file.
        indent: Forwarded to `json.dumps` as its `indent` argument.

    Raises:
        ValueError: If the destination file exists and `overwrite` is false.
    """
    dataset_class = get_dataset_class(kind)
    to_dict_function = (
        dataset_class.to_dict_skip_nulls if skip_nulls else dataset_class.to_dict
    )

    destination = Path(annotations_json)
    logger.info(f"Dumping json file to file: {destination}")
    already_present = destination.is_file()
    if already_present and not overwrite:
        raise ValueError(f"Destination json file already exists: {destination}")

    # The serialized dict goes through `sort_dict` before being encoded.
    payload = sort_dict(to_dict_function(coco))
    logger.info(f"Writing dataset to json file: {destination}")
    # Missing parent directories are created on demand.
    destination.parent.mkdir(parents=True, exist_ok=True)
    encoded = json.dumps(payload, indent=indent)
    destination.write_text(encoded)


NameError: name 'CocoDataset' is not defined

In [None]:
# hide
import tempfile
tmp = tempfile.mktemp()

dump_json_file(d, tmp)
! cat {tmp} | jq .info
! cat {tmp} | jq .images[0]
! cat {tmp} | jq .annotations[0]
! cat {tmp} | jq .categories[0]

INFO:root:Dumping json file to file: /tmp/tmpqnqdcnup
INFO:root:Writing dataset to json file: /tmp/tmpqnqdcnup


[1;39m{
  [0m[34;1m"year"[0m[1;39m: [0m[0;39m2017[0m[1;39m,
  [0m[34;1m"version"[0m[1;39m: [0m[0;32m"1.0"[0m[1;39m,
  [0m[34;1m"description"[0m[1;39m: [0m[0;32m"COCO 2017 Dataset"[0m[1;39m,
  [0m[34;1m"contributor"[0m[1;39m: [0m[0;32m"COCO Consortium"[0m[1;39m,
  [0m[34;1m"url"[0m[1;39m: [0m[0;32m"http://cocodataset.org"[0m[1;39m,
  [0m[34;1m"date_created"[0m[1;39m: [0m[0;32m"2017/09/01"[0m[1;39m
[1;39m}[0m
[1;39m{
  [0m[34;1m"id"[0m[1;39m: [0m[0;39m49428[0m[1;39m,
  [0m[34;1m"coco_url"[0m[1;39m: [0m[0;32m"http://images.cocodataset.org/train2017/000000049428.jpg"[0m[1;39m,
  [0m[34;1m"width"[0m[1;39m: [0m[0;39m640[0m[1;39m,
  [0m[34;1m"height"[0m[1;39m: [0m[0;39m427[0m[1;39m,
  [0m[34;1m"license"[0m[1;39m: [0m[0;39m6[0m[1;39m,
  [0m[34;1m"file_name"[0m[1;39m: [0m[0;32m"000000049428.jpg"[0m[1;39m,
  [0m[34;1m"flickr_url"[0m[1;39m: [0m[0;32m"http://farm7.staticflickr.com/6014/5923365