In [None]:
# default_exp main

In [None]:
# export

import argparse
import logging
from pathlib import Path

from cococleaner.json_file import *
from cococleaner.json_tree import *
from cococleaner.crop_tree import *

# Module-wide logger: getLogger() without a name returns the root logger,
# which is why records emitted through it are labelled "root".
logger = logging.getLogger()
# Configure root logging at INFO level.
# NOTE(review): basicConfig() does nothing when the root logger already has
# handlers, e.g. if one of the modules imported above configured logging first.
logging.basicConfig(level=logging.INFO)

In [None]:
# export

def get_parser():
    """Build the command-line parser for the converter.

    Returns:
        argparse.ArgumentParser: parser on which ``--in_json_path``,
        ``--out_path`` and ``--out_format`` are required, and
        ``--in_crop_tree_path``, ``--overwrite`` and ``--indent`` (default 4)
        are optional.
    """

    def indent_value(text):
        """Convert ``--indent``: 'none' / 'null' / '~' (any case) -> None, otherwise int."""
        if str(text).lower() in ('none', 'null', '~'):
            return None
        try:
            return int(text)
        except ValueError:
            # An explicit ArgumentTypeError message replaces argparse's generic
            # "invalid <lambda> value" report that an anonymous converter produces.
            raise argparse.ArgumentTypeError(
                f"expected an integer or one of none/null/~, got: {text!r}"
            ) from None

    parser = argparse.ArgumentParser(
        description="Tool for converting datasets in COCO format between different formats"
    )
    parser.add_argument("--in_json_path", type=Path, required=True,
                        help="input dataset: a .json file (json_file) or a directory (json_tree)")
    parser.add_argument("--in_crop_tree_path", type=Path,
                        help="optional crop_tree directory to combine with the input dataset")
    parser.add_argument("--out_path", type=Path, required=True,
                        help="where the converted dataset is written")
    parser.add_argument("--out_format", choices=['json_file', 'json_tree', 'crop_tree'], required=True,
                        help="layout of the output dataset")
    parser.add_argument("--overwrite", action='store_true',
                        help="forwarded to the writer as overwrite=True")
    parser.add_argument("--indent", default=4, type=indent_value,
                        help="indent forwarded to the writer: an integer, or none/null/~ "
                             "for no indent (default: 4)")
    return parser


In [None]:
# export

def main(args=None):
    """Convert a COCO dataset between the json_file, json_tree and crop_tree layouts.

    The input layout is detected from ``in_json_path``: a regular file must have
    the ``.json`` suffix (json_file); a directory is treated as a json_tree.
    When ``in_crop_tree_path`` is given, the loaded dataset is passed through
    ``load_crop_tree`` together with that path and the result is what gets
    written. Without a crop tree, converting a layout to itself is rejected.

    All argument checks run before anything is loaded, so an unsupported request
    fails immediately rather than after reading the whole dataset.

    Args:
        args: One of
            * ``None`` (or any falsy value): parse ``sys.argv``;
            * a list/tuple of command-line strings: parsed with ``get_parser()``;
            * an already parsed namespace-like object exposing ``in_json_path``,
              ``in_crop_tree_path``, ``out_path``, ``out_format``, ``overwrite``
              and ``indent``. Path attributes may be ``str`` or ``Path``.

    Raises:
        ValueError: if the input path is neither a ``.json`` file nor a
            directory, if ``out_format`` is not a known layout, or if the
            request is a same-layout conversion without an input crop tree.
    """
    if not args:
        args = get_parser().parse_args()
    elif isinstance(args, (list, tuple)):
        # Convenience for programmatic callers: main(['--in_json_path', ...]).
        args = get_parser().parse_args(list(args))
    logger.info(f'Arguments: {args}')

    # Coerce to Path so hand-built namespaces may carry plain strings.
    in_json_path = Path(args.in_json_path)
    in_crop_tree_path = Path(args.in_crop_tree_path) if args.in_crop_tree_path else None
    out_path = Path(args.out_path)
    out_format = args.out_format
    overwrite = args.overwrite
    indent = args.indent

    # Detect the input layout from the filesystem only; nothing is loaded yet.
    if in_json_path.is_file():
        in_format = 'json_file'
        if in_json_path.suffix != '.json':
            raise ValueError(f'Expect .json file as input, got: {in_json_path}')
    elif in_json_path.is_dir():
        in_format = 'json_tree'
    else:
        raise ValueError(f'Neither json_file nor json_tree found: {in_json_path}')
    logger.info(f'Detected input dataset type: {in_format}: {in_json_path}')

    # Reject unsupported requests up front, before the dataset is read.
    supported_formats = ('json_file', 'json_tree', 'crop_tree')
    if out_format not in supported_formats:
        raise ValueError(f'Unsupported out_format {out_format!r}, '
                         f'expected one of {supported_formats}')
    if in_crop_tree_path is None and in_format == out_format:
        raise ValueError(f'Conversion not supported (without input crop_tree): '
                         f'{in_format} -> {out_format}')

    if in_format == 'json_file':
        coco = load_json_file(in_json_path)
    else:
        coco = load_json_tree(in_json_path)

    if in_crop_tree_path is not None:
        # The dataset returned by load_crop_tree replaces the one loaded above.
        coco = load_crop_tree(in_crop_tree_path, coco)

    dump_fun = {
        'json_file': dump_json_file,
        'json_tree': dump_json_tree,
        'crop_tree': dump_crop_tree,
    }[out_format]
    dump_fun(coco, out_path, skip_nulls=True, overwrite=overwrite, indent=indent)

    # For directory outputs, list the top-level entries in the success message.
    details = f': {[p.name for p in out_path.iterdir()]}' if out_path.is_dir() else ''
    logger.info(f'[+] Success: {out_format} dumped to {out_path}' + details)

In [None]:
# To run the tests below, first install the package:
# $ cd <project root>
# $ make build
# $ pip install -e .
# Then restart the current notebook kernel.

In [None]:
# json_file -> json_tree

! rm -rf /tmp/cococo/json_tree
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --out_path /tmp/cococo/json_tree \
    --out_format json_tree \
    --overwrite

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'), indent=4, out_format='json_tree', out_path=PosixPath('/tmp/cococo/json_tree'), overwrite=True)
INFO:root:Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3
INFO:root:Detected input dataset type: json_file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Dumping json tree to dir: /tmp/cococo/json_tree
DEBUG:root:Written 6 elements to /tmp/cococo/json_tree/images
DEBUG:root:Written 8 elements to /tmp/cococo/json_tree/licenses
DEBUG:root:Written 6 elements to /tmp/cococo/json_tree/annotations
DEBUG:root:Written 3 elements to /tmp/cococo/json_tree/categories
DEBUG:root:Written single element to /tmp/cococo/json_tree/info
INFO:root:[+] Success: json_tree dumped to /tmp/cococo/json_tree: ['info.json', 'info'

In [None]:
# json_tree -> json_file

! rm -rf /tmp/cococo/json_file
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_tree \
    --out_path /tmp/cococo/json_file/annotations.json \
    --out_format json_file \
    --indent=None

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('../examples/coco_chunk/json_tree'), indent=None, out_format='json_file', out_path=PosixPath('/tmp/cococo/json_file/annotations.json'), overwrite=False)
INFO:root:Loading json tree from dir: ../examples/coco_chunk/json_tree
DEBUG:root:Loaded 6 json chunks from ../examples/coco_chunk/json_tree/images
DEBUG:root:Loaded 8 json chunks from ../examples/coco_chunk/json_tree/licenses
DEBUG:root:Loaded 6 json chunks from ../examples/coco_chunk/json_tree/annotations
DEBUG:root:Loaded 3 json chunks from ../examples/coco_chunk/json_tree/categories
DEBUG:root:Loaded single-file 3 json chunk ../examples/coco_chunk/json_tree/categories
INFO:root:Detected input dataset type: json_tree: ../examples/coco_chunk/json_tree
INFO:root:Dumping json file to file: /tmp/cococo/json_file/annotations.json
INFO:root:Writing dataset to json file: /tmp/cococo/json_file/annotations.json
INFO:root:[+] Success: json_file dumped to /

In [None]:
# json_file -> crop_tree

! rm -rf /tmp/cococo/crop_tree
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --out_path /tmp/cococo/crop_tree \
    --out_format crop_tree \
    --overwrite

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'), indent=4, out_format='crop_tree', out_path=PosixPath('/tmp/cococo/crop_tree'), overwrite=True)
INFO:root:Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3
INFO:root:Detected input dataset type: json_file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Dumping crop tree to dir: /tmp/cococo/crop_tree
Processing images: 100%|██████████████████████████| 6/6 [00:03<00:00,  1.53it/s]
INFO:root:[+] Success: crop_tree dumped to /tmp/cococo/crop_tree: ['crops', 'images']


In [None]:
cats = !ls /tmp/cococo/crop_tree/crops
cat = cats[0]
cat

'bicycle--2'

In [None]:
# modify crop_tree

In [None]:
crops = !ls /tmp/cococo/crop_tree/crops/{cat}
deleted_crop = crops[0]
deleted_crop

'124710.png'

In [None]:
! ls /tmp/cococo/crop_tree/crops/{cat}
! rm /tmp/cococo/crop_tree/crops/{cat}/{deleted_crop}
! ls /tmp/cococo/crop_tree/crops/{cat}

124710.png  124713.png
124713.png


In [None]:
# json_file + crop_tree (modified) -> json_tree

! rm -rf /tmp/json_tree_2
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --in_crop_tree_path /tmp/cococo/crop_tree \
    --out_path /tmp/cococo/json_tree_2 \
    --out_format json_tree

! [ ! -f {TMP2}/annotations/{deleted_crop} ] && echo "[+] File successfully not exists"

INFO:root:Arguments: Namespace(in_crop_tree_path=PosixPath('/tmp/cococo/crop_tree'), in_json_path=PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'), indent=4, out_format='json_tree', out_path=PosixPath('/tmp/cococo/json_tree_2'), overwrite=False)
INFO:root:Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3
INFO:root:Detected input dataset type: json_file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loading blob list from dir: /tmp/cococo/crop_tree
DEBUG:root:Loaded 2 crops from: /tmp/cococo/crop_tree/crops/person--1
DEBUG:root:Loaded 2 crops from: /tmp/cococo/crop_tree/crops/car--3
DEBUG:root:Loaded 1 crops from: /tmp/cococo/crop_tree/crops/bicycle--2
INFO:root:Loaded crop tree: len(annotations)=5 len(images)=5 len(categories)=3
INFO:root:Dumping json tree to dir: /tmp/cococo/json_tree_2
DEBUG:root:Written 5 elements to /tmp

In [None]:
# json_tree + crop_tree -> crop_tree

! rm -rf /tmp/cococo/crop_tree_2
! cococleaner \
    --in_json_path /tmp/cococo/json_tree_2 \
    --in_crop_tree_path /tmp/cococo/crop_tree \
    --out_path /tmp/cococo/crop_tree_2 \
    --out_format crop_tree
! ls /tmp/cococo/crop_tree_2 | grep crops

INFO:root:Arguments: Namespace(in_crop_tree_path=PosixPath('/tmp/cococo/crop_tree'), in_json_path=PosixPath('/tmp/cococo/json_tree_2'), indent=4, out_format='crop_tree', out_path=PosixPath('/tmp/cococo/crop_tree_2'), overwrite=False)
INFO:root:Loading json tree from dir: /tmp/cococo/json_tree_2
DEBUG:root:Loaded 5 json chunks from /tmp/cococo/json_tree_2/images
DEBUG:root:Loaded 8 json chunks from /tmp/cococo/json_tree_2/licenses
DEBUG:root:Loaded 5 json chunks from /tmp/cococo/json_tree_2/annotations
DEBUG:root:Loaded 3 json chunks from /tmp/cococo/json_tree_2/categories
DEBUG:root:Loaded single-file 3 json chunk /tmp/cococo/json_tree_2/categories
INFO:root:Detected input dataset type: json_tree: /tmp/cococo/json_tree_2
INFO:root:Loading blob list from dir: /tmp/cococo/crop_tree
DEBUG:root:Loaded 2 crops from: /tmp/cococo/crop_tree/crops/person--1
DEBUG:root:Loaded 2 crops from: /tmp/cococo/crop_tree/crops/car--3
DEBUG:root:Loaded 1 crops from: /tmp/cococo/crop_tree/crops/bicycle--2
I

In [None]:
# json_tree + crop_tree -> json_file

! rm -rf /tmp/cococo/json_file_2
! cococleaner \
    --in_json_path /tmp/cococo/json_tree_2 \
    --in_crop_tree_path /tmp/cococo/crop_tree \
    --out_path /tmp/cococo/json_file_2/annotations.json \
    --out_format json_file

! ls /tmp/cococo/json_file_2/annotations.json

INFO:root:Arguments: Namespace(in_crop_tree_path=PosixPath('/tmp/cococo/crop_tree'), in_json_path=PosixPath('/tmp/cococo/json_tree_2'), indent=4, out_format='json_file', out_path=PosixPath('/tmp/cococo/json_file_2/annotations.json'), overwrite=False)
INFO:root:Loading json tree from dir: /tmp/cococo/json_tree_2
DEBUG:root:Loaded 5 json chunks from /tmp/cococo/json_tree_2/images
DEBUG:root:Loaded 8 json chunks from /tmp/cococo/json_tree_2/licenses
DEBUG:root:Loaded 5 json chunks from /tmp/cococo/json_tree_2/annotations
DEBUG:root:Loaded 3 json chunks from /tmp/cococo/json_tree_2/categories
DEBUG:root:Loaded single-file 3 json chunk /tmp/cococo/json_tree_2/categories
INFO:root:Detected input dataset type: json_tree: /tmp/cococo/json_tree_2
INFO:root:Loading blob list from dir: /tmp/cococo/crop_tree
DEBUG:root:Loaded 2 crops from: /tmp/cococo/crop_tree/crops/person--1
DEBUG:root:Loaded 2 crops from: /tmp/cococo/crop_tree/crops/car--3
DEBUG:root:Loaded 1 crops from: /tmp/cococo/crop_tree/c

In [None]:
# json_tree -> json_tree

! cococleaner \
    --in_json_path /tmp/cococo/json_tree_2 \
    --out_path /tmp/cococo/json_file_3/annotations.json \
    --out_format json_tree \
|| echo "[+] Test passed"

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('/tmp/cococo/json_tree_2'), indent=4, out_format='json_tree', out_path=PosixPath('/tmp/cococo/json_file_3/annotations.json'), overwrite=False)
INFO:root:Loading json tree from dir: /tmp/cococo/json_tree_2
DEBUG:root:Loaded 5 json chunks from /tmp/cococo/json_tree_2/images
DEBUG:root:Loaded 8 json chunks from /tmp/cococo/json_tree_2/licenses
DEBUG:root:Loaded 5 json chunks from /tmp/cococo/json_tree_2/annotations
DEBUG:root:Loaded 3 json chunks from /tmp/cococo/json_tree_2/categories
DEBUG:root:Loaded single-file 3 json chunk /tmp/cococo/json_tree_2/categories
INFO:root:Detected input dataset type: json_tree: /tmp/cococo/json_tree_2
Traceback (most recent call last):
  File "/home/ay/.pyenv/versions/3.7.6/bin/cococleaner", line 33, in <module>
    sys.exit(load_entry_point('cococleaner', 'console_scripts', 'cococleaner')())
  File "/plain/github/nm/cococleaner/cococleaner/main.py", line 72, in main
    raise V