In [None]:
# default_exp main

In [None]:
# export

import argparse
import logging
from pathlib import Path

from cocorepr.coco import merge_datasets
from cocorepr.json_file import *
from cocorepr.json_tree import *
from cocorepr.crop_tree import *

# Configure the root logger once at import time: INFO level, one "<LEVEL>: <message>" line per record.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
# getLogger() without a name returns the root logger; main() raises its level to DEBUG when --debug is set.
logger = logging.getLogger()

In [None]:
# export

def _parse_indent(value):
    """Convert the ``--indent`` command-line value to an ``int`` or ``None``.

    The strings ``none``, ``null`` and ``~`` (case-insensitive) map to ``None``;
    any other value must be accepted by ``int()``.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is neither a null marker nor an
            integer, so argparse reports a readable message instead of
            ``invalid <lambda> value``.
    """
    text = str(value)
    if text.lower() in ('none', 'null', '~'):
        return None
    try:
        return int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"expected an integer or one of none/null/~, got {text!r}"
        ) from None


def get_parser():
    """Build the command-line parser for the conversion tool.

    Returns:
        argparse.ArgumentParser: parser whose namespace exposes ``in_json_tree``,
        ``in_json_file``, ``in_crop_tree`` (lists of ``Path``, empty by default),
        ``out_path`` (``Path``, required), ``out_format`` (required, one of
        ``json_file`` / ``json_tree`` / ``crop_tree``), ``overwrite`` (bool),
        ``indent`` (``int`` or ``None``, default 4) and ``debug`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Tool for converting datasets in COCO format between different formats"
    )

    # Inputs: every option accepts zero or more paths.
    parser.add_argument("--in_json_tree", type=Path, nargs="*", default=[],
                        help="Path(s) to input dataset(s) in json_tree format; "
                             "multiple datasets are merged.")
    parser.add_argument("--in_json_file", type=Path, nargs="*", default=[],
                        help="Path(s) to input dataset(s) in json_file format; "
                             "multiple datasets are merged.")
    parser.add_argument("--in_crop_tree", type=Path, nargs="*", default=[],
                        help="Path(s) to input dataset(s) in crop_tree format, loaded "
                             "against the merged json dataset; when given, the result "
                             "is the dataset that gets dumped.")

    # Output destination and representation.
    parser.add_argument("--out_path", type=Path, required=True,
                        help="Where to write the converted dataset.")
    parser.add_argument("--out_format", choices=['json_file', 'json_tree', 'crop_tree'], required=True,
                        help="Format of the output dataset.")

    # Writer options.
    parser.add_argument("--overwrite", action='store_true',
                        help="Forward overwrite=True to the output writer.")
    parser.add_argument("--indent", default=4, type=_parse_indent,
                        help="Indent forwarded to the output writer: an integer, or "
                             "none/null/~ for None (default: %(default)s).")
    parser.add_argument("--debug", action='store_true',
                        help="Switch logging to DEBUG level.")

    return parser

In [1]:
# export

def main(args=None):
    """Convert COCO dataset(s) into the requested output representation.

    The base dataset is built by merging every ``--in_json_tree`` input and then
    every ``--in_json_file`` input. If ``--in_crop_tree`` inputs are given, each
    is loaded against that base dataset, the results are merged, and the merged
    crop dataset replaces the base. The final dataset is dumped to ``out_path``
    in ``out_format``.

    Args:
        args: An already-parsed ``argparse.Namespace`` carrying the attributes
            defined by ``get_parser()``. When ``None``, arguments are parsed
            from the process command line.

    Raises:
        ValueError: if no json_tree / json_file input was supplied, or if
            ``args.out_format`` is not a supported format.
    """
    if args is None:
        args = get_parser().parse_args()

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    logger.info(f'Arguments: {args}')

    in_json_tree_list = args.in_json_tree
    in_json_file_list = args.in_json_file
    in_crop_tree_list = args.in_crop_tree

    out_path = args.out_path
    out_format = args.out_format
    overwrite = args.overwrite
    indent = args.indent

    # Build the base dataset: json trees first, then json files, merged in order.
    coco = None
    for in_json_tree in in_json_tree_list:
        coco = merge_datasets(coco, load_json_tree(in_json_tree))
    for in_json_file in in_json_file_list:
        coco = merge_datasets(coco, load_json_file(in_json_file))

    if coco is None:
        raise ValueError('Not found base dataset, please specify either of: '
                         '--in_json_tree / --in_json_file (multiple arguments allowed)')
    logger.info(f'Loaded json dataset: {coco.to_full_str()}')

    # Crop trees are loaded against the base dataset; if any were given,
    # their merged result becomes the dataset to dump.
    coco_crop = None
    for in_crop_tree in in_crop_tree_list:
        coco_crop = merge_datasets(coco_crop, load_crop_tree(in_crop_tree, coco))
    if coco_crop is not None:
        logger.info(f'Loaded coco tree dataset: {coco_crop.to_full_str()}')
        logger.info('Using coco_crop dataset.')
        coco = coco_crop

    if out_format == 'json_file':
        dump_fun = dump_json_file
    elif out_format == 'json_tree':
        dump_fun = dump_json_tree
    elif out_format == 'crop_tree':
        dump_fun = dump_crop_tree
    else:
        # Reachable only when a hand-built namespace bypasses the parser's `choices`.
        raise ValueError(out_format)
    dump_fun(coco, out_path, skip_nulls=True, overwrite=overwrite, indent=indent)

    # For directory outputs, list the top-level entries in the success message.
    details = f': {[p.name for p in out_path.iterdir()]}' if out_path.is_dir() else ''
    logger.info(f'[+] Success: {out_format} dumped to {out_path}' + details)

In [2]:
# To run the cells below, first install the package in editable mode:
# $ cd <project root>
# $ make build
# $ pip install -e .
# Then restart the current notebook kernel.

In [3]:
# json_file -> json_tree

! rm -rf /tmp/cococo/json_tree
! cocorepr \
    --in_json_file ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --out_path /tmp/cococo/json_tree \
    --out_format json_tree \
    --overwrite

INFO| Arguments: Namespace(debug=False, in_crop_tree=[], in_json_file=[PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json')], in_json_tree=[], indent=4, out_format='json_tree', out_path=PosixPath('/tmp/cococo/json_tree'), overwrite=True)
INFO| Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO| Loaded json file: images=6, annotations=6, categories=3
INFO| Loaded total json dataset: len(annotations)=6 len(images)=6 len(categories)=3
INFO| Dumping json tree to dir: /tmp/cococo/json_tree
INFO| [+] Success: json_tree dumped to /tmp/cococo/json_tree: ['info.json', 'info', 'categories', 'annotations', 'licenses', 'images']


In [None]:
# json_tree -> json_file

! rm -rf /tmp/cococo/json_file
! cocorepr \
    --in_json_tree ../examples/coco_chunk/json_tree \
    --out_path /tmp/cococo/json_file/annotations.json \
    --out_format json_file \
    --indent=None

INFO: Arguments: Namespace(debug=False, in_crop_tree=None, in_json_files=[], in_json_trees=[PosixPath('../examples/coco_chunk/json_tree')], indent=None, out_format='json_file', out_path=PosixPath('/tmp/cococo/json_file/annotations.json'), overwrite=False)
INFO: Loading json tree from dir: ../examples/coco_chunk/json_tree
INFO: Dumping json file to file: /tmp/cococo/json_file/annotations.json
INFO: Writing dataset to json file: /tmp/cococo/json_file/annotations.json
INFO: [+] Success: json_file dumped to /tmp/cococo/json_file/annotations.json


In [None]:
# json_file -> crop_tree

! rm -rf /tmp/cococo/crop_tree
! cocorepr \
    --in_json_file ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --out_path /tmp/cococo/crop_tree \
    --out_format crop_tree \
    --overwrite

INFO: Arguments: Namespace(debug=False, in_crop_tree=None, in_json_files=[PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json')], in_json_trees=[], indent=4, out_format='crop_tree', out_path=PosixPath('/tmp/cococo/crop_tree'), overwrite=True)
INFO: Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO: Loaded: images=6, annotations=6, categories=3
INFO: Dumping crop tree to dir: /tmp/cococo/crop_tree
Processing images: 100%|██████████████████████████| 6/6 [00:03<00:00,  1.53it/s]
INFO: [+] Success: crop_tree dumped to /tmp/cococo/crop_tree: ['crops', 'images']


In [None]:
cats = !ls /tmp/cococo/crop_tree/crops
cat = cats[0]
cat

'bicycle--2'

In [None]:
# modify crop_tree

In [None]:
crops = !ls /tmp/cococo/crop_tree/crops/{cat}
deleted_crop = crops[0]
deleted_crop

'124710.png'

In [None]:
! ls /tmp/cococo/crop_tree/crops/{cat}
! rm /tmp/cococo/crop_tree/crops/{cat}/{deleted_crop}
! ls /tmp/cococo/crop_tree/crops/{cat}

124710.png  124713.png
124713.png


In [None]:
# json_file + crop_tree (modified) -> json_tree

# Remove the same directory that --out_path points at, so the run starts clean.
! rm -rf /tmp/cococo/json_tree_2
! cocorepr \
    --in_json_file ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --in_crop_tree /tmp/cococo/crop_tree \
    --out_path /tmp/cococo/json_tree_2 \
    --out_format json_tree

# NOTE(review): `deleted_crop` still carries the crop's `.png` extension; confirm it matches the
# file name used under json_tree's `annotations/` directory, otherwise this check is vacuous.
! [ ! -f /tmp/cococo/json_tree_2/annotations/{deleted_crop} ] && echo "[+] File successfully not exists"

INFO: Arguments: Namespace(debug=False, in_crop_tree=PosixPath('/tmp/cococo/crop_tree'), in_json_files=[PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json')], in_json_trees=[], indent=4, out_format='json_tree', out_path=PosixPath('/tmp/cococo/json_tree_2'), overwrite=False)
INFO: Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO: Loaded: images=6, annotations=6, categories=3
INFO: Loading blob list from dir: /tmp/cococo/crop_tree
INFO: Loaded crop tree: len(annotations)=5 len(images)=5 len(categories)=3
INFO: Dumping json tree to dir: /tmp/cococo/json_tree_2
Traceback (most recent call last):
  File "/home/ay/.pyenv/versions/3.7.6/bin/cocorepr", line 33, in <module>
    sys.exit(load_entry_point('cocorepr', 'console_scripts', 'cocorepr')())
  File "/plain/github/nm/cocorepr/cocorepr/main.py", line 81, in main
    dump_fun(coco, out_path, skip_nulls=True, overwrite=overwrite, indent=indent)
  File "/plain/gith

In [None]:
# json_tree + crop_tree -> crop_tree

! rm -rf /tmp/cococo/crop_tree_2
! cocorepr \
    --in_json_tree /tmp/cococo/json_tree_2 \
    --in_crop_tree /tmp/cococo/crop_tree \
    --out_path /tmp/cococo/crop_tree_2 \
    --out_format crop_tree
! ls /tmp/cococo/crop_tree_2 | grep crops

INFO: Arguments: Namespace(debug=False, in_crop_tree=PosixPath('/tmp/cococo/crop_tree'), in_json_files=[], in_json_trees=[PosixPath('/tmp/cococo/json_tree_2')], indent=4, out_format='crop_tree', out_path=PosixPath('/tmp/cococo/crop_tree_2'), overwrite=False)
INFO: Loading json tree from dir: /tmp/cococo/json_tree_2
INFO: Loading blob list from dir: /tmp/cococo/crop_tree
INFO: Loaded crop tree: len(annotations)=5 len(images)=5 len(categories)=3
INFO: Dumping crop tree to dir: /tmp/cococo/crop_tree_2
Processing images: 100%|██████████████████████████| 5/5 [00:04<00:00,  1.20it/s]
INFO: [+] Success: crop_tree dumped to /tmp/cococo/crop_tree_2: ['crops', 'images']
crops


In [None]:
# json_tree + crop_tree -> json_file

! rm -rf /tmp/cococo/json_file_2
! cocorepr \
    --in_json_tree /tmp/cococo/json_tree_2 \
    --in_crop_tree /tmp/cococo/crop_tree \
    --out_path /tmp/cococo/json_file_2/annotations.json \
    --out_format json_file

! ls /tmp/cococo/json_file_2/annotations.json

INFO: Arguments: Namespace(debug=False, in_crop_tree=PosixPath('/tmp/cococo/crop_tree'), in_json_files=[], in_json_trees=[PosixPath('/tmp/cococo/json_tree_2')], indent=4, out_format='json_file', out_path=PosixPath('/tmp/cococo/json_file_2/annotations.json'), overwrite=False)
INFO: Loading json tree from dir: /tmp/cococo/json_tree_2
INFO: Loading blob list from dir: /tmp/cococo/crop_tree
INFO: Loaded crop tree: len(annotations)=5 len(images)=5 len(categories)=3
INFO: Dumping json file to file: /tmp/cococo/json_file_2/annotations.json
INFO: Writing dataset to json file: /tmp/cococo/json_file_2/annotations.json
INFO: [+] Success: json_file dumped to /tmp/cococo/json_file_2/annotations.json
/tmp/cococo/json_file_2/annotations.json


In [None]:
# json_tree + json_file -> json_file

! rm -rf /tmp/cococo/json_file_3/
! cocorepr \
    --in_json_tree /tmp/cococo/json_tree_2 \
    --in_json_file /tmp/cococo/json_file/annotations.json \
    --out_path /tmp/cococo/json_file_3/annotations.json \
    --out_format json_file \
    --overwrite


INFO: Arguments: Namespace(debug=False, in_crop_tree=None, in_json_files=[PosixPath('/tmp/cococo/json_file/annotations.json')], in_json_trees=[PosixPath('/tmp/cococo/json_tree_2')], indent=4, out_format='json_file', out_path=PosixPath('/tmp/cococo/json_file_3/annotations.json'), overwrite=True)
INFO: Loading json tree from dir: /tmp/cococo/json_tree_2
INFO: Loading json file from file: /tmp/cococo/json_file/annotations.json
INFO: Loaded: images=6, annotations=6, categories=3
INFO: Dumping json file to file: /tmp/cococo/json_file_3/annotations.json
INFO: Writing dataset to json file: /tmp/cococo/json_file_3/annotations.json
INFO: [+] Success: json_file dumped to /tmp/cococo/json_file_3/annotations.json
