In [None]:
# default_exp main

In [None]:
# export

import argparse
import logging
from pathlib import Path

from cococleaner.json_file import *
from cococleaner.json_tree import *
from cococleaner.crop_tree import *

# Configure root logging at import time and request INFO as the level.
# NOTE(review): basicConfig() does nothing if the root logger already has
# handlers (e.g. set up by one of the modules imported above); the DEBUG lines
# in the recorded cell outputs below suggest that may be the case -- confirm.
logging.basicConfig(level=logging.INFO)
# No name is passed, so this is the root logger (records show up as "root").
logger = logging.getLogger()

In [None]:
# export 

def get_parser():
    """Build the command-line parser for the conversion tool.

    Returns:
        argparse.ArgumentParser: parser exposing ``--in_json_path`` (required),
        ``--in_crop_tree_path`` (optional), ``--out_path`` (required),
        ``--out_format`` (required, one of ``json_file``, ``json_tree``,
        ``crop_tree``) and the ``--overwrite`` flag. Path options are parsed
        into ``pathlib.Path`` objects.
    """
    parser = argparse.ArgumentParser(
        description="Tool for converting datasets in COCO format between different formats"
    )

    parser.add_argument(
        "--in_json_path", type=Path, required=True,
        help="Input annotations: a .json file or a json tree directory",
    )
    parser.add_argument(
        "--in_crop_tree_path", type=Path,
        help="Optional crop tree directory to load on top of the input annotations "
             "(cannot be combined with --out_format crop_tree)",
    )
    parser.add_argument(
        "--out_path", type=Path, required=True,
        help="Destination path for the converted dataset",
    )
    parser.add_argument(
        "--out_format", choices=['json_file', 'json_tree', 'crop_tree'], required=True,
        help="Format to write to --out_path",
    )
    parser.add_argument(
        "--overwrite", action='store_true',
        help="Allow overwriting an existing output path",
    )
    return parser


In [None]:
# export

def main(args=None):
    """Convert a COCO dataset between the json file, json tree and crop tree formats.

    Args:
        args: Pre-parsed namespace providing ``in_json_path``,
            ``in_crop_tree_path``, ``out_path``, ``out_format`` and
            ``overwrite``. When ``None``, the command line is parsed with
            ``get_parser()``.

    Raises:
        ValueError: If ``in_crop_tree_path`` is combined with
            ``out_format == 'crop_tree'``; if ``in_json_path`` is a file
            without the ``.json`` suffix, or is neither a file nor a
            directory; or if ``out_format`` is not a known format.
    """
    # Explicit None check: only a missing namespace triggers CLI parsing.
    if args is None:
        args = get_parser().parse_args()
    logger.info(f'Arguments: {args}')
    in_json_path = args.in_json_path
    in_crop_tree_path = args.in_crop_tree_path

    out_path = args.out_path
    out_format = args.out_format
    overwrite = args.overwrite

    if in_crop_tree_path and out_format == 'crop_tree':
        raise ValueError('Incompatible options: --in_crop_tree_path=... '
                         'and --out_format=crop_tree')

    # A regular file is read as a single json file, a directory as a json tree.
    coco = None
    if in_json_path.is_file():
        ext = in_json_path.suffix
        if ext != '.json':
            raise ValueError(f'Expect .json file as input, got: {in_json_path}')
        coco = load_json_file(in_json_path)
    elif in_json_path.is_dir():
        coco = load_json_tree(in_json_path)

    if coco is None:
        raise ValueError(f'Neither json file nor json tree found in path: {in_json_path}')

    # The crop tree is loaded against the already-loaded dataset and replaces it.
    if in_crop_tree_path:
        coco = load_crop_tree(in_crop_tree_path, coco)

    if out_format == 'json_file':
        dump_fun = dump_json_file
    elif out_format == 'json_tree':
        dump_fun = dump_json_tree
    elif out_format == 'crop_tree':
        dump_fun = dump_crop_tree
    else:
        raise ValueError(out_format)
    dump_fun(coco, out_path, skip_nulls=True, overwrite=overwrite)

    # Only a directory can be listed; out_path may be a regular file
    # (presumably the case for json_file output), where iterdir() would raise
    # NotADirectoryError after an otherwise successful dump.
    if out_path.is_dir():
        logger.info(f'[+] Success: {out_format} dumped to {out_path}: '
                    f'{[p.name for p in out_path.iterdir()]}')
    else:
        logger.info(f'[+] Success: {out_format} dumped to {out_path}')

In [None]:
# Demo: convert the example single-file annotations into a json tree.
# `mktemp -d` creates the target directory up front, so --overwrite is passed.
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --out_path $(mktemp -d) \
    --out_format json_tree \
    --overwrite

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'), out_format='json_tree', out_path=PosixPath('/tmp/tmp.CsaX2FKhsA'), overwrite=True)
INFO:root:Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3
INFO:root:Dumping json tree to dir: /tmp/tmp.CsaX2FKhsA
INFO:root:Deleting old target tree directory /tmp/tmp.CsaX2FKhsA
DEBUG:root:Written 6 elements to /tmp/tmp.CsaX2FKhsA/images
DEBUG:root:Written 8 elements to /tmp/tmp.CsaX2FKhsA/licenses
DEBUG:root:Written 6 elements to /tmp/tmp.CsaX2FKhsA/annotations
DEBUG:root:Written 3 elements to /tmp/tmp.CsaX2FKhsA/categories
DEBUG:root:Written single element to /tmp/tmp.CsaX2FKhsA/info
INFO:root:[+] Success: json_tree dumped to /tmp/tmp.CsaX2FKhsA: ['info.json', 'info', 'categories', 'annotations', 'licenses', 'images']


In [None]:
# Demo: read the example json tree directory and write it back out as a json tree.
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_tree \
    --out_path $(mktemp -d) \
    --out_format json_tree \
    --overwrite

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('../examples/coco_chunk/json_tree'), out_format='json_tree', out_path=PosixPath('/tmp/tmp.WiC8talkFw'), overwrite=True)
INFO:root:Loading json tree from dir: ../examples/coco_chunk/json_tree
DEBUG:root:Loaded 6 json chunks from ../examples/coco_chunk/json_tree/images
DEBUG:root:Loaded 8 json chunks from ../examples/coco_chunk/json_tree/licenses
DEBUG:root:Loaded 6 json chunks from ../examples/coco_chunk/json_tree/annotations
DEBUG:root:Loaded 3 json chunks from ../examples/coco_chunk/json_tree/categories
DEBUG:root:Loaded single-file 3 json chunk ../examples/coco_chunk/json_tree/categories
INFO:root:Dumping json tree to dir: /tmp/tmp.WiC8talkFw
INFO:root:Deleting old target tree directory /tmp/tmp.WiC8talkFw
DEBUG:root:Written 6 elements to /tmp/tmp.WiC8talkFw/images
DEBUG:root:Written 8 elements to /tmp/tmp.WiC8talkFw/licenses
DEBUG:root:Written 6 elements to /tmp/tmp.WiC8talkFw/annotations
DEBUG

In [None]:
# Demo: dump the example annotations as a crop tree into a temp dir that the
# following cells reuse. `!cmd` capture yields a list of output lines, so the
# first element is the directory path.
TMP = !mktemp -d
TMP = TMP[0]
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --out_path {TMP} \
    --out_format crop_tree \
    --overwrite

INFO:root:Arguments: Namespace(in_crop_tree_path=None, in_json_path=PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'), out_format='crop_tree', out_path=PosixPath('/tmp/tmp.96iXXQ4rjj'), overwrite=True)
INFO:root:Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3
INFO:root:Dumping crop tree to dir: /tmp/tmp.96iXXQ4rjj
INFO:root:Deleting old target directory /tmp/tmp.96iXXQ4rjj
Processing images: 100%|██████████████████████████| 6/6 [00:05<00:00,  1.15it/s]
INFO:root:[+] Success: crop_tree dumped to /tmp/tmp.96iXXQ4rjj: ['crops', 'images']


In [None]:
# Pick the first category directory listed under the dumped crop tree.
cats = !ls {TMP}/crops
cat = cats[0]
cat

'bicycle--2'

In [None]:
# Pick the first crop file of that category; it is removed in the next cell.
crops = !ls {TMP}/crops/{cat}
deleted_crop = crops[0]
deleted_crop

'124713.png'

In [None]:
# Delete the chosen crop from the crop tree, then list what is left in the category.
! rm {TMP}/crops/{cat}/{deleted_crop}
! ls {TMP}/crops/{cat}

In [None]:
# Demo: reload the original annotations together with the edited crop tree and
# dump the result as a json tree.
TMP2 = !mktemp -d
TMP2 = TMP2[0]

# Remove the freshly created directory again: --overwrite is not passed below,
# so the output path is made non-existent before the run.
! rmdir {TMP2}
! cococleaner \
    --in_json_path ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json \
    --in_crop_tree_path {TMP} \
    --out_path {TMP2} \
    --out_format json_tree

# Check that no entry for the deleted crop was written.
# NOTE(review): deleted_crop still carries the crop file's extension ('.png' in
# the recorded output); if json tree entries are named differently this `ls`
# fails regardless and the check is vacuous -- confirm.
! ls {TMP2}/annotations/{deleted_crop} || echo "File successfully not exists"

INFO:root:Arguments: Namespace(in_crop_tree_path=PosixPath('/tmp/tmp.96iXXQ4rjj'), in_json_path=PosixPath('../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json'), out_format='json_tree', out_path=PosixPath('/tmp/tmp.LQfBh6qqKC'), overwrite=False)
INFO:root:Loading json file from file: ../examples/coco_chunk/json_file/instances_train2017_chunk3x2.json
INFO:root:Loaded: images=6, annotations=6, categories=3
INFO:root:Loading blob list from dir: /tmp/tmp.96iXXQ4rjj
DEBUG:root:Loaded 2 crops from: /tmp/tmp.96iXXQ4rjj/crops/person--1
DEBUG:root:Loaded 2 crops from: /tmp/tmp.96iXXQ4rjj/crops/car--3
DEBUG:root:Loaded 2 crops from: /tmp/tmp.96iXXQ4rjj/crops/bicycle--2
INFO:root:Loaded crop tree: len(annotations)=4 len(images)=4 len(categories)=2
INFO:root:Dumping json tree to dir: /tmp/tmp.LQfBh6qqKC
DEBUG:root:Written 4 elements to /tmp/tmp.LQfBh6qqKC/images
DEBUG:root:Written 8 elements to /tmp/tmp.LQfBh6qqKC/licenses
DEBUG:root:Written 4 elements to /tmp/tmp.LQfBh6qqKC/annotat