In [21]:
import os
from PIL import Image
import pandas as pd
import numpy as np
from pathlib import Path
import json
from tqdm import tqdm

In [22]:
# Dataset root. Defaults to the original location, but can be redirected
# without editing the notebook by setting the COCO_ROOT environment variable.
COCO_ROOT = Path(os.environ.get('COCO_ROOT', '/home/mwieczorek/data/coco-dataset/'))
COCO_ANNOS = COCO_ROOT.joinpath('annotations')

# Which COCO 2017 split to process.
MODE = 'val'  # 'val' or 'train'
if MODE not in ('val', 'train'):
    # Fail here rather than later with a confusing missing-file error.
    raise ValueError(f"MODE must be 'val' or 'train', got {MODE!r}")

# Instance annotation file of the selected split.
json_path = COCO_ANNOS/f'instances_{MODE}2017.json'

### Using a DataFrame (not actually needed)

In [23]:
# Load the raw annotation JSON. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open until garbage
# collection.
with open(json_path, 'r') as annotation_file:
    j = json.load(annotation_file)
# Unused pandas-based alternative for collecting the person image ids:
# print(j['categories'])
# df = pd.DataFrame(j['annotations'])
# df_person = df.loc[df['category_id'] == 1]
# person_image_ids = df_person['image_id'].values

### Use PyCocotools

In [24]:
import pycocotools as pyc
from pycocotools.coco import COCO

In [25]:
# Load the selected split's instance annotations into a pycocotools COCO
# object (it builds its lookup index on construction).
coco = COCO(annotation_file=json_path)

loading annotations into memory...
Done (t=0.30s)
creating index...
index created!


In [26]:
# Category id(s) whose name is exactly 'person'. The name is passed as a
# list on purpose: pycocotools treats a bare string as an array-like and
# then tests `cat['name'] in 'person'`, which is a substring match and only
# works by accident.
personCatId = coco.getCatIds(catNms=['person'])
# Ids of every annotation of that category, and of every image that has one.
personAnnIds = coco.getAnnIds(catIds=personCatId)
personImgIds = coco.getImgIds(catIds=personCatId)

In [27]:
# Resolve the collected ids to their annotation / image record dicts.
# loadAnns / loadImgs return the same objects stored in coco.anns / coco.imgs.
personAnnos = coco.loadAnns(personAnnIds)
personImgs = coco.loadImgs(personImgIds)

#### Remove segmentation from the annotations

In [28]:
# Strip the 'segmentation' entry from every person annotation, in place.
# A plain loop replaces the side-effect-only list comprehension, and the
# None default keeps the cell re-runnable: without it a second run raises
# KeyError because the key has already been removed.
for anno in personAnnos:
    anno.pop('segmentation', None)

### Wrap person annotations in COCO format

In [29]:
from coco_format_utils import COCO_json

In [30]:
# Helper object from coco_format_utils. The four empty strings and None are
# placeholder constructor arguments; their meaning is defined in that module
# (not visible here).
coco_json = COCO_json('', '', '', '', None)

In [31]:
# Select the 'person' category by name instead of relying on it being the
# first entry, and copy it so the loaded annotation dict `j` is not mutated.
personCat = dict(next(cat for cat in j['categories'] if cat['name'] == 'person'))
personCat['id'] = 0  # Object categories need to start at 0!
# NOTE(review): nothing in this notebook rewrites 'category_id' on the person
# annotations, so they still carry the original COCO id while the category
# is renumbered to 0 here. Confirm the consumer of the exported file handles
# that mismatch (or remap the annotations as well).

In [32]:
# Dataset-level "info" section; only year and version carry real values,
# the remaining fields are left as empty strings.
personInfo = coco_json.create_info(
    year=2020,
    version=0.1,
    desc='',
    contr='',
    url='',
    datetime='',
)

In [33]:
# "licenses" section, produced by the helper called with no arguments.
personLicense = coco_json.create_license()

In [34]:
# Top-level structure of the person-only annotation file.
json_data = {
    'info': personInfo,
    'images': personImgs,
    'annotations': personAnnos,
    # COCO format defines 'categories' as a list of category dicts. A bare
    # dict here makes loaders such as pycocotools iterate over its keys and
    # fail, so the single person category is wrapped in a list.
    'categories': [personCat],
    # NOTE(review): COCO also expects 'licenses' to be a list; the return
    # type of create_license() is not visible here, so confirm.
    'licenses': personLicense,
}

In [35]:
# Write the person-only subset into the annotations folder as JSON.
person_json_path = COCO_ANNOS.joinpath(f'person_annotations_{MODE}.json')
with person_json_path.open('w') as out_file:
    json.dump(json_data, out_file)

### Copy person images to a separate folder

In [16]:
import shutil

# Folder holding the original images of the selected split.
COCO_IMGS_DIR = COCO_ROOT.joinpath(f'{MODE}2017')
# Destination folder for the person-only images; created if it is missing.
PERSON_IMGS_DIR = COCO_ROOT.joinpath(f'person_{MODE}')
PERSON_IMGS_DIR.mkdir(parents=True, exist_ok=True)

In [17]:
# Copy every person image into PERSON_IMGS_DIR, skipping files that are
# already present so the cell can be re-run cheaply.
for img_record in tqdm(personImgs):
    file_name = img_record['file_name']
    destination = PERSON_IMGS_DIR / file_name
    if not destination.is_file():
        shutil.copy(COCO_IMGS_DIR / file_name, destination)

100%|██████████| 2693/2693 [00:00<00:00, 6113.79it/s]
