# Dataset merge: CrowdPose + MPII + OCHuman for keypoints
- Use MPII + OCHuman in COCO syntax (Datasets_to_COCO.ipynb)
- Duplicate image paths exported with duplicate_image_finder.ipynb

In [1]:
import json
from pathlib import Path
from pprint import pprint as _pprint
def pprint(data):
    """Pretty-print ``data`` while keeping dict keys in insertion order."""
    _pprint(data, sort_dicts=False)
from configs import cfg
from tqdm.notebook import tqdm
import os
import shutil
import copy
import numpy as np
import fiftyone as fo
import webbrowser

In [14]:
# images from all combined datasets
# train images from main dataset to later test on + all images from other dataset
# NOTE(review): every other path in this notebook points below
# '.../crowdpose_mpii_oc/' -- confirm that 'crowdpose_mpii_ochuman' is intended.
img_dir = '/Users/john/datasets/crowdpose_mpii_ochuman/images'

# ... same for annotation files
anno_files = [
    '/Users/john/datasets/crowdpose_mpii_oc/json/crowdpose_trainval_coco.json',  # crowd_pose train/val
    '/Users/john/datasets/crowdpose_mpii_oc/json/ochuman_coco.json',  # all (= train + test)
    '/Users/john/datasets/crowdpose_mpii_oc/json/mpii_coco.json',  # all
    '/Users/john/datasets/crowdpose_mpii_oc/json/crowdpose_test.json',  # main test (crowd_pose)
    '/Users/john/datasets/crowdpose_mpii_oc/json/overlap_export.json'  # image overlap log
]

# Load the files one after another so that only one handle is open at a time.
# The encoding is given explicitly because JSON is UTF-8 and the platform
# default encoding is not guaranteed to be.
loaded = []
for anno_file in anno_files:
    with open(anno_file, encoding='utf-8') as f:
        loaded.append(json.load(f))

# Same order as in `anno_files`.
crowd, oc, mpii, main_test, overlap = loaded

## 1. Rearrange keypoints to CrowdPose order

In [15]:
# Work on copies so that the loaded originals stay untouched.
new_oc = copy.deepcopy(oc)
new_mpii = copy.deepcopy(mpii)

# For each of the 14 target positions: index of the source keypoint that is
# moved there. Source keypoints that are not listed are dropped.
OC_TO_CROWDPOSE = (3, 0, 4, 1, 5, 2, 9, 6, 10, 7, 11, 8, 12, 13)  # drops eyes, ears, nose
MPII_TO_CROWDPOSE = (13, 12, 14, 11, 15, 10, 3, 2, 4, 1, 5, 0, 9, 8)  # drops pelvis and thorax


def _to_crowdpose_keypoints(keypoints, order):
    """Reorder a flat COCO keypoint list and round it to plain Python ints.

    Args:
        keypoints: Flat list ``[x0, y0, v0, x1, y1, v1, ...]``.
        order: Source keypoint index for every target position.

    Returns:
        Flat list with ``3 * len(order)`` built-in ``int`` values.

    Raises:
        ValueError: If ``len(keypoints)`` is not a multiple of 3.
        IndexError: If ``order`` refers to a keypoint that does not exist.
    """
    triplets = np.asarray(keypoints).reshape(-1, 3)
    selected = triplets[list(order)].ravel()
    # Go through float/int explicitly: round() on a NumPy integer scalar can
    # return a NumPy scalar again, which json.dump cannot serialize.
    return [int(round(float(val))) for val in selected]


for coco, order in ((new_oc, OC_TO_CROWDPOSE), (new_mpii, MPII_TO_CROWDPOSE)):
    for anno in coco['annotations']:
        kps = _to_crowdpose_keypoints(anno['keypoints'], order)
        anno['keypoints'] = kps
        # Every third value is the visibility flag; count the keypoints with flag > 0.
        anno['num_keypoints'] = sum(1 for vis in kps[2::3] if vis > 0)

## 2. Avoid duplicate ids

In [16]:
def _collect_ids(key, oc_data, mpii_data):
    """Return the ``id`` values stored under ``key`` for every dataset.

    Args:
        key: ``'images'`` or ``'annotations'``.
        oc_data: OCHuman COCO dict to read from.
        mpii_data: MPII COCO dict to read from.

    Returns:
        Dict with the keys ``'crowd'`` (train/val + test), ``'oc'`` and
        ``'mpii'``, each mapping to a list of ids.
    """
    return {
        'crowd': [entry['id'] for entry in crowd[key] + main_test[key]],
        'oc': [entry['id'] for entry in oc_data[key]],
        'mpii': [entry['id'] for entry in mpii_data[key]],
    }


def _print_id_ranges(image_ids_by_dataset, anno_ids_by_dataset):
    """Print the min-max id range of the images and annotations per dataset."""
    for name, ids in image_ids_by_dataset.items():
        print(f'img_ids:{name}: {min(ids)}-{max(ids)}')
    print('\n')
    for name, ids in anno_ids_by_dataset.items():
        print(f'anno_ids:{name}: {min(ids)}-{max(ids)}')


def _check_disjoint(kind, ids_by_dataset):
    """Raise ``ValueError`` if two datasets share an id of the given kind."""
    seen = set()
    for name, ids in ids_by_dataset.items():
        clash = seen.intersection(ids)
        if clash:
            raise ValueError(
                f'{len(clash)} {kind} ids of {name!r} collide with another dataset, '
                f'e.g. {min(clash)}; choose a different id offset.'
            )
        seen.update(ids)


# Id ranges before the shift
ids_images = _collect_ids('images', oc, mpii)
ids_annotations = _collect_ids('annotations', oc, mpii)
_print_id_ranges(ids_images, ids_annotations)

# Offsets that move the OCHuman and MPII ids into ranges of their own.
BASE_OC = 200_000
BASE_MPII = 300_000

for coco, base in ((new_oc, BASE_OC), (new_mpii, BASE_MPII)):
    for anno in coco['annotations']:
        anno['id'] += base
        anno['image_id'] += base  # keep the link to the shifted image id
    for img in coco['images']:
        img['id'] += base

# Test: report the ranges after the shift and make sure nothing collides
print('\n')
ids_images = _collect_ids('images', new_oc, new_mpii)
ids_annotations = _collect_ids('annotations', new_oc, new_mpii)
_print_id_ranges(ids_images, ids_annotations)
_check_disjoint('image', ids_images)
_check_disjoint('annotation', ids_annotations)

img_ids:crowd: 100000-119999
img_ids:oc: 1-5081
img_ids:mpii: 5-24985


anno_ids:crowd: 100001-186784
anno_ids:oc: 1-13360
anno_ids:mpii: 1-28883


img_ids:crowd: 100000-119999
img_ids:oc: 200001-205081
img_ids:mpii: 300005-324985


anno_ids:crowd: 100001-186784
anno_ids:oc: 200001-213360
anno_ids:mpii: 300001-328883


## 3. Remove image overlap 
Remove images from added datasets that are already in crowdpose.

In [17]:
# The overlap log is only evaluated for CrowdPose <-> OCHuman here; the shared
# images and their annotations are dropped from the OCHuman copy.

# File names of all OCHuman images that appear in an overlapping image pair
oc_remove = {
    file_name
    for image_pair in overlap['ochuman']['overlap']['image_pairs']
    for (source, file_name) in image_pair
    if source == 'ochuman'
}
print(f'{len(oc_remove)=}')

# Ids of the images that carry one of those file names
oc_remove_img_id = {entry['id'] for entry in new_oc['images'] if entry['file_name'] in oc_remove}
print(f'{len(oc["images"])=}')
print(f'{len(oc["annotations"])=}')

# Build filtered lists instead of popping inside an enumerate() loop: popping
# while iterating shifts the remaining items, so entries get skipped (which is
# why such a loop had to be run several times).
kept_images = [entry for entry in new_oc['images'] if entry['id'] not in oc_remove_img_id]
kept_annotations = [entry for entry in new_oc['annotations'] if entry['image_id'] not in oc_remove_img_id]

# Delete before re-assigning so that both keys end up last in the dict.
del new_oc['images']
new_oc['images'] = kept_images
del new_oc['annotations']
new_oc['annotations'] = kept_annotations

print(f"{len(new_oc['images'])=}")
print(f"{len(new_oc['annotations'])=}")

len(oc_remove)=717
len(oc["images"])=5081
len(oc["annotations"])=10375
len(new_oc['images'])=4364
len(new_oc['annotations'])=8918


## 4. Merge annotations

In [18]:
# License texts of the added datasets, stored verbatim in the 'info' section.
LICENSE_MPII = """Simplified BSD License. Copyright (c) 2015, Max Planck Institute for Informatics. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."""
LICENSE_OCHUMAN = """MIT License. Copyright (c) 2018 Roy Tseng. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE."""

# Merged COCO-style dict; key order: info, categories, images, annotations.
merge = {
    'info': {
        "description": "CrowdPose/MPII/OCHuman merge",
        "version": "1.0",
        "year": 2022,
        "date_created": "2022/10/11",
        "merged_by": "John Hoffmann",
        "merge_note": "14 keypoints (CrowdPose). CrowdPose trainval/MPII train/OCHUMAN valtest. COCO syntax. Approx. bbox around keypoints and bbox as segmentation, if no bbox/segmentation was given.",
        "license_mpii": LICENSE_MPII,
        "license_ochuman": LICENSE_OCHUMAN,
    },
    # Categories are taken over from CrowdPose.
    'categories': crowd['categories'],
}

# Tag every image with the dataset it comes from.
for source_name, coco in (('CrowdPose', crowd), ('OCHuman', new_oc), ('MPII', new_mpii)):
    for image in coco['images']:
        image['dataset'] = source_name

# Concatenate in the order CrowdPose, OCHuman, MPII.
merge['images'] = [*crowd['images'], *new_oc['images'], *new_mpii['images']]
merge['annotations'] = [*crowd['annotations'], *new_oc['annotations'], *new_mpii['annotations']]

## 5. Add CrowdPose's crowdIndex
The crowdIndex values shipped with CrowdPose contain some outliers caused by wrong annotations, plus some values that do not match the equation stated in the paper. Nevertheless, the original crowdIndex is kept for CrowdPose images.

In [19]:
def point_in_rect(point, rect):
    """Tell whether ``point`` (x, y) lies inside ``rect`` (x, y, width, height).

    Points lying exactly on an edge count as inside.
    """
    left, top, width, height = rect
    px, py = point
    inside_x = left <= px <= left + width
    inside_y = top <= py <= top + height
    return bool(inside_x and inside_y)

# Group the annotations of the added datasets by image id.
# - CrowdPose images are skipped: they keep their original crowdIndex.
# - iscrowd instances are skipped (they contain several people at once).
# A set keeps the membership test cheap for every annotation.
crowdpose_image_ids = {image['id'] for image in merge['images'] if image['dataset'] == 'CrowdPose'}
anno_lookup = {}
for anno in merge['annotations']:
    if anno['image_id'] in crowdpose_image_ids or anno['iscrowd']:
        continue
    anno_lookup.setdefault(anno['image_id'], []).append(anno)

# Per image and per person: how many labelled keypoints of the person itself
# ('kps_self') and of all other persons ('kps_other') fall into its bbox.
crowdedness_all = {}
for image_id, annos in tqdm(anno_lookup.items()):
    bboxes = {anno['id']: anno['bbox'] for anno in annos}
    keypoints = {}
    for anno in annos:
        triplets = np.asarray(anno['keypoints']).reshape(-1, 3)
        # Flag > 0 keeps visible + hidden keypoints and drops unlabelled ones.
        keypoints[anno['id']] = [(x, y) for x, y, vis in triplets if vis > 0]

    crowdedness = {}
    for idx, bbox in bboxes.items():
        kp_count = {'kps_self': 0, 'kps_other': 0}
        for anno_id, points in keypoints.items():
            inside = sum(1 for point in points if point_in_rect(point, bbox))
            kp_count['kps_self' if anno_id == idx else 'kps_other'] += inside
        crowdedness[idx] = kp_count
    crowdedness_all[image_id] = crowdedness

# calculate crowdIndex: mean of kps_other / kps_self over all persons of an
# image that have at least one own keypoint inside their bbox
crowdIndex = {}
for image_id, kps_stats in crowdedness_all.items():
    ratios = [
        stats['kps_other'] / stats['kps_self']
        for stats in kps_stats.values()
        if stats['kps_self'] > 0  # people with 0 keypoints are left out
    ]
    crowdIndex[image_id] = sum(ratios) / len(ratios) if ratios else 0

for image in merge['images']:
    if image['dataset'] != 'CrowdPose':
        # Images without any usable annotation never entered `crowdIndex`;
        # they get 0, like images whose persons all have 0 keypoints.
        image['crowdIndex'] = round(crowdIndex.get(image['id'], 0), 2)

  0%|          | 0/21772 [00:00<?, ?it/s]

## Write output and test

In [20]:
# Store the merged annotations as 'merge.json' next to the input annotation files
export = Path(anno_files[0]).parent / 'merge.json'
with export.open('w', encoding='utf-8') as f:
    json.dump(merge, f, ensure_ascii=False)

In [11]:
%%capture
# Check dataset in fiftyone: launch the app on a fixed local port and open
# that address in the web browser. `session` is used again further below to
# choose which samples are shown.
port = 5151
session = fo.launch_app(port=port)
webbrowser.open(f'http://localhost:{port}/')

Connected to FiftyOne on port 5151 at localhost.
If you are not connecting to a remote session, you may need to start a new session and specify a port


In [12]:
# Load at most 2000 samples of the merged COCO file (detections,
# segmentations and keypoints) into a FiftyOne dataset
dataset = fo.Dataset.from_dir(
    dataset_type=fo.types.COCODetectionDataset,
    data_path='/Users/john/datasets/crowdpose_mpii_oc/images',
    labels_path=export,
    label_types=["detections", "segmentations", "keypoints"],
    max_samples=2000,
)



 100% |███████████████| 2000/2000 [47.8s elapsed, 0s remaining, 50.3 samples/s]      


In [13]:
%%capture
# Choose what the running FiftyOne session displays.
# Alternative that shows the whole loaded dataset:
# session.view = dataset.view()  # show chosen
session.view = dataset.take(1000).view()  # sample random 1000

### Get subset from each dataset and check keypoint rearrangement in coco-annotator

In [21]:
# Export a small sample for a visual check: the first images of every source
# dataset, all of their annotations, and copies of the image files.
img_dir = '/Users/john/datasets/crowdpose_mpii_oc/images'
SAMPLES_PER_DATASET = 5

# The merged file was written as UTF-8 without ASCII escaping, so it is read
# back as UTF-8 too. The handle is only kept open while parsing.
with open(export, encoding='utf-8') as f:
    old = json.load(f)

# Take over everything except the image/annotation lists, which are rebuilt.
new = {key: value for key, value in old.items() if key not in ('images', 'annotations')}
new['images'] = []
# `source_name` rather than `dataset`, so that the FiftyOne `dataset` object
# of the cells above is not overwritten by a string.
for source_name in ('CrowdPose', 'OCHuman', 'MPII'):
    matching = [image for image in old['images'] if image['dataset'] == source_name]
    new['images'].extend(matching[:SAMPLES_PER_DATASET])

image_ids = {image['id'] for image in new['images']}
new['annotations'] = [anno for anno in old['annotations'] if anno['image_id'] in image_ids]

# Copy the sampled image files into 'img_export' next to the merged file.
export_img = export.parent / 'img_export'
export_img.mkdir(parents=True, exist_ok=True)
for img in new['images']:
    destination = export_img / img['file_name']
    # file_name may contain sub-directories; create them before copying.
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(Path(img_dir) / img['file_name'], destination)

with open(export.parent / 'export.json', 'w', encoding='utf-8') as f:
    json.dump(new, f, ensure_ascii=False)