# Create an initial dataset that combines object classes from the COCO dataset
# with body parts from the DensePose dataset, merging the left/right variants of each body part into a single class
## all body parts included

## corresponds with version 14


# Getting data

In [None]:
import cv2
import json
from json import JSONEncoder
import pandas as pd
import numpy as np
import pycocotools.mask as mask_util

from tqdm import tqdm

In [None]:
# version number of the json files; it is interpolated into the names of the
# annotation files written by this notebook
version = 14

In [None]:
# Load the COCO val2014 instance annotations.
# JSON text is UTF-8, so the encoding is stated explicitly instead of being
# taken from the machine's locale.
with open('/home/ubuntu/dormakaba/coco/annotations/instances_val2014.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [None]:
# Load the DensePose annotations (valminusminival split of COCO 2014).
# JSON text is UTF-8, so the encoding is stated explicitly instead of being
# taken from the machine's locale.
with open('/home/ubuntu/dormakaba/coco/annotations/densepose_coco_2014_valminusminival.json', encoding='utf-8') as json_file:
    person_data = json.load(json_file)

In [None]:
# Wrap the loaded COCO JSON in a DataFrame; the cells below read its
# list-valued entries as data_df[<key>][0] (e.g. data_df['categories'][0]).
data_df = pd.json_normalize(data)

In [None]:
# Wrap the loaded DensePose JSON in a DataFrame; the cells below read its
# list-valued entries as person_data_df[<key>][0] (e.g. person_data_df['images'][0]).
person_data_df = pd.json_normalize(person_data)

In [None]:
# Show which columns the COCO instances frame exposes.
data_df.keys()

In [None]:
# Show which columns the DensePose frame exposes.
person_data_df.keys()

In [None]:
# this is the list of images annotated with people
# it looks fine
person_data_df['images'][0][0].keys()

In [None]:
# Print the category definitions stored in the DensePose annotation file.
print(person_data_df['categories'][0])

# Generating classes

In [None]:
# Names of the 80 COCO classes; position i holds the class whose contiguous
# (1-based) label is i + 1.
COCO_CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
                'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
                'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
                'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
                'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
                'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
                'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
                'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
                'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
                'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# Subset of COCO classes kept in the custom dataset.
FROM_COCO_CLASSES = [
    'person', 'bicycle', 'cat', 'dog', 'backpack', 'umbrella', 'handbag',
    'suitcase', 'sports ball', 'baseball bat', 'skateboard', 'tennis racket',
    'bottle', 'wine glass', 'sandwich', 'chair', 'laptop', 'cell phone',
    'book', 'clock', 'scissors', 'teddy bear',
]

# Body-part classes. Earlier versions kept separate left/right entries
# ('hand1'/'hand2', 'foot1'/'foot2', 'upleg1'/'upleg2', 'lowleg1'/'lowleg2',
# 'uparm1'/'uparm2', 'lowarm1'/'lowarm2'); each pair is now a single class.
DENSEPOSE_CLASSES = [
    'torso', 'hand', 'foot', 'upleg', 'lowleg', 'uparm', 'lowarm', 'head',
]

# THE NEW CUSTOM CLASSES CONDENSE THE LEFT-RIGHT IN ONLY ONE CAT:
# the kept COCO classes followed by the body-part classes.
CUSTOM_CLASSES = FROM_COCO_CLASSES + DENSEPOSE_CLASSES

# COCO category ids run from 1 to 90 with ten ids unused; map every used id
# to its contiguous 1..80 label, in ascending id order.
_UNUSED_COCO_IDS = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
COCO_LABEL_MAP = {
    coco_id: label
    for label, coco_id in enumerate(
        (cid for cid in range(1, 91) if cid not in _UNUSED_COCO_IDS), start=1)
}

# this map relates body part position to unique identifier (avoiding left-right problem)
# key: position of the part in the 14-entry part list; value: offset of the
# merged class inside DENSEPOSE_CLASSES.
PERSON_LABEL_MAP = {
    0: 0,            # torso
    1: 1, 2: 1,      # hand1, hand2     -> hand
    3: 2, 4: 2,      # foot1, foot2     -> foot
    5: 3, 6: 3,      # upleg1, upleg2   -> upleg
    7: 4, 8: 4,      # lowleg1, lowleg2 -> lowleg
    9: 5, 10: 5,     # uparm1, uparm2   -> uparm
    11: 6, 12: 6,    # lowarm1, lowarm2 -> lowarm
    13: 7,           # head
}

In [None]:
# Inverse of COCO_LABEL_MAP: contiguous label -> original COCO category id.
REVERSE_COCO_LABEL_MAP = {label: coco_id for coco_id, label in COCO_LABEL_MAP.items()}

In [None]:
print(data_df['categories'][0][0])

# COCO categories kept in the custom dataset, in the order they appear in the
# COCO file. Each dict is copied: the ids are re-numbered below, and without
# the copy that would overwrite the original COCO ids stored inside
# data_df['categories'][0], which later cells still read.
custom_categories = [dict(cat_data)
                     for cat_data in data_df['categories'][0]
                     if cat_data['name'] in CUSTOM_CLASSES]

# Body-part categories go after the COCO ones; 'id' is only a placeholder
# until the re-numbering below.
for nc in DENSEPOSE_CLASSES:
    custom_categories.append({'supercategory': 'person', 'id': 1, 'name': nc})

# this is the list of the new categories
print(custom_categories)
print()
print(len(CUSTOM_CLASSES), "-", len(custom_categories))

# Give every custom category a consecutive 1-based id and record its name at
# that same position of cc_list (slot 0 stays 0: there is no category id 0).
cc_list = [0]*(len(CUSTOM_CLASSES)+1)
for cc_id, cc in enumerate(custom_categories, start=1):
    cc['id'] = cc_id
    cc_list[cc_id] = cc['name']

# the list of the names of the categories
print(cc_list)

In [None]:
# For every custom category position, store the 'id' field of the COCO
# category with the same name; positions with no COCO counterpart keep 0.
coco_category_id_list = [0] * len(cc_list)
for coco_cat in data_df['categories'][0]:
    cat_name = coco_cat['name']
    if cat_name not in cc_list:
        continue
    coco_category_id_list[cc_list.index(cat_name)] = coco_cat['id']

# this is the list of the COCO ids of the new categories (when exists)
print(coco_category_id_list)

In [None]:
# this variable has the coco category id in the position of the custom category
# (positions with no COCO counterpart, such as the body parts, keep 0)
custom_category_coco_category = [0] * len(cc_list)
for contiguous_label, cat_name in enumerate(COCO_CLASSES, start=1):
    if cat_name not in cc_list:
        continue
    # contiguous label -> original COCO id, stored at the custom category position
    custom_category_coco_category[cc_list.index(cat_name)] = REVERSE_COCO_LABEL_MAP[contiguous_label]

print(custom_category_coco_category)

# looking at images

In [None]:
# Image records of the DensePose file; print how many there are and which
# fields each record carries.
new_person_images = person_data_df['images'][0]
print( len(new_person_images), new_person_images[0].keys())

In [None]:
# The same image records as a DataFrame (one row per image dict), for
# column-wise access to fields such as 'id', 'width' and 'height'.
images_person_data_df = pd.DataFrame(person_data_df['images'][0])

In [None]:
# a = images_person_data_df[images_person_data_df['id']==262145]
# print(a['height'][0], a['width'][0])

In [None]:
# Peek at the first DensePose image record.
person_data_df['images'][0][0]

In [None]:
# list of image ids from human parts data (the DensePose file);
# the trailing expression displays how many ids there are
new_person_images_list = [a['id'] for a in person_data_df['images'][0]]
len(new_person_images_list)

In [None]:
# list of image ids from the COCO instances file;
# the trailing expression displays how many ids there are
all_images_list = [a['id'] for a in data_df['images'][0]]
len(all_images_list)

# looking at images
## Those are the images I'll use in the dataset that come from 
## the DENSEPOSE dataset

In [None]:
# person annotations
# extract 'segmentation': the individual annotation of each detected person
# extract 'dp_masks': and insert one annotation for each of the 14 parts of the person that have annotations
# if possible, convert the bitmap into ... (the original note is left unfinished here)
print(person_data_df['annotations'][0][0].keys())
print(person_data_df['annotations'][0][0]['dp_masks'])

In [None]:
# Print the first DensePose annotation in full.
print(person_data_df['annotations'][0][0])

# generating annotations

In [None]:
def GetDensePoseMasks(Polys):
    """Decode the 14 per-part masks of one annotation into binary arrays.

    Args:
        Polys: indexable with at least 14 entries; a falsy entry means the
            part has no mask, any other entry is handed to ``mask_util.decode``.

    Returns:
        A list of 15 items: fourteen 256x256 arrays holding 1 where the
        decoded mask is positive and 0 elsewhere (all zeros for a missing
        part), followed by a trailing ``0`` that is never filled.
    """
    part_masks = []
    for part_idx in range(14):
        encoded = Polys[part_idx]
        binary = np.zeros([256, 256])
        if encoded:
            binary[mask_util.decode(encoded) > 0] = 1
        part_masks.append(binary)
    # the result has always had 15 slots, the last one left at its initial 0
    part_masks.append(0)
    return part_masks

In [None]:
def GetDensePoseMask(encoded_mask):
    """Decode one encoded part mask into a 256x256 binary array.

    Args:
        encoded_mask: mask in the form accepted by ``mask_util.decode``.

    Returns:
        A 256x256 float array with 1 where the decoded mask is positive and
        0 everywhere else.
    """
    binary = np.zeros([256, 256])
    decoded = mask_util.decode(encoded_mask)
    binary[decoded > 0] = 1
    return binary

In [None]:
def get_poligons(mask):
    """Return the first contour of *mask* as a flat coordinate list.

    Args:
        mask: 2-D array; it is cast to ``uint8`` before contour extraction.

    Returns:
        ``[x0, y0, x1, y1, ...]`` (plain ints) for the first contour reported
        by ``cv2.findContours``, or an empty list when the mask contains no
        contour at all (previously this case raised ``IndexError``).
    """
    found = cv2.findContours(mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4 returns (contours, hierarchy) while OpenCV 3 returns
    # (image, contours, hierarchy); the contours are second-to-last in both.
    all_contours = found[-2]
    if len(all_contours) == 0:
        return []
    # each contour has shape (n_points, 1, 2); flattening yields x0, y0, x1, y1, ...
    return [int(coord) for coord in all_contours[0].reshape(-1)]

In [None]:
def CreateMaskBBox(mask, bbr, image_shape):
    """Paste a part mask into a full-size image mask and describe the result.

    Args:
        mask: 2-D part mask, resized here to the size of the bounding box.
        bbr: bounding box as (x, y, width, height).
        image_shape: image size as (width, height).

    Returns:
        A tuple ``(final_mask, bbox, mask_area, encoded_final_mask, contours)``:
        the full-size mask, the (x, y, w, h) box around its non-zero pixels
        (w and h are the max-minus-min index differences), the area reported
        by ``mask_util.area``, the ``mask_util`` encoding with ``counts``
        decoded to ``str``, and the flat contour list from ``get_poligons``.
    """
    img_w, img_h = image_shape[0], image_shape[1]
    canvas = np.zeros([img_h, img_w])

    # box corners, with the far corner clipped to the image
    left = int(bbr[0])
    top = int(bbr[1])
    right = min([int(bbr[0] + bbr[2]), img_w])
    bottom = min([int(bbr[1] + bbr[3]), img_h])

    # nearest-neighbour keeps the mask binary while stretching it to the box
    box_size = (int(right - left), int(bottom - top))
    canvas[top:bottom, left:right] = cv2.resize(mask, box_size, interpolation=cv2.INTER_NEAREST)

    # tight box around whatever ended up non-zero
    rows, cols = np.nonzero(canvas)
    row_lo, row_hi = np.min(rows), np.max(rows)
    col_lo, col_hi = np.min(cols), np.max(cols)
    tight_box = (col_lo, row_lo, col_hi - col_lo, row_hi - row_lo)

    canvas_u8 = canvas.astype(np.uint8)
    contours = get_poligons(canvas_u8)
    encoded = mask_util.encode(np.asfortranarray(canvas_u8))
    area = int(mask_util.area(encoded))
    # 'counts' comes back as bytes; decode it so the dict is JSON-serialisable
    encoded['counts'] = encoded['counts'].decode("utf-8")
    return canvas, tight_box, area, encoded, contours

In [None]:
# Build the human annotations: one 'person' annotation per non-crowd DensePose
# annotation, plus one annotation per body part that has a mask.

# O(1) lookups per annotation instead of scanning a list and filtering a
# DataFrame for each of them.
person_image_ids = set(new_person_images_list)
person_image_sizes = {img['id']: [img['width'], img['height']]
                      for img in person_data_df['images'][0]}

new_person_ann = []
for annotation in tqdm(person_data_df['annotations'][0]):
    ann_image_id = annotation['image_id']
    if annotation['iscrowd'] != 0 or ann_image_id not in person_image_ids:
        continue

    # annotation of the whole person, copied from the source
    new_person_ann.append({'segmentation': annotation['segmentation'],
                           'area': annotation['area'],
                           'image_id': ann_image_id,
                           'id': annotation['id'],
                           'category_id': 1,  # only person cat so must be 1
                           'bbox': [int(v) for v in annotation['bbox'][:4]],
                           'iscrowd': 0})

    if 'dp_masks' not in annotation:
        continue

    image_size = person_image_sizes[ann_image_id]  # [width, height]
    for i, ann_mask in enumerate(annotation['dp_masks']):
        # parts without a mask are stored as (near-)empty entries
        if len(ann_mask) <= 1:
            continue
        ann_mask_decoded = GetDensePoseMask(ann_mask)
        part = CreateMaskBBox(ann_mask_decoded, annotation['bbox'], image_size)
        part_bbox, part_area, contours = part[1], part[2], part[4]
        # keep only polygons with more than two points (> 4 coordinates)
        if len(contours) <= 4:
            continue
        new_person_ann.append({'segmentation': [contours],
                               'area': part_area,
                               'image_id': ann_image_id,
                               # placeholder; ids are re-assigned later when indices are updated.
                               # NOTE(review): anything saved before that step carries -1 for every part
                               'id': -1,
                               # the body-part categories start at custom id 23
                               # (after the 22 COCO classes); left/right share one id
                               'category_id': 23 + PERSON_LABEL_MAP[i],
                               'bbox': [int(v) for v in part_bbox[:4]],
                               'iscrowd': 0})

In [None]:
# check that the set of annotated image_ids has the same length as the set of densepose image ids
# printed values: number of annotations, number of distinct annotated images,
# number of distinct densepose images
new_person_images_annotated_list = [a['image_id'] for a in new_person_ann]
print( len(new_person_images_annotated_list), len(set(new_person_images_annotated_list)), len(set(new_person_images_list)))

# up to here, all human data and body parts are segmented and annotated
# the next step is to include the other classes

In [None]:
# Collect the annotations of the selected COCO classes, re-labelled with the
# custom category ids, together with the ids of the images they belong to.
new_data_ann = []
new_data_image_list = []
new_data_images = []
for annotation in tqdm(data_df['annotations'][0]):
    coco_cat_id = annotation['category_id']
    # custom_category_coco_category holds 0 in its unused slots, so a
    # non-positive id must never be treated as a match
    if coco_cat_id <= 0 or coco_cat_id not in custom_category_coco_category:
        continue
    new_data = {'segmentation': annotation['segmentation'],
                'area': annotation['area'],
                'image_id': annotation['image_id'],
                'id': annotation['id'],
                # the position inside custom_category_coco_category is the custom category id
                'category_id': custom_category_coco_category.index(coco_cat_id),
                'bbox': [int(v) for v in annotation['bbox'][:4]],
                'iscrowd': annotation['iscrowd']}
    # Fix: the record used to be built and then dropped, which left
    # new_data_ann (and every dataset assembled from it) without annotations.
    # NOTE(review): 'person' is among the selected classes, so person instances
    # from this file are included as well - confirm that is intended.
    new_data_ann.append(new_data)
    new_data_image_list.append(annotation['image_id'])

In [None]:
# Image records that own at least one selected annotation. The ids are put in
# a set first: new_data_image_list has one entry per annotation, and testing
# membership against that list for every image is needlessly quadratic.
new_data_image_ids = set(new_data_image_list)
new_data_images = [image for image in data_df['images'][0] if image['id'] in new_data_image_ids]

# Saving humans parts data

In [None]:

# Human-parts dataset: the DensePose image records, the person and body-part
# annotations, and the custom category list.
new_human_part_data = {
    'images': new_person_images, 
    'annotations': new_person_ann, 
    'categories': custom_categories
}

In [None]:
class MyEncoder(JSONEncoder):
    """JSON encoder that serialises otherwise unsupported objects via their ``__dict__``."""

    def default(self, o):
        # only called for objects the base encoder cannot serialise itself
        return o.__dict__


# JSON text (a str) of the human-parts dataset
new_human_part_data_json = json.dumps(new_human_part_data, cls=MyEncoder)

In [None]:
# new_human_part_data_json is already JSON text. Write it as-is: sending it
# through json.dump again stored the whole document as one quoted string,
# which readers had to decode twice to get the dataset back.
with open(f'/home/ubuntu/dormakaba/coco/annotations/custom_ann_human_parts_valid2014_v{version}.json', 'w') as f:
    f.write(new_human_part_data_json)

# Saving non humans data


In [None]:

# Non-human dataset: the COCO image records selected above, their annotations,
# and the custom category list.
new_non_human_data = {
    'images': new_data_images, 
    'annotations': new_data_ann, 
    'categories': custom_categories
}

In [None]:
class MyEncoder(JSONEncoder):
    """JSON encoder that serialises otherwise unsupported objects via their ``__dict__``."""

    def default(self, o):
        # only called for objects the base encoder cannot serialise itself
        return o.__dict__


# JSON text (a str) of the non-human dataset
new_non_human_data_json = json.dumps(new_non_human_data, cls=MyEncoder)

In [None]:
# new_non_human_data_json is already JSON text. Write it as-is: sending it
# through json.dump again stored the whole document as one quoted string,
# which readers had to decode twice to get the dataset back.
with open(f'/home/ubuntu/dormakaba/coco/annotations/custom_ann_non_human_valid2014_v{version}.json', 'w') as f:
    f.write(new_non_human_data_json)

# updating indices

In [None]:
# Concatenate the human and non-human parts into the final image and
# annotation lists.
# NOTE(review): no de-duplication happens here; an image id present in both
# image lists would appear twice in final_images - confirm whether they overlap.
final_images = new_person_images + new_data_images
final_annotations = new_person_ann + new_data_ann

In [None]:
# Sanity check: ids of final images that are missing from the COCO image list
# (the displayed count is expected to be 0).
# Fix: `nil.append[...]` subscripted the method instead of calling it and
# raised TypeError as soon as one missing image was found.
known_image_ids = set(all_images_list)  # O(1) membership instead of a list scan
nil = [image["id"] for image in tqdm(final_images) if image["id"] not in known_image_ids]
len(nil)

In [None]:
# Show the fields carried by the first final annotation.
final_annotations[0].keys()

In [None]:
# Re-number every annotation with a consecutive 1-based id.
for new_id, ann in enumerate(tqdm(final_annotations), start=1):
    ann['id'] = new_id

In [None]:
# Sanity check repeated after re-indexing: ids of final images that are
# missing from the COCO image list (the displayed count is expected to be 0).
# Fix: `nil.append[...]` subscripted the method instead of calling it and
# raised TypeError as soon as one missing image was found.
known_image_ids = set(all_images_list)  # O(1) membership instead of a list scan
nil = [image["id"] for image in tqdm(final_images) if image["id"] not in known_image_ids]
len(nil)

# saving all data

In [None]:
# Complete dataset: all images, all re-indexed annotations and the custom
# category list.
# (original note, meaning unclear: "IN THE version_02")
new_person_data = {
    'images': final_images, 
    'annotations': final_annotations, 
    'categories': custom_categories
}

In [None]:
from json import JSONEncoder


class MyEncoder(JSONEncoder):
    """JSON encoder that serialises otherwise unsupported objects via their ``__dict__``."""

    def default(self, o):
        # only called for objects the base encoder cannot serialise itself
        return o.__dict__


# JSON text (a str) of the complete dataset
new_person_data_json = json.dumps(new_person_data, cls=MyEncoder)

In [None]:
# new_person_data_json is already JSON text. Write it as-is: sending it
# through json.dump again stored the whole document as one quoted string,
# which readers had to decode twice to get the dataset back.
with open(f'/home/ubuntu/dormakaba/coco/annotations/custom_ann_valid2014_v{version}.json', 'w') as f:
    f.write(new_person_data_json)