In [1]:
import json
import sys
from pycocotools.coco import COCO
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil

The following code takes the APT-36K dataset and converts it to COCO and YOLO formats.

In [50]:
# Convert the per-video annotation JSONs found under `main_folder` into
# YOLO-style label files (one "<class> <x_center> <y_center> <width> <height>"
# line per rectangle, all values normalised by the image size) and copy the
# matching images next to them.
# NOTE(review): the JSON keys used here (imageWidth, imageHeight, shapes,
# shape_type, points) look like the LabelMe layout -- confirm.
main_folder = '13zebra'
exps = os.listdir(main_folder)
outdir = os.path.join('.','ap36k_zebra')
outdir_images = os.path.join(outdir,'images')
outdir_labels = os.path.join(outdir,'labels')
outdir_labels_coco = os.path.join(outdir,'labels_coco')

# Class id written as the first column of each label line.
single_class_id = 0   # used for the files in `labels`
coco_class_id = 22    # used for the files in `labels_coco`

for od in [outdir, outdir_images, outdir_labels, outdir_labels_coco]:
    # exist_ok avoids the check-then-create race and makes the cell re-runnable.
    os.makedirs(od, exist_ok=True)

for exp in exps:
    exp_dir = os.path.join(main_folder, exp)
    if not os.path.isdir(exp_dir):
        # Stray files inside `main_folder` would make os.listdir raise.
        continue
    for file in os.listdir(exp_dir):
        if not file.endswith('.json'):
            continue
        fname = file[:-5]
        out_fname = f'{exp}_{fname}'
        # The image is expected to sit next to its annotation with the same stem.
        shutil.copy(os.path.join(exp_dir, f"{fname}.jpg"), os.path.join(outdir_images, out_fname + ".jpg"))

        # Read the annotation fully, then close it before writing the labels.
        with open(os.path.join(exp_dir, f"{fname}.json"), "r") as f:
            annos = json.load(f)
        imwidth = annos['imageWidth']
        imheight = annos['imageHeight']

        yolo_path = os.path.join(outdir_labels, out_fname + ".txt")
        coco_path = os.path.join(outdir_labels_coco, out_fname + ".txt")
        with open(yolo_path, 'w') as of, open(coco_path, 'w') as of_coco:
            for shape in annos['shapes']:
                if shape['shape_type'] != 'rectangle':  # only bounding boxes are exported
                    continue
                p0 = shape['points'][0]
                p1 = shape['points'][1]
                # The two corners may come in any order, hence the abs().
                x_center = (p0[0] + p1[0])/(2*imwidth)
                y_center = (p0[1] + p1[1])/(2*imheight)
                width = abs(p1[0] - p0[0])/(imwidth)
                height = abs(p1[1] - p0[1])/(imheight)
                of.write(f"{single_class_id} {x_center} {y_center} {width} {height}\n")
                of_coco.write(f"{coco_class_id} {x_center} {y_center} {width} {height}\n")


# Map the keypoints

Map the 17 keypoints of the AP-10K / APT-36K layout to the 27-keypoint synthetic layout.

In [2]:
# (target keypoint index in the 27-keypoint layout,
#  source keypoint index in the 17-keypoint layout)
_KEYPOINT_INDEX_MAP = (
    (0, 13),   # left back paw
    (1, 12),   # left back knee
    (2, 11),   # left back thigh
    (3, 16),   # right back paw
    (4, 15),   # right back knee
    (5, 14),   # right back thigh
    (6, 10),   # right front paw
    (7, 9),    # right front knee
    (8, 8),    # right front thigh
    (9, 7),    # left front paw
    (10, 6),   # left front knee
    (11, 5),   # left front thigh
    (18, 1),   # right eye
    (19, 0),   # left eye
    (20, 2),   # nose
    (26, 3),   # neck -> back front
    (25, 4),   # root of tail -> back end
)


def map_keypoints(k):
    """Re-order a flat 17-keypoint list into the 27-keypoint layout.

    Each keypoint occupies three consecutive values in both the input and the
    output, so the input carries 51 values and the output 81.

    Target keypoints with no source counterpart (tail end, tail start, the four
    ear points, neck start, neck end, skull, body middle) are left as zeros.

    Parameters
    ----------
    k : sequence
        Flat keypoint values; at least 51 entries are required. Anything past
        index 50 is ignored.

    Returns
    -------
    list
        A new list of exactly 81 values.

    Raises
    ------
    ValueError
        If ``k`` has fewer than 51 values. Without this check the slice
        assignments below would silently shrink the result.
    """
    if len(k) < 51:
        raise ValueError(
            f"expected at least 51 keypoint values (17 keypoints x 3), got {len(k)}"
        )

    tmp = [0]*81
    for dst, src in _KEYPOINT_INDEX_MAP:
        tmp[3*dst:3*dst + 3] = k[3*src:3*src + 3]
    return tmp

Use the following if you want to filter and/or create a dataset from the apt36k_annotations.json file

In [53]:
# Video folders whose frames go to the validation split; frames from every
# other zebra video go to the training split.
apk36_used_valid_folders = [f'video{video_number}' for video_number in (2, 5, 13, 19)]

In [59]:
# Split the zebra entries of the apt36k annotation file into a training and a
# validation subset (by video folder) and remap their keypoints to 27 points.

# set the apt36k annotations
infile = '/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/apt36k_annotations.json'
# set the target validation and training folders
train_targets = ['/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/real_zebras_old/train/images']
valid_targets = ['/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/real_zebras_old/valid/images']
# if copy images
cp_images = True
# input image folder
# NOTE(review): the same ".../valid/images/" folder is the copy source for both
# splits -- confirm that it really holds the training images too.
input_image_folder = '/media/ebonetto/WindowsData/real_zebras/ap36k_zebra/valid/images/'

# Close the annotation file as soon as it has been parsed.
with open(infile,'r') as annotation_file:
    f = json.load(annotation_file)
train = []
val = []
train_ann = []
val_ann = []
train_id = []
val_id = []
# Not used in this cell; kept because other cells may read it.
sf = '/media/ebonetto/WindowsData/real_zebras/ap36k_zebra/valid/images'

valid_folders = set(apk36_used_valid_folders)
for j in f['images']:
    if '13zebra' not in j['file_name']:
        continue
    # Segment following the first 'video' in the original file name.
    name_tail = j['file_name'].split('video')[1]
    tmpfn = 'video' + name_tail.split('_')[0]
    # Flatten the Windows-style path into a single "video..._..." file name.
    j['file_name'] = 'video' + name_tail.replace('\\','_')

    if tmpfn not in valid_folders:
        split_images, split_ids, split_targets = train, train_id, train_targets
    else:
        split_images, split_ids, split_targets = val, val_id, valid_targets

    split_images.append(j)
    split_ids.append(j['id'])
    if cp_images:
        for tar in split_targets:
            shutil.copy(os.path.join(input_image_folder, j['file_name']), tar)

# Sets give constant-time membership tests; the id lists are kept unchanged.
train_id_lookup = set(train_id)
val_id_lookup = set(val_id)
for j in f['annotations']:
    j['category_id'] = 23
    j['num_keypoints'] = 27
    if j['image_id'] in train_id_lookup:
        j['keypoints'] = map_keypoints(j['keypoints'])
        train_ann.append(j)
    elif j['image_id'] in val_id_lookup:
        j['keypoints'] = map_keypoints(j['keypoints'])
        val_ann.append(j)
    

In [66]:
# Parse the annotation file twice so that f_train and f_valid are independent
# objects; each file handle is closed as soon as parsing finishes.
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/apt36k_annotations.json",'r') as annotation_file:
    f_train = json.load(annotation_file)
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/apt36k_annotations.json",'r') as annotation_file:
    f_valid = json.load(annotation_file)

In [60]:
def _zebra_27kp_categories():
    """Return a fresh category list with the 27-keypoint zebra definition.

    A new structure is built on every call, so the train and validation dicts
    do not share list objects. Skeleton entries are 1-based indices into
    ``keypoints``.
    """
    return [
        {'name': 'person', 'id': 1, 'supercategory': 'person'},
        {'supercategory': 'animal',
         'id': 23,
         'name': 'zebra',
         'keypoints': ['left_back_paw',
                       'left_back_knee',
                       'left_back_thigh',
                       'right_back_paw',
                       'right_back_knee',
                       'right_back_thigh',
                       'right_front_paw',
                       'right_front_knee',
                       'right_front_thigh',
                       'left_front_paw',
                       'left_front_knee',
                       'left_front_thigh',
                       'tail_end',
                       'tail_base',
                       'right_ear_tip',
                       'right_ear_base',
                       'left_ear_tip',
                       'left_ear_base',
                       'right_eye',
                       'left_eye',
                       'nose',
                       'neck_start',
                       'neck_end',
                       'skull',
                       'body_middle',
                       'back_end',
                       'back_front'],
         'skeleton': [[1, 2], [2, 3], [3, 26],
                      [4, 5], [5, 6], [6, 26],
                      [7, 8], [8, 9],
                      [10, 11], [11, 12],
                      [13, 14],
                      [15, 16], [17, 18],
                      [16, 19], [19, 20], [18, 20],
                      [19, 21], [20, 21],
                      [19, 24], [20, 24], [21, 24],
                      [24, 23], [23, 22], [22, 27],
                      [27, 9], [27, 12], [27, 25],
                      [25, 26], [26, 14]]},
        {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
    ]


# Validation split.
f_valid['images'] = val
f_valid['annotations'] = val_ann
f_valid['categories'] = _zebra_27kp_categories()

# Training split. This previously received `val` / `val_ann`, which wrote the
# validation data into the training JSON.
f_train['images'] = train
f_train['annotations'] = train_ann
f_train['categories'] = _zebra_27kp_categories()

In [61]:
# Write both splits; the context managers flush and close each output file.
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/real_zebras_old/valid/valid_27kp.json",'w') as valid_out:
    json.dump(f_valid, valid_out)
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/real_zebras_old/train/train_27kp.json",'w') as train_out:
    json.dump(f_train, train_out)

Use the following if you just want to convert an existing 17-keypoint annotation file to the 27-keypoint layout.

In [4]:
# Convert an existing 17-keypoint annotation file to the 27-keypoint layout:
# remap every annotation's keypoints, set its category to 23, and replace the
# category list with the 27-keypoint zebra definition.
json_file = '/ps/project/irotate/vit_data/zebra-300/annotations/val_17kps.json'
out_fname = '/ps/project/irotate/vit_data/zebra-300/annotations/val_27kps.json'
with open(json_file,'r') as source_file:
    anns = json.load(source_file)

for ann in anns['annotations']:
    ann['keypoints'] = map_keypoints(ann['keypoints'])
    ann['category_id'] = 23
    ann['num_keypoints'] = 27

# Skeleton entries are 1-based indices into `keypoints`.
anns['categories'] = [
    {'name': 'person', 'id': 1, 'supercategory': 'person'},
    {'supercategory': 'animal',
     'id': 23,
     'name': 'zebra',
     'keypoints': ['left_back_paw',
                   'left_back_knee',
                   'left_back_thigh',
                   'right_back_paw',
                   'right_back_knee',
                   'right_back_thigh',
                   'right_front_paw',
                   'right_front_knee',
                   'right_front_thigh',
                   'left_front_paw',
                   'left_front_knee',
                   'left_front_thigh',
                   'tail_end',
                   'tail_base',
                   'right_ear_tip',
                   'right_ear_base',
                   'left_ear_tip',
                   'left_ear_base',
                   'right_eye',
                   'left_eye',
                   'nose',
                   'neck_start',
                   'neck_end',
                   'skull',
                   'body_middle',
                   'back_end',
                   'back_front'],
     'skeleton': [[1, 2], [2, 3], [3, 26],
                  [4, 5], [5, 6], [6, 26],
                  [7, 8], [8, 9],
                  [10, 11], [11, 12],
                  [13, 14],
                  [15, 16], [17, 18],
                  [16, 19], [19, 20], [18, 20],
                  [19, 21], [20, 21],
                  [19, 24], [20, 24], [21, 24],
                  [24, 23], [23, 22], [22, 27],
                  [27, 9], [27, 12], [27, 25],
                  [25, 26], [26, 14]]},
    {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
]

# The context manager makes sure the output is flushed and closed.
with open(out_fname,'w') as target_file:
    json.dump(anns, target_file)

# merge jsons

Used to merge the real and synthetic dataset JSON files.

In [11]:
# Validation inputs: j1 is the real (apt) set, j2 the synthetic set; `of` is
# the path of the merged output. Files are closed right after parsing.
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/zebras_apt_valid.json",'r') as real_file:
    j1 = json.load(real_file)
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/syn_zebras/valid/valid_filtered.json",'r') as synthetic_file:
    j2 = json.load(synthetic_file)
of = "/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/merged_zebras/valid/valid_full.json"

In [12]:
# Load the merged file at `of` for inspection.
# NOTE(review): this needs the file to exist already, i.e. a previous run must
# have written it -- on a fresh checkout this cell fails.
with open(of,'r') as merged_file:
    j3 = json.load(merged_file)

In [21]:
# Number of annotations in the merged file loaded into j3.
merged_annotation_count = len(j3['annotations'])
print(merged_annotation_count)

67520


In [18]:
import random

# Merge the real (j1) and synthetic (j2) validation sets into one dict with
# consecutive image ids and annotation ids, both starting at 1.
#
# Annotations are grouped by their ORIGINAL image id before any id is
# rewritten. The previous nested scan compared against `image_id` values it had
# already rewritten, so an annotation could be matched again by a later image
# whose old id equalled an already assigned new id; it was also
# O(images x annotations).
# Bounding boxes are copied as they are (no clamping to the image bounds).
tmp = {}
tmp['images'] = []
tmp['annotations'] = []
tmp['categories'] = j1['categories']
counter = 1
ann_counter = 1
for dataset in (j1, j2):
    anns_by_image = {}
    for ann in dataset['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    for j in dataset['images']:
        old_id = j['id']
        j['id'] = counter
        tmp['images'].append(j)
        for ann in anns_by_image.get(old_id, ()):
            ann['id'] = ann_counter
            ann['image_id'] = counter
            tmp['annotations'].append(ann)
            ann_counter += 1
        counter += 1
    # Running total: first after the real set, then after the synthetic set.
    print(len(tmp['annotations']))


315
59904


In [22]:
# Write the merged validation set; the context manager flushes and closes it.
with open(of,'w') as merged_out:
    json.dump(tmp, merged_out)
# Drop the large intermediates before the training files are loaded.
del j1
del j2
del tmp

In [9]:
# Training inputs: j1 is the real (apt) set, j2 the synthetic set; `of` is the
# path of the merged output. Files are closed right after parsing.
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/zebras_apt_train.json",'r') as real_file:
    j1 = json.load(real_file)
with open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/syn_zebras/train/train_filtered.json",'r') as synthetic_file:
    j2 = json.load(synthetic_file)
# j4 = json.load(open("/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/syn_zebras/train/train.json",'r'))
of = "/media/ebonetto/WindowsData/pose_zebras_sw/ViTPose/data/merged_zebras/train/train_full.json"
# NOTE(review): this needs the merged file to exist already, i.e. a previous
# run must have written it -- on a fresh checkout this load fails.
with open(of,'r') as merged_file:
    j3 = json.load(merged_file)

In [6]:
import random
import collections

# Merge the real (j1) and synthetic (j2) training sets into one dict with
# consecutive image ids and annotation ids, both starting at 1.
#
# Both sets are grouped by their ORIGINAL image id before any id is rewritten.
# The previous nested scan over j1 compared against `image_id` values it had
# already rewritten, so an annotation could be matched again by a later image
# whose old id equalled an already assigned new id.
# Bounding boxes are copied as they are (no clamping to the image bounds).
tmp = {}
tmp['images'] = []
tmp['annotations'] = []
tmp['categories'] = j1['categories']
counter = 1
ann_counter = 1

real_anns = collections.defaultdict(list)
for ann in j1['annotations']:
    real_anns[ann['image_id']].append(ann)

for j in j1['images']:
    old_id = j['id']
    j['id'] = counter
    tmp['images'].append(j)
    for ann in real_anns[old_id]:
        ann['id'] = ann_counter
        ann['image_id'] = counter
        tmp['annotations'].append(ann)
        ann_counter += 1
    counter += 1

anns = collections.defaultdict(list)
for ann in j2['annotations']:
    anns[ann['image_id']].append(ann)

# Report progress against the real image count instead of a hard-coded total.
n_synthetic_images = len(j2['images'])
for idx, j in enumerate(j2['images']):
    print(f"{idx} over {n_synthetic_images}", end='\r')
    old_id = j['id']

    j['id'] = counter
    tmp['images'].append(j)
    for ann in anns[old_id]:
        ann['id'] = ann_counter
        ann_counter += 1
        ann['image_id'] = counter
        tmp['annotations'].append(ann)
    counter += 1
print(len(tmp['annotations']))


0 over 144011 over 144012 over 144013 over 144014 over 144015 over 144016 over 144017 over 144018 over 144019 over 1440110 over 1440111 over 1440112 over 1440113 over 1440114 over 1440115 over 1440116 over 1440117 over 1440118 over 1440119 over 1440120 over 1440121 over 1440122 over 1440123 over 1440124 over 1440125 over 1440126 over 1440127 over 1440128 over 1440129 over 1440130 over 1440131 over 1440132 over 1440133 over 1440134 over 1440135 over 1440136 over 1440137 over 1440138 over 1440139 over 1440140 over 1440141 over 1440142 over 1440143 over 1440144 over 1440145 over 1440146 over 1440147 over 1440148 over 1440149 over 1440150 over 1440151 over 1440152 over 1440153 over 1440154 over 1440155 over 1440156 over 1440157 over 1440158 over 1440159 over 1440160 over 1440161 over 1440162 over 1440163 over 1440164 over 1440165 over 1440166 over 1440167 over 1440168 over 1440169 over 1440170 over 1440171 over 1440172

10946 over 1440110947 over 1440110948 over 1440110949 over 1440110950 over 1440110951 over 1440110952 over 1440110953 over 1440110954 over 1440110955 over 1440110956 over 1440110957 over 1440110958 over 1440110959 over 1440110960 over 1440110961 over 1440110962 over 1440110963 over 1440110964 over 1440110965 over 1440110966 over 1440110967 over 1440110968 over 1440110969 over 1440110970 over 1440110971 over 1440110972 over 1440110973 over 1440110974 over 1440110975 over 1440110976 over 1440110977 over 1440110978 over 1440110979 over 1440110980 over 1440110981 over 1440110982 over 1440110983 over 1440110984 over 1440110985 over 1440110986 over 1440110987 over 1440110988 over 1440110989 over 1440110990 over 1440110991 over 1440110992 over 1440110993 over 1440110994 over 1440110995 over 1440110996 over 1440110997 over 1440110998 over 1440110999 over 1440111000 over 1440111001 over 1440111002 over 1440111003 over 1440111004 over 144

In [8]:
# Write the merged training set; the context manager flushes and closes it.
with open(of,'w') as merged_out:
    json.dump(tmp, merged_out)