In [1]:
# Reload edited modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline (NOTE(review): no plotting call appears in
# the visible cells — confirm this magic is still needed).
%matplotlib inline

In [2]:
import pandas as pd
import pickle
from tqdm import tqdm_notebook as tqdm
from collections import defaultdict
import os
import json

In [3]:
def make_save_dir(save_dir):
    """Create ``save_dir`` (and any missing parent directories) if needed.

    Args:
        save_dir: Directory path to create, or ``None`` to do nothing.

    Returns:
        None.

    Raises:
        FileExistsError: If ``save_dir`` already exists but is not a directory.
    """
    # Identity test is the reliable way to detect None; `!= None` defers to
    # the object's own comparison methods.
    if save_dir is None:
        return
    # exist_ok=True keeps the call idempotent and removes the window between
    # "does it exist?" and "create it" where another process could create it.
    os.makedirs(save_dir, exist_ok=True)
            
def make_subset(annotations, cls_div_ids):
    """Keep only the annotations whose label belongs to ``cls_div_ids``.

    Args:
        annotations: Mapping of image id -> iterable of annotation dicts.
            Each annotation is read through its "LabelName" key.
        cls_div_ids: Container of label names to keep.

    Returns:
        A ``(dict_annos, idx_to_ids)`` tuple. ``dict_annos`` is a
        ``defaultdict(list)`` mapping image id -> list of kept annotations;
        images with no kept annotation are left out. ``idx_to_ids`` maps a
        0-based position to the image id at that position in ``dict_annos``.
    """
    dict_annos = defaultdict(list)
    for image_id, anno_details in tqdm(annotations.items()):
        kept = [anno for anno in anno_details if anno["LabelName"] in cls_div_ids]
        if kept:
            dict_annos[image_id] = kept

    # Enumerating the mapping walks its keys in insertion order.
    idx_to_ids = dict(enumerate(dict_annos))
    return dict_annos, idx_to_ids

def make_img_id_subset(annotations, cls_div_ids, valid_img_ids):
    """Build a class-filtered annotation subset for a given list of image ids.

    Args:
        annotations: Mapping of image id -> iterable of annotation dicts.
            Each annotation is read through its "LabelName" key.
        cls_div_ids: Container of label names to keep.
        valid_img_ids: Iterable of image ids to consider. Ids that are not
            present in ``annotations`` are printed and skipped.

    Returns:
        A ``(dict_annos, idx_to_ids)`` tuple. ``dict_annos`` is a
        ``defaultdict(list)`` mapping image id -> list of kept annotations;
        images with no kept annotation are left out. ``idx_to_ids`` maps a
        0-based position to the image id at that position in ``dict_annos``.
    """
    dict_annos = defaultdict(list)

    for img_id in valid_img_ids:
        try:
            anno_details = annotations[img_id]
        except KeyError:
            # Report the unknown id and move on. Without the `continue`, the
            # loop would reuse the previous image's annotations for this id
            # (or fail with NameError when the very first id is unknown).
            print(img_id)
            continue
        subset_annos = [anno for anno in anno_details if anno["LabelName"] in cls_div_ids]
        if subset_annos:
            dict_annos[img_id] = subset_annos

    # Enumerating the mapping walks its keys in insertion order.
    idx_to_ids = dict(enumerate(dict_annos))
    return dict_annos, idx_to_ids

In [4]:
# NOTE: despite the `_dir` suffix, every name below except `save_dir` holds a
# file path; `save_dir` is the directory prefix under which subsets are written.

# Full-dataset inputs: per-image annotations and the class-id -> name map.
clsids_to_names_dir = "../data_info/clsids_to_names.pkl"
train_bbox_dir = "../data_info/train/annotations/train-anno.json"
valid_bbox_dir = "../data_info/valid/annotations/valid-anno.json"

# Headerless text files listing the class ids and image ids of interest.
human_labels_dir = "../dataset/class-ids-human-body-parts-and-mammal.txt"
img_ids_dir = "../dataset/train-image-ids-with-human-parts-and-mammal-boxes.txt"

# Output root for the generated subsets.
save_dir = "../data_info_subsets/"

In [6]:
# Load the class-id -> name map. The file is opened in a `with` block so the
# handle is closed deterministically instead of being left to the GC.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(clsids_to_names_dir, 'rb') as handle:
    cls_to_names = pickle.load(handle)

# Both text files are read without a header row; column 0 holds the ids.
human_labels_list = list(pd.read_csv(human_labels_dir, header=None)[0])
# NOTE(review): not referenced by any later visible cell — confirm it is needed.
human_img_ids_list = list(pd.read_csv(img_ids_dir, header=None)[0])

In [7]:
# Keep only the label ids that have an entry in `cls_to_names`.
# Known ids print their readable name; unknown ids are printed raw and dropped.
# An explicit membership test replaces the bare `except:`, which would also
# have hidden unrelated errors (including KeyboardInterrupt).
important_human_label_list = []
for hl in human_labels_list:
    if hl in cls_to_names:
        print(cls_to_names[hl])
        important_human_label_list.append(hl)
    else:
        print(hl)

Human eye
Human beard
Human mouth
/m/02p0tk3
Human foot
Human leg
Human ear
Human hair
Human head
/m/04rky
Human arm
Human nose
Human hand


In [8]:
# Parse the full train/valid annotation files. `with` guarantees the file
# handles are closed once parsing finishes.
with open(train_bbox_dir, 'r') as fp:
    train_annotations = json.load(fp)
with open(valid_bbox_dir, 'r') as fp:
    valid_annotations = json.load(fp)

## Creating Validation Set for Human Labels

In [9]:
# Restrict the validation annotations to the retained human-part labels.
dict_valid_annos, valid_idx_to_ids = make_subset(
    annotations=valid_annotations,
    cls_div_ids=important_human_label_list,
)

HBox(children=(IntProgress(value=0, max=34917), HTML(value='')))




In [10]:
# Number of validation images kept and number of index entries for them.
len(dict_valid_annos), len(valid_idx_to_ids)

(7449, 7449)

In [11]:
# Output directory for the validation subset; created on demand.
human_valid_save_dir = save_dir + "humanparts/valid/annotations"
make_save_dir(human_valid_save_dir)

# The annotations are held in a defaultdict; convert to a plain dict for JSON.
with open(human_valid_save_dir + "/valid-anno.json", "w") as fp:
    json.dump(dict(dict_valid_annos), fp)

# Position -> image id lookup, stored with the highest pickle protocol.
with open(human_valid_save_dir + "/valid-idx_to_id.pkl", "wb") as handle:
    pickle.dump(valid_idx_to_ids, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Creating Training Set for Human Labels

In [12]:
# Restrict the training annotations to the retained human-part labels.
dict_train_annos, train_idx_to_ids = make_subset(
    annotations=train_annotations,
    cls_div_ids=important_human_label_list,
)

HBox(children=(IntProgress(value=0, max=1674979), HTML(value='')))




In [13]:
# Number of training images kept and number of index entries for them.
len(dict_train_annos), len(train_idx_to_ids)

(84425, 84425)

In [14]:
# Output directory for the training subset; created on demand.
human_train_save_dir = save_dir + "humanparts/train/annotations"
make_save_dir(human_train_save_dir)

# The annotations are held in a defaultdict; convert to a plain dict for JSON.
with open(human_train_save_dir + "/train-anno.json", "w") as fp:
    json.dump(dict(dict_train_annos), fp)

# Position -> image id lookup, stored with the highest pickle protocol.
with open(human_train_save_dir + "/train-idx_to_id.pkl", "wb") as handle:
    pickle.dump(train_idx_to_ids, handle, protocol=pickle.HIGHEST_PROTOCOL)

## Save Class Info for Human Labels

In [15]:
# Class id -> readable name for the retained labels, plus a "background" entry.
human_cls_to_names = {
    label_id: cls_to_names[label_id] for label_id in important_human_label_list
}
human_cls_to_names["background"] = "background"

# Class id -> integer index. Retained labels are numbered from 1 in list
# order; index 0 is assigned to "background".
human_clsids_to_idx = {
    label_id: index
    for index, label_id in enumerate(important_human_label_list, start=1)
}
human_clsids_to_idx["background"] = 0

In [16]:
# Display the class-id -> name mapping (including "background") for inspection.
human_cls_to_names

{'/m/014sv8': 'Human eye',
 '/m/015h_t': 'Human beard',
 '/m/0283dt1': 'Human mouth',
 '/m/031n1': 'Human foot',
 '/m/035r7c': 'Human leg',
 '/m/039xj_': 'Human ear',
 '/m/03q69': 'Human hair',
 '/m/04hgtk': 'Human head',
 '/m/0dzf4': 'Human arm',
 '/m/0k0pj': 'Human nose',
 '/m/0k65p': 'Human hand',
 'background': 'background'}

In [17]:
# Display the class-id -> index mapping (including "background") for inspection.
human_clsids_to_idx

{'/m/014sv8': 1,
 '/m/015h_t': 2,
 '/m/0283dt1': 3,
 '/m/031n1': 4,
 '/m/035r7c': 5,
 '/m/039xj_': 6,
 '/m/03q69': 7,
 '/m/04hgtk': 8,
 '/m/0dzf4': 9,
 '/m/0k0pj': 10,
 '/m/0k65p': 11,
 'background': 0}

In [18]:
human_class_save_dir = save_dir+"humanparts"
# Create the directory explicitly so this cell does not depend on the
# annotation-saving cells having already created it as a parent directory.
make_save_dir(human_class_save_dir)

# Class id -> readable name (including "background").
with open(human_class_save_dir+"/clsids_to_names.pkl", 'wb') as handle:
    pickle.dump(human_cls_to_names, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Class id -> integer index (including "background").
with open(human_class_save_dir+"/clsids_to_idx.pkl", 'wb') as handle:
    pickle.dump(human_clsids_to_idx, handle, protocol=pickle.HIGHEST_PROTOCOL)