In [None]:
from dataset_interface.coco import COCODataAPI
import numpy as np
import matplotlib.pyplot as plt
import yaml
import os
from imageio import imread, imwrite
import copy
import xml.etree.ElementTree
import glob
import shutil

In [None]:
# Folder handed to the COCO API; both split configuration files sit inside it.
data_dir = 'coco/'
config_file_val = data_dir + 'sample_coco_configs_val.yml'
config_file_train = data_dir + 'sample_coco_configs_train.yml'

# One API object per split, each built from the data folder and that split's config file.
coco_api_validation = COCODataAPI(data_dir=data_dir, config_file=config_file_val) if False else COCODataAPI(data_dir, config_file_val)
coco_api_training = COCODataAPI(data_dir, config_file_train)

## Categories in dataset

In [None]:
# Print the key and name of every sub-category the training API reports under 'furniture'.
furniture_sub_categories = coco_api_training.get_sub_categories('furniture')
for k in furniture_sub_categories:
    category = furniture_sub_categories[k]
    print(k, category.name)

In [None]:
# COCO categories to import: category name -> category id.
desired_categories_to_id = {
    'spoon': 50,
    'orange': 55,
    'dining table': 67,
    'bowl': 51,
    'cup': 47,
    'fork': 48,
    'bed': 65,
    'chair': 62,
    'couch': 63,
    'knife': 49,
    'person': 1,
}

# Reverse lookup (category id -> category name), in the same order as above.
coco_cat_id_to_name = {
    cat_id: cat_name for cat_name, cat_id in desired_categories_to_id.items()
}
coco_cat_id_to_name

In [None]:
# Disabled scratch call: looks up the images of one category (55 is 'orange' in
# desired_categories_to_id).
# NOTE(review): `coco_api` is not defined in any visible cell; use
# coco_api_training or coco_api_validation before re-enabling this line.
# coco_api.get_images_in_category(55)

## Load the image IDs and image objects of the desired categories

In [None]:
# Per-split lookup tables: image id -> {'info': image object, 'bounding_boxes': boxes}.
# 'bounding_boxes' is whatever get_bounding_boxes_by_ids returns for the image
# (later cells iterate it as category id -> list of boxes).
train_images_dictionaries = {}
val_images_dictionaries = {}

# The id filter is the same for every image, so build the list once instead of
# once per image.
desired_category_ids = list(desired_categories_to_id.values())

print('Loading images in categories {}'.format(list(desired_categories_to_id.keys())))
for key, category_id in desired_categories_to_id.items():
    print('collecting images of {} with Id {} '.format(key,category_id))
    # Identical collection step for both splits; training is handled first.
    for coco_api, split_images in ((coco_api_training, train_images_dictionaries),
                                   (coco_api_validation, val_images_dictionaries)):
        for image_id, image_inf in coco_api.get_images_in_category(category_id).items():
            # An image that shows several desired categories is stored only once;
            # its boxes for all desired categories were fetched on first sight.
            if image_id in split_images:
                continue
            boxes = coco_api.get_bounding_boxes_by_ids(image_id, desired_category_ids)
            split_images[image_id] = {'info': image_inf, 'bounding_boxes': boxes}

print('Images loaded')

In [None]:
# Number of distinct validation image ids collected by the loading cell.
len(val_images_dictionaries)

## Remove training images numbered above 15199

In [None]:
# WARNING: destructive cell. It permanently deletes every training image whose
# numeric file name is greater than the value below.
last_image_num_to_keep = 15199

filenames = glob.glob('robocup_objects/training_images/*.jpg')
for fn in filenames:
    stem = os.path.basename(fn).split('.')[0]
    try:
        img_num = int(stem)
    except ValueError:
        # Not one of the numbered images: leave it alone rather than aborting
        # the clean-up half-way through.
        print('skipping non-numeric file name {}'.format(fn))
        continue
    if img_num > last_image_num_to_keep:
        os.remove(fn)
print('done')

## Read the annotations file

In [None]:
# Load the annotations saved by earlier runs, one YAML file per split. Later cells
# index and append to the loaded objects, i.e. they are used as lists of entries.
val_annotations_file = 'robocup_objects/val_annotations.yml'
with open(val_annotations_file) as val_stream:
    val_annotations = yaml.safe_load(val_stream)

print('Validation annotations ', len(val_annotations))

train_annotations_file = 'robocup_objects/train_annotations.yml'
with open(train_annotations_file) as train_stream:
    train_annotations = yaml.safe_load(train_stream)

print('Training annotations ', len(train_annotations))


## Get the number of the latest image generated

In [None]:
# The image number is taken from the file name of the last validation annotation
# (the base name with '.jpg' removed, parsed as an integer).
last_val_annotation = val_annotations[-1]
img_name = last_val_annotation['image_name']
latest_image_num = int(os.path.basename(img_name).replace('.jpg',''))
# Show which fields an annotation entry carries.
print(last_val_annotation.keys())

## Load classes file

In [None]:
# Path of the class list; the file is read as a mapping of class id -> class name.
classes_file = 'classes_robocup.yml'
# The handle gets its own name so `classes_file` keeps holding the path instead of
# being rebound to a closed file object.
with open(classes_file, 'r') as classes_stream:
    classes = yaml.safe_load(classes_stream)

# Reverse lookup: class name -> class id.
classes_to_id = {class_name: class_id for class_id, class_name in classes.items()}

## Annotation format 
* image_name 
* objects:
    - class_id
    - xmin
    - xmax
    - ymin
    - ymax

In [None]:
# Split to extend with the collected COCO images: 'val' selects validation,
# any other value selects training.
# NOTE: this cell is not idempotent. Every run copies the images again under new
# numbers and appends another set of entries to the in-memory annotation list.
dataset = 'train'

desired_categories = list(desired_categories_to_id.keys())
print(desired_categories)

# Resolve everything that depends on the split once, instead of re-testing
# `dataset` for every image. The image dictionaries are only read below, so they
# are used directly rather than deep-copied.
if dataset == 'val':
    images_dictionaries = val_images_dictionaries
    annotations = val_annotations
    output_dir = 'robocup_objects/validation_images/'
    image_subdir = 'validation_images'
    split_label = 'validation'
else:
    images_dictionaries = train_images_dictionaries
    annotations = train_annotations
    output_dir = 'robocup_objects/training_images/'
    image_subdir = 'training_images'
    split_label = 'training'

# Continue numbering after the image referenced by the last existing annotation.
img_name_ = annotations[-1]['image_name']
latest_image_num = int(os.path.basename(img_name_).replace('.jpg',''))
print('Starting to add {} images at index '.format(split_label), latest_image_num)

for image_id, image_data in images_dictionaries.items():
    latest_image_num += 1
    print('Adding image number {}'.format(latest_image_num))

    numbered_file = str(latest_image_num) + '.jpg'
    # Where the image is copied to on disk ...
    output_path = os.path.join(output_dir, numbered_file)
    # ... and the shorter path that is stored in the annotation entry.
    image_name = os.path.join(image_subdir, numbered_file)

    image_inf = image_data['info']
    detected_objects = []
    for category_id, boxes in image_data['bounding_boxes'].items():
        class_name = coco_cat_id_to_name[category_id]
        for box in boxes:
            # Class names are looked up with underscores instead of spaces
            # ('dining table' -> 'dining_table'). Boxes arrive as corner + size
            # and are stored as min/max corners.
            detected_objects.append({'class_id': classes_to_id[class_name.replace(' ','_')],
                                     'xmin': box['min_x'],
                                     'xmax': box['min_x'] + box['width'],
                                     'ymin': box['min_y'],
                                     'ymax': box['min_y'] + box['height']})

    # Copy first: if the copy fails, no annotation is recorded for a missing file.
    shutil.copyfile(image_inf.image_path, output_path)
    annotations.append({'image_name': image_name,'objects': detected_objects})

## Save annotations

In [None]:
# Split whose annotations are written back to disk: 'val' selects validation,
# any other value selects training.
dataset = 'train'
if dataset == 'val':
    annotations_file = 'robocup_objects/val_annotations.yml'
    output_annotations = val_annotations
else:
    print('saving training annotations')
    annotations_file = 'robocup_objects/train_annotations.yml'
    output_annotations = train_annotations

# Serialise BEFORE opening the target. Opening with 'w' truncates the file, so an
# error raised while dumping (e.g. a value safe_dump cannot represent) would
# otherwise wipe the annotations already stored on disk.
serialized_annotations = yaml.safe_dump(output_annotations, default_flow_style=False)

with open(annotations_file, 'w') as annotation_file:
    annotation_file.write(serialized_annotations)

print('done saving')

In [None]:
# Keep only the first `kept_annotation_count` training entries. Slicing builds a
# new list, so other references to the old list are left untouched.
# NOTE(review): presumably this rolls back entries appended by an earlier run;
# confirm the count before re-running.
kept_annotation_count = 95496
train_annotations = train_annotations[:kept_annotation_count]

len(train_annotations)

# Manually labeled images

In [None]:
# Folder holding the hand-labelled images (.jpg) and one XML annotation per image.
trash_can_path = 'trash_can/'
# glob returns files in arbitrary order; sort so the numbers assigned below are
# reproducible between runs.
xml_files = sorted(glob.glob(os.path.join(trash_can_path, '*.xml')))
img_files = sorted(glob.glob(os.path.join(trash_can_path, '*.jpg')))

# Pair each annotation with its image by file stem. Pairing by list index is
# unsafe: the two glob results are independent and need not line up.
images_by_stem = {os.path.splitext(os.path.basename(path))[0]: path for path in img_files}

# Continue numbering after the image referenced by the last training annotation.
img_name_ = train_annotations[-1]['image_name']
latest_image_num = int(os.path.basename(img_name_).replace('.jpg',''))

for xml_file in xml_files:
    stem = os.path.splitext(os.path.basename(xml_file))[0]
    source_image = images_by_stem.get(stem)
    if source_image is None:
        # No image number is consumed for an annotation without an image.
        print('skipping {}: no matching .jpg found'.format(xml_file))
        continue

    root = xml.etree.ElementTree.parse(xml_file).getroot()
    # NOTE(review): only the first <object> element is read; further objects in
    # the same file are ignored. Every box is labelled as 'trash_can'.
    bndbox = root.find('object').find('bndbox')

    latest_image_num += 1
    image_name = os.path.join('training_images',str(latest_image_num) + '.jpg')
    output_path = os.path.join('robocup_objects/training_images/',str(latest_image_num) \
                 + '.jpg')

    object_dict = {'class_id': classes_to_id['trash_can'],
                   'xmin': int(bndbox.find('xmin').text),
                   'xmax': int(bndbox.find('xmax').text),
                   'ymin': int(bndbox.find('ymin').text),
                   'ymax': int(bndbox.find('ymax').text)}

    # Write the image first so that a failed read/write never leaves an
    # annotation pointing at a file that does not exist.
    img = imread(source_image)
    imwrite(output_path, img)

    train_annotations.append({'image_name': image_name,'objects': [object_dict]})

print(len(train_annotations))