In [1]:
import xml.etree.ElementTree as ET
from os import getcwd

In [2]:
###========= voc annotation 2007/2012 start ==========
# (year, image_set) pairs; each pair names one VOC image-set list to convert
# and yields one '<year>_<image_set>.txt' output file.
sets = [
    ('2007', 'train'),
    ('2007', 'val'),
    ('2007', 'test'),
    ('2012', 'train'),
    ('2012', 'val'),
]

In [3]:
# The 20 VOC object categories. A label's position in this list is the
# numeric class id written to the annotation files.
classes = [
    "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog",
    "horse", "motorbike", "person", "pottedplant",
    "sheep", "sofa", "train", "tvmonitor",
]

In [4]:
# convert annotation 
def convert_annotation(year, image_id, list_file, class_names=None):
    """Append the usable boxes of one VOC annotation file to ``list_file``.

    Reads ``VOCdevkit/VOC<year>/Annotations/<image_id>.xml`` (relative to the
    current working directory) and, for every ``<object>`` that is kept,
    writes `` xmin,ymin,xmax,ymax,class_id`` (leading space, no newline) to
    ``list_file``.

    Objects are skipped when their name is not a known class or when they are
    flagged ``<difficult>1</difficult>``. A missing or empty ``<difficult>``
    tag is treated as "not difficult".

    Args:
        year: VOC year string used in the directory name, e.g. ``'2007'``.
        image_id: annotation file stem (without ``.xml``).
        list_file: writable text file-like object receiving the box entries.
        class_names: optional sequence of class names; a name's index is its
            class id. Defaults to the module-level ``classes`` list, which
            keeps existing three-argument calls working unchanged.

    Raises:
        OSError: if the annotation file cannot be opened.
        xml.etree.ElementTree.ParseError: if the XML is malformed.
    """
    names = classes if class_names is None else class_names
    annotation_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)

    # Context manager so the handle is released even when parsing fails.
    with open(annotation_path) as in_file:
        root = ET.parse(in_file).getroot()

    for obj in root.iter('object'):
        cls = obj.find('name').text
        if cls not in names:
            continue

        # Not every annotation tool emits <difficult>; absent/empty means 0.
        difficult_node = obj.find('difficult')
        difficult_text = ''
        if difficult_node is not None and difficult_node.text:
            difficult_text = difficult_node.text.strip()
        if difficult_text and int(difficult_text) == 1:
            continue

        cls_id = names.index(cls)
        xmlbox = obj.find('bndbox')
        # int(float(...)) accepts both "12" and float-formatted "12.0".
        b = tuple(int(float(xmlbox.find(tag).text))
                  for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        list_file.write(" " + ",".join(str(a) for a in b) + ',' + str(cls_id))

In [5]:
# Working directory at the time this cell runs; it is prefixed to every image
# path written to the VOC list files, so run the cell from the directory that
# contains VOCdevkit/.
wd = getcwd()

In [6]:
# For every (year, image_set) pair write '<year>_<image_set>.txt' with one
# line per image: the image path followed by its box entries.
for year, image_set in sets:
    # `with` closes the id list as soon as it has been read.
    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)) as ids_file:
        image_ids = ids_file.read().strip().split()

    # `with` also guarantees the output is flushed and closed if an
    # annotation fails to parse part-way through the set.
    with open('%s_%s.txt'%(year, image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
            # Appends " xmin,ymin,xmax,ymax,class_id" for each kept object.
            convert_annotation(year, image_id, list_file)
            list_file.write('\n')
###============ voc annotation end ============

In [7]:
#============== coco 2017 annotation start=============
import json, argparse
from collections import defaultdict
from os import getcwd
import numpy as np

In [8]:
# (annotation json stem, image sub-directory) pairs for the COCO 2017 splits.
sets = [
    ('instances_train2017', 'train2017'),
    ('instances_val2017', 'val2017'),
]

# Per-class object counter; the conversion loop rebuilds it for each set.
class_count = dict()
'''
parser = argparse.ArgumentParser(description='convert COCO dataset annotation to txt annotation file')
parser.add_argument('--dataset_path', type=str, required=False, help='path to MSCOCO dataset, default is ../mscoco2017', default=getcwd()+'/../mscoco2017')
parser.add_argument('--output_path', type=str, required=False,  help='output path for generated annotation txt files, default is ./', default='./')
parser.add_argument('--classes_path', type=str, required=False, help='path to class definitions, default is ../configs/coco_classes.txt', default=getcwd()+'/../configs/coco_classes.txt')
parser.add_argument('--include_no_obj', action="store_true", help='to include no object image', default=False)
args = parser.parse_args()
'''
# Notebook stand-ins for the command-line options sketched in the inert
# string above.
dataset_path = 'COCO2017'
output_path = ''
classes_path = 'configs/coco_classes.txt'
include_no_obj = False

In [9]:
def get_classes(classes_path):
    '''Load class names from a text file holding one name per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped, so a trailing empty line does not turn into an empty-string
    class that would shift ids or show up in the statistics.

    Args:
        classes_path: path of the class definition file.

    Returns:
        list of class-name strings in file order; the list index is used as
        the class id.
    '''
    with open(classes_path) as f:
        names = [line.strip() for line in f]
    return [name for name in names if name]

In [10]:
# Convert each COCO 2017 split to a txt annotation file with one line per
# image: "<image path> xmin,ymin,xmax,ymax,class_id ...", then print
# per-class object counts for that split.
classes = get_classes(classes_path)
for dataset, datatype in sets:
    image_annotation_dict = defaultdict(list)
    # annotation_data format:
    # {
    #  "info": info,
    #  "licenses": [license],
    #  "images": [image],
    #  "type": "instances",
    #  "annotations": [annotation],
    #  "categories": [category]
    # }
    # `with` releases the (large) json file handle right after loading.
    with open("%s/annotations/%s.json"%(dataset_path, dataset),
              encoding='utf-8') as coco_annotation_file:
        annotation_data = json.load(coco_annotation_file)
    annotations = annotation_data['annotations']

    # count class item number in each set
    class_count = {itm: 0 for itm in classes}

    # to include no object image, we need to involve
    # all images to image_annotation_dict
    if include_no_obj:
        images = annotation_data['images']
        for image in images:
            # image format:
            # {
            #  "license": int,
            #  "url": "url_string",
            #  "file_name": "name_string",
            #  "height": int,
            #  "width": int,
            #  "date_captured": "date_string",
            #  "id": int
            # }
            image_id = image['id']
            image_file = '%s/%s/%012d.jpg' % (dataset_path, datatype, image_id)
            image_annotation_dict[image_file] = []

    for annotation in annotations:
        # annotation format:
        # {
        #  "id": int,
        #  "image_id": int,
        #  "category_id": int,
        #  "segmentation": RLE or [polygon],
        #  "area": float,
        #  "bbox": [x,y,width,height],
        #  "iscrowd": 0 or 1
        # }
        image_id = annotation['image_id']
        image_file = '%s/%s/%012d.jpg' % (dataset_path, datatype, image_id)
        category_id = annotation['category_id']

        # since original 80 COCO category_ids is discontinuous,
        # we need to align them to continuous id (0~79) for further process.
        # Ids outside the listed ranges are left unchanged.
        if 1 <= category_id <= 11:
            category_id = category_id - 1
        elif 13 <= category_id <= 25:
            category_id = category_id - 2
        elif 27 <= category_id <= 28:
            category_id = category_id - 3
        elif 31 <= category_id <= 44:
            category_id = category_id - 5
        elif 46 <= category_id <= 65:
            category_id = category_id - 6
        elif category_id == 67:
            category_id = category_id - 7
        elif category_id == 70:
            category_id = category_id - 9
        elif 72 <= category_id <= 82:
            category_id = category_id - 10
        elif 84 <= category_id <= 90:
            category_id = category_id - 11

        # merge to image bbox annotations
        image_annotation_dict[image_file].append([annotation['bbox'], category_id])

        # count object class for statistic
        class_name = classes[category_id]
        class_count[class_name] += 1

    # save converting result to our annotation file.
    # The path is computed once and used for both writing and reporting, so
    # the "Done for ..." message always names the file actually written.
    # An empty output_path means the current directory.
    annotation_path = '%s.txt' % datatype
    if output_path:
        annotation_path = '%s/%s' % (output_path.rstrip('/'), annotation_path)

    # `with` closes (and flushes) the output even if a write fails.
    with open(annotation_path, 'w') as annotation_file:
        for image_file, box_infos in image_annotation_dict.items():
            annotation_file.write(image_file)
            for bbox, category_id in box_infos:
                # bbox format: [xmin, ymin, w, h]; each value is truncated to
                # int before the corner coordinates are formed.
                x_min = int(bbox[0])
                y_min = int(bbox[1])
                x_max = x_min + int(bbox[2])
                y_max = y_min + int(bbox[3])

                box_annotation = " %d,%d,%d,%d,%d" % (
                    x_min, y_min, x_max, y_max, int(category_id))
                annotation_file.write(box_annotation)
            annotation_file.write('\n')

    # print out item number statistic
    print('\nDone for %s. classes number statistic' % annotation_path)
    print('Image number: %d'%(len(image_annotation_dict)))
    print('Object class number:')
    for (class_name, number) in class_count.items():
        print('%s: %d' % (class_name, number))
    print('total object number:', sum(class_count.values()))

#============== coco 2017 annotation end=============


Done for /train2017.txt. classes number statistic
Image number: 117266
Object class number:
person: 262465
bicycle: 7113
car: 43867
motorbike: 8725
aeroplane: 5135
bus: 6069
train: 4571
truck: 9973
boat: 10759
traffic light: 12884
fire hydrant: 1865
stop sign: 1983
parking meter: 1285
bench: 9838
bird: 10806
cat: 4768
dog: 5508
horse: 6587
sheep: 9509
cow: 8147
elephant: 5513
bear: 1294
zebra: 5303
giraffe: 5131
backpack: 8720
umbrella: 11431
handbag: 12354
tie: 6496
suitcase: 6192
frisbee: 2682
skis: 6646
snowboard: 2685
sports ball: 6347
kite: 9076
baseball bat: 3276
baseball glove: 3747
skateboard: 5543
surfboard: 6126
tennis racket: 4812
bottle: 24342
wine glass: 7913
cup: 20650
fork: 5479
knife: 7770
spoon: 6165
bowl: 14358
banana: 9458
apple: 5851
sandwich: 4373
orange: 6399
broccoli: 7308
carrot: 7852
hot dog: 2918
pizza: 5821
donut: 7179
cake: 6353
chair: 38491
sofa: 5779
pottedplant: 8652
bed: 4192
diningtable: 15714
toilet: 4157
tvmonitor: 5805
laptop: 4970
mouse: 2262
remot