# Preface

Task

**<u>Frontend UI / Dataset management with annotation tool</u>**

- Scripts for checking the formatting of different annotation types and/or converting one format to another (focus on COCO <> PASCAL) --> **Currently at this stage: converting between COCO and XML**

<img src="capture.png">


## COCO to XML

Last run: 26/07/2021, success!!!

<u>Code Credits:</u>

**To convert from coco to xml @https://github.com/mhiyer/coco-annotations-to-xml**

To convert from xml to coco @https://github.com/roboflow-ai/voc2coco/blob/master/voc2coco.py

<u>Notes:</u> 
- Had to pip install opencv-python for cv2
- Testing on instances_val2017.json only, which is part of COCO's 2017 dataset (annotations_trainval2017)
- Testing done on all val2017 images

In [49]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
import cv2
import json

In [50]:
### COCO to XML
# Group the COCO bounding boxes by image file name, so that every image can
# later be written out as a single Pascal VOC style .xml file.

# read annotations file
annotations_path = 'instances_val2017.json'

# read coco category list, indexed by category id for name lookups
df = pd.read_csv('coco_categories.csv')
df.set_index('id', inplace=True)

# specify image locations
image_folder = 'val2017'

# specify savepath - where to save .xml files
savepath = 'saved'
os.makedirs(savepath, exist_ok=True)

# read in .json format
with open(annotations_path, 'rb') as file:
    doc = json.load(file)

# get annotations
annotations = doc['annotations']

# iscrowd allowed? 1 for ok, else set to 0
iscrowd_allowed = 1

# initialize dict to store bboxes for each image:
# image file name -> list of [category, xmin, ymin, xmax, ymax]
image_dict = {}

# loop through the annotations in the subset
for anno in annotations:
    # honour the flag above: drop crowd regions when they are not allowed
    # (annotations without an 'iscrowd' field are treated as non-crowd)
    if not iscrowd_allowed and anno.get('iscrowd', 0) == 1:
        continue

    # image file names are the image id zero-padded to 12 digits
    image_id = anno['image_id']
    image_name = '{0:012d}.jpg'.format(image_id)

    # get category name from its id
    category = df.loc[anno['category_id']]['name']

    # since bboxes = [xmin, ymin, width, height], convert to corner form
    box = anno['bbox']
    image_dict.setdefault(image_name, []).append(
        [category, box[0], box[1], box[0] + box[2], box[1] + box[3]]
    )

In [51]:
### COCO to xml

def write_to_xml(image_name, image_dict, data_folder, save_folder, xml_template='pascal_voc_template.xml'):
    """Write the bounding boxes of one image to a Pascal VOC style .xml file.

    The template is parsed, its header fields are filled in, one <object>
    element is appended per bounding box, and the result is saved as
    ``<save_folder>/<image name without extension>.xml``.

    Args:
        image_name: File name of the image; also the key into ``image_dict``.
        image_dict: Maps image file names to lists of
            ``[category, xmin, ymin, xmax, ymax]`` entries.
        data_folder: Folder the image is read from (to obtain its size).
        save_folder: Folder the .xml file is written to.
        xml_template: Path of the .xml template. It must contain ``folder``,
            ``filename``, ``source/database`` and ``size/{width,height,depth}``
            elements.

    Raises:
        FileNotFoundError: If the image cannot be read from ``data_folder``.
    """
    # get bboxes
    bboxes = image_dict[image_name]

    # name without extension, used for both <filename> and the output file
    stem = image_name.split('.')[0]

    # read xml file
    tree = ET.parse(xml_template)
    root = tree.getroot()

    # modify header fields
    root.find('folder').text = 'Annotations'
    root.find('filename').text = stem
    root.find('source').find('database').text = 'COCO2017'

    # size: read the image from the folder the caller passed in, rather than
    # from a machine-specific absolute path
    image_path = os.path.join(data_folder, image_name)
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        raise FileNotFoundError('could not read image: ' + image_path)
    h, w, d = img.shape

    size = root.find('size')
    size.find('width').text = str(w)
    size.find('height').text = str(h)
    size.find('depth').text = str(d)

    for box in bboxes:
        # append object
        obj = ET.SubElement(root, 'object')

        ET.SubElement(obj, 'name').text = box[0]
        ET.SubElement(obj, 'pose').text = 'Unspecified'
        ET.SubElement(obj, 'truncated').text = str(0)
        ET.SubElement(obj, 'difficult').text = str(0)

        # coordinates are truncated to integers
        bndbox = ET.SubElement(obj, 'bndbox')
        ET.SubElement(bndbox, 'xmin').text = str(int(box[1]))
        ET.SubElement(bndbox, 'ymin').text = str(int(box[2]))
        ET.SubElement(bndbox, 'xmax').text = str(int(box[3]))
        ET.SubElement(bndbox, 'ymax').text = str(int(box[4]))

    # save .xml to anno_path
    anno_path = os.path.join(save_folder, stem + '.xml')
    print(anno_path)
    tree.write(anno_path)

In [52]:
# write one .xml file for every image name collected in image_dict
for image_name in image_dict:
    write_to_xml(
        image_name,
        image_dict,
        data_folder=image_folder,
        save_folder=savepath,
    )

saved\000000289343.xml
saved\000000061471.xml
saved\000000472375.xml
saved\000000520301.xml
saved\000000579321.xml
saved\000000494869.xml
saved\000000554002.xml
saved\000000078823.xml
saved\000000419974.xml
saved\000000404484.xml
saved\000000329219.xml
saved\000000068078.xml
saved\000000170893.xml
saved\000000065485.xml
saved\000000498286.xml
saved\000000424162.xml
saved\000000061108.xml
saved\000000067213.xml
saved\000000365207.xml
saved\000000131273.xml
saved\000000279278.xml
saved\000000482100.xml
saved\000000540502.xml
saved\000000127182.xml
saved\000000565776.xml
saved\000000575970.xml
saved\000000462614.xml
saved\000000407614.xml
saved\000000522007.xml
saved\000000226984.xml
saved\000000037777.xml
saved\000000491216.xml
saved\000000136355.xml
saved\000000529568.xml
saved\000000306733.xml
saved\000000042276.xml
saved\000000370818.xml
saved\000000464476.xml
saved\000000068833.xml
saved\000000433915.xml
saved\000000149222.xml
saved\000000017379.xml
saved\000000361586.xml
saved\00000