In [1]:
import pickle
import os
import pandas as pd
import numpy as np
from PIL import Image
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

In [2]:
# constants
annotationfile = 'annotations.txt'
jpegimagepath = './img/'
xmlfile_destination = './xml/'

imgfile_names = os.listdir(jpegimagepath)
annotation_list = []
imagesize_list = []

# Read the annotation file: every line becomes one row of string fields,
# obtained by dropping the trailing newline and splitting on single spaces.
with open(annotationfile, 'r') as annotation_fh:
    for line in annotation_fh:
        annotation_list.append(line.rstrip('\n').split(' '))

# Collect one [filename, width, height] row per image file.
for name in imgfile_names:
    image_path = os.path.join(jpegimagepath, name)
    if not os.path.isfile(image_path):
        # Sub-directories (e.g. .ipynb_checkpoints) cannot be opened as images.
        continue
    # Image.open keeps the underlying file open; the context manager closes it
    # as soon as the size has been read, so file handles do not accumulate.
    with Image.open(image_path) as im:
        imagesize_list.append([name] + list(im.size))

annotation_list = pd.DataFrame(annotation_list)
imagesize_list = pd.DataFrame(imagesize_list)

# Inner-join on column 0 (the file name). Columns 1 and 2 exist in both frames,
# so pandas renames them to '1_x'/'2_x' (annotations) and '1_y'/'2_y' (sizes).
res = pd.merge(annotation_list, imagesize_list, on=0)
# Annotation column 1 is not used downstream.
del res['1_x']
df = res.rename(index=str, columns={
    0: 'filename',
    '2_x': 'xmin',
    3: 'ymin',
    4: 'xmax',
    5: 'ymax',
    '1_y': 'width',
    '2_y': 'height'})

# The annotation fields are strings; convert them to numbers for the arithmetic.
box_columns = ['xmin', 'ymin', 'xmax', 'ymax']
df[box_columns] = df[box_columns].apply(pd.to_numeric)
# Annotation columns 4 and 5 are added to xmin/ymin to obtain the far corner,
# i.e. they are treated as the box extent along x and y.
df['xmax'] = df['xmin'] + df['xmax']
df['ymax'] = df['ymin'] + df['ymax']

# change to str: the values are later assigned to XML element text
df = df.astype({'xmin': str, 'ymin': str, 'xmax': str, 'ymax': str, 'height': str, 'width': str})
df[:5]

Unnamed: 0,filename,xmin,ymin,xmax,ymax,width,height
0,IMG_0171.jpg,472,382,505,414,907,907
1,IMG_0172.jpg,472,382,505,415,907,907
2,IMG_0173.jpg,100,213,186,288,907,907
3,IMG_0174.jpg,97,213,183,289,907,907
4,IMG_0175.jpg,72,289,158,362,907,907


In [3]:
# Write one XML annotation file per row of df into xmlfile_destination.
# Create the output directory first so the cell also works on a fresh checkout.
if not os.path.isdir(xmlfile_destination):
    os.makedirs(xmlfile_destination)

for i, val in df.iterrows():
    # Element layout (child order is preserved in the written file):
    #   annotation -> filename, size(width, height, depth), object(bndbox, name)
    annotation = ET.Element('annotation')
    filename = ET.SubElement(annotation, 'filename')
    size = ET.SubElement(annotation, 'size')
    o = ET.SubElement(annotation, 'object')
    width = ET.SubElement(size, 'width')
    height = ET.SubElement(size, 'height')
    depth = ET.SubElement(size, 'depth')
    bndbox = ET.SubElement(o, 'bndbox')
    name = ET.SubElement(o, 'name')
    xmin = ET.SubElement(bndbox, 'xmin')
    ymin = ET.SubElement(bndbox, 'ymin')
    xmax = ET.SubElement(bndbox, 'xmax')
    ymax = ET.SubElement(bndbox, 'ymax')

    filename.text = val['filename']
    xmin.text = val['xmin']
    ymin.text = val['ymin']
    xmax.text = val['xmax']
    ymax.text = val['ymax']
    width.text = val['width']
    height.text = val['height']
    # Every row is written with the same label and a fixed depth of 3.
    name.text = 'stop_sign'
    depth.text = '3'

    # splitext removes only the final extension, so a name such as 'a.b.jpg'
    # maps to 'a.b.xml' instead of being truncated to 'a.xml' (which could
    # make different images overwrite each other's annotation file).
    xml_name = os.path.splitext(filename.text)[0] + '.xml'
    tree = ET.ElementTree(annotation)
    tree.write(os.path.join(xmlfile_destination, xml_name))

In [4]:
class XML_preprocessor(object):
    """Parse XML annotation files into one ground-truth array per image.

    Each file is expected to contain a ``filename`` element, a ``size``
    element with ``width`` and ``height``, and any number of ``object``
    elements holding a ``name`` and a ``bndbox`` with ``xmin``, ``ymin``,
    ``xmax`` and ``ymax``.

    After construction, ``self.data`` maps the text of each ``filename``
    element to a float array of shape ``(n_objects, 4 + num_classes)``.
    Each row is ``[xmin, ymin, xmax, ymax, one-hot class...]``, with the box
    coordinates divided by the image width/height.

    Args:
        data_path: Directory containing the XML files. A trailing path
            separator is accepted but not required.
        class_names: Ordered label names; the position of a name is its
            index in the one-hot vector. Defaults to ``('stop_sign',)``.
    """

    def __init__(self, data_path, class_names=('stop_sign',)):
        self.path_prefix = data_path
        self.class_names = tuple(class_names)
        self.num_classes = len(self.class_names)
        self.data = dict()
        self._preprocess_XML()

    def _preprocess_XML(self):
        """Read every ``.xml`` file under ``path_prefix`` into ``self.data``."""
        # Sorted for a deterministic processing order; entries that are not
        # .xml files (e.g. .DS_Store, .ipynb_checkpoints) are ignored instead
        # of crashing the XML parser.
        for filename in sorted(os.listdir(self.path_prefix)):
            if not filename.lower().endswith('.xml'):
                continue
            tree = ET.parse(os.path.join(self.path_prefix, filename))
            root = tree.getroot()
            bounding_boxes = []
            one_hot_classes = []
            size_tree = root.find('size')
            width = float(size_tree.find('width').text)
            height = float(size_tree.find('height').text)
            for object_tree in root.findall('object'):
                # Use the object's own (direct child) bndbox. Iterating over
                # all descendants would let a nested bndbox override it, and
                # an object without one would reuse the previous coordinates.
                bndbox = object_tree.find('bndbox')
                if bndbox is None:
                    print('object without bndbox in: %s' % filename)
                    continue
                bounding_boxes.append([
                    float(bndbox.find('xmin').text) / width,
                    float(bndbox.find('ymin').text) / height,
                    float(bndbox.find('xmax').text) / width,
                    float(bndbox.find('ymax').text) / height,
                ])
                class_name = object_tree.find('name').text
                one_hot_classes.append(self._to_one_hot(class_name))
            image_name = root.find('filename').text
            if bounding_boxes:
                image_data = np.hstack((np.asarray(bounding_boxes),
                                        np.asarray(one_hot_classes)))
            else:
                # Keep the column count consistent for images without objects.
                image_data = np.zeros((0, 4 + self.num_classes))
            self.data[image_name] = image_data

    def _to_one_hot(self, name):
        """Return the one-hot list for ``name``.

        An unknown label is reported on stdout and yields an all-zero vector.
        """
        one_hot_vector = [0] * self.num_classes
        if name in self.class_names:
            one_hot_vector[self.class_names.index(name)] = 1
        else:
            print('unknown label: %s' %name)
        return one_hot_vector

In [5]:
# Parse the generated XML files and persist the resulting dict with pickle.
data = XML_preprocessor(xmlfile_destination).data
# The with-block closes the file, so the pickle is fully flushed to disk
# before the cell finishes.
with open('data.p', 'wb') as pickle_fh:
    pickle.dump(data, pickle_fh)