In [22]:
from pathlib import Path
import numpy as np
import pandas as pd
from xml.etree import ElementTree as ET


In [24]:
root = ET.parse('2007_000032.xml').getroot()
print(root.findall('./object/'))


[<Element 'name' at 0x7f72a3a7f5e8>, <Element 'pose' at 0x7f72a3a7f638>, <Element 'truncated' at 0x7f72a3a7f688>, <Element 'difficult' at 0x7f72a3a7f6d8>, <Element 'bndbox' at 0x7f72a3a7f728>, <Element 'name' at 0x7f72a3a7f908>, <Element 'pose' at 0x7f72a3a7f958>, <Element 'truncated' at 0x7f72a3a7f9a8>, <Element 'difficult' at 0x7f72a3a7f9f8>, <Element 'bndbox' at 0x7f72a3a7fa48>, <Element 'name' at 0x7f72a3a7fc28>, <Element 'pose' at 0x7f72a3a7fc78>, <Element 'truncated' at 0x7f72a3a7fcc8>, <Element 'difficult' at 0x7f72a3a7fd18>, <Element 'bndbox' at 0x7f72a3a7fd68>, <Element 'name' at 0x7f72a3a7ff48>, <Element 'pose' at 0x7f72a3a7ff98>, <Element 'truncated' at 0x7f72a3a83048>, <Element 'difficult' at 0x7f72a3a83098>, <Element 'bndbox' at 0x7f72a3a830e8>]


In [49]:
# print(ET.tostring(root, encoding='utf8').decode('utf8'))

In [42]:
for elements in root.findall('./object'):
    name = elements.find('name').text


In [50]:
name

'person'

In [4]:
VOC_CLASSES = (  # always index 0
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor')

In [5]:
name = [f.text for f in root.findall('./object/name')]
print(name)

['person']


In [133]:
class VOCannotations(object):
    
    def __init__(self, file_name):
        self.root = ET.parse(file_name).getroot()
    
    def get_bndboxes(self):
        data = []
        for elements in self.root.findall('./object/bndbox'):
            xmin = int(elements.find('xmin').text)
            ymin = int(elements.find('ymin').text)
            xmax = int(elements.find('xmax').text)
            ymax = int(elements.find('ymax').text)
            data.append([xmin, ymin, xmax, ymax])
        return data
        
    def get_object_class(self):
        objects = []
        for elements in root.findall('./object'):
            objects.append(elements.find('name').text)
            
        return objects
    
    def get_dimensions(self):
        dim = [int(f.text) for f in self.root.findall('./size/')]
        return {'width':dim[1], 'height':dim[0], 'depth':dim[2]}
          
    def get_filename(self):
        return self.root.findall('./filename')[0].text
    
    def write_csvfile(self, save_path = None):
        filename = self.get_filename().split('.')[0]
        boxes    = self.get_bndboxes()
        classes  = self.get_object_class()
        
        data = []
        for i, box in enumerate(boxes):
            tmp  = [classes[i]]
            tmp.extend(box)
            data.append(tmp)

        header = ['class','xmin','ymin','xmax','ymax']
        
        
        save_path  = Path(save_path)
        
        if not Path.exists(save_path):
            save_path.mkdir(parents = True, exist_ok=True)
        
        
        df = pd.DataFrame(data)
        df.to_csv( save_path/'{}.csv'.format(filename), header = header, index = None)
        

In [134]:
test      = VOCannotations('2007_000032.xml')
boxes     = test.get_bndboxes()
filename  = test.get_filename()
dimension = test.get_dimensions()
classes   = test.get_object_class()
test.write_csvfile(save_path = Path.cwd()/'test')

In [56]:
print(filename)
print(dimension)
print(classes)
print(np.array(boxes).shape)


2007_000027.jpg
{'width': 500, 'height': 486, 'depth': 3}
['aeroplane', 'aeroplane', 'person', 'person']
(1, 4)
2007_000027
