# Explore Verified_Annotation/ data

In [None]:
import os
import xml.etree.ElementTree as ET
import collections
from PIL import Image, ImageDraw
from matplotlib.pyplot import figure, imshow, show
import matplotlib

import numpy as np
%matplotlib inline

# Directory tree that is walked for '.xml' annotation files.
ROOT = '/mnt/hdd1/lalo/Verified_Annotation/'
# Target height, in pixels, that draw_boxes scales each image to before drawing.
VERT_SIZE = 300
# Value assigned to matplotlib's 'figure.dpi' rcParam before the images are shown.
DPI = 230


def parse_voc_xml(node):
    """Recursively convert an ElementTree element into nested dictionaries.

    Args:
        node: An ``xml.etree.ElementTree.Element``.

    Returns:
        ``{node.tag: value}`` where ``value`` is the stripped text for a leaf
        element, or a dict of the converted children for an element that has
        children. Child tags that occur more than once are collected into a
        list; tags that occur once keep their bare value. A leaf element
        without any text yields an empty dict and therefore does not show up
        in its parent's mapping.
    """
    kids = list(node)

    # Leaf element: only its (stripped) text matters.
    if not kids:
        if node.text:
            return {node.tag: node.text.strip()}
        return {}

    # Group the converted children by tag so repeated tags can become lists.
    grouped = collections.defaultdict(list)
    for kid in kids:
        for key, value in parse_voc_xml(kid).items():
            grouped[key].append(value)

    merged = {}
    for key, values in grouped.items():
        merged[key] = values[0] if len(values) == 1 else values
    return {node.tag: merged}

def draw_boxes(image, objects):
    """Shrink *image* towards VERT_SIZE pixels of height and draw labelled boxes.

    The image is resized with ``image.thumbnail`` (which modifies it in
    place), then every object's bounding box is drawn as a red rectangle with
    the object's name written at the box's top-left corner.

    Args:
        image: Image to annotate; it must provide ``size`` and ``thumbnail``
            and be accepted by ``ImageDraw.Draw`` (the caller passes the
            result of ``Image.open``).
        objects: Iterable of mappings, each with a ``'name'`` entry and a
            ``'bndbox'`` mapping whose ``'xmin'``, ``'ymin'``, ``'xmax'`` and
            ``'ymax'`` values are convertible to ``float`` and expressed in
            the coordinates of the original, un-resized image.

    Returns:
        The same ``image`` object, resized and with the boxes drawn on it.
    """
    orig_width, orig_height = image.size
    requested_scale = VERT_SIZE / orig_height
    image.thumbnail((orig_width * requested_scale, VERT_SIZE))

    # thumbnail() never enlarges an image and rounds the result to whole
    # pixels, so the size actually obtained can differ from the one requested.
    # Scaling the boxes by the real per-axis ratio keeps them aligned with the
    # pixels (in particular for images that are already shorter than VERT_SIZE).
    scale = np.array(
        [image.size[0] / orig_width, image.size[1] / orig_height] * 2
    )

    draw = ImageDraw.Draw(image)
    for o in objects:
        bndbox = o['bndbox']
        xmin, ymin, xmax, ymax = (scale * np.array(
            [float(bndbox['xmin']), float(bndbox['ymin']),
             float(bndbox['xmax']), float(bndbox['ymax'])]
        )).tolist()
        draw.rectangle(
            ((xmin, ymin), (xmax, ymax)),
            outline='red', width=4
        )
        draw.text((xmin, ymin), str(o['name']))
    return image

In [None]:
# The DPI is set in its own cell: presumably the inline backend enabled by
# `%matplotlib inline` overrides rcParams changed in that same cell
# -- TODO confirm.
matplotlib.rcParams['figure.dpi'] = DPI

# Walk the annotation tree and display every annotated image with its boxes.
for root, dirs, files in os.walk(ROOT):
    # Sorting in place makes os.walk visit sub-directories in a stable order;
    # the files are sorted as well so the display order is deterministic.
    dirs.sort()
    for base_name in sorted(files):
        if os.path.splitext(base_name)[1] != '.xml':
            continue

        file_name = os.path.join(root, base_name)
        print(file_name)
        data = parse_voc_xml(ET.parse(file_name).getroot())

        # Annotation files without any 'object' entry have nothing to draw.
        try:
            objects = data['annotation']['object']
        except KeyError:
            continue

        # A single object is parsed as a bare dict; normalise to a list.
        objects = objects if isinstance(objects, list) else [objects]
        # The image is looked up next to its annotation file.
        image_path = os.path.join(root, data['annotation']['filename'])
        image_name = os.path.basename(image_path)
        img = draw_boxes(Image.open(image_path), objects)
        figure()
        imshow(np.asarray(img))
        show()