## Parse the Annotation of PASCAL VOC

## 匯入相關函式庫 Import all the necessary libraries

In [1]:
import os
import xml.etree.ElementTree as ET

import cv2
import numpy as np

## 設定相關設定與參數 Setting parameters

In [2]:
# Root directory holding the training / validation data.
DATA_PATH = "../data"

# Directory of the dataset used in this notebook.
DATA_SET_PATH = os.path.join(DATA_PATH, "kangaroo")

# Directory containing the dataset's annotation files.
ANNOTATIONS_PATH = os.path.join(DATA_SET_PATH, "annots")

# Directory containing the dataset's image files.
IMAGES_PATH = os.path.join(DATA_SET_PATH, "images")

# Echo the resolved locations, one per line, as a quick sanity check.
print(DATA_PATH, DATA_SET_PATH, ANNOTATIONS_PATH, IMAGES_PATH, sep="\n")

../data
../data/kangaroo
../data/kangaroo/annots
../data/kangaroo/images


In [3]:
# Object classes to keep when parsing annotations; the parsing loop skips
# objects whose name is not listed here (an empty list keeps every class).
CLASSES = ["kangaroo"]

In [4]:
# List of per-image records; each record carries the information needed
# for training. A record has the following shape:
#
# {
#     'filename': '/path/kangaroo.jpg', 'width': 128, 'height': 128, 'depth': 3,
#     'object': [
#         {'name': 'person', 'xmin': 0, 'ymin': 0, 'xmax': 28, 'ymax': 28},
#         {'name': 'kangaroo', 'xmin': 45, 'ymin': 45, 'xmax': 60, 'ymax': 60}
#     ]
# }
all_imgs = []

# Dictionary mapping {class name: number of occurrences}.
seen_labels = {}

## Parse Annotation

In [5]:
# Parse every PASCAL VOC annotation file under ANNOTATIONS_PATH.
#   all_imgs    -> one dict per annotation file (filename, size, objects)
#   seen_labels -> {class name: number of kept objects with that name}
all_imgs = []
seen_labels = {}

# sorted() keeps the record order deterministic across runs / platforms.
for ann in sorted(os.listdir(ANNOTATIONS_PATH)):
    # Skip directory entries that are not annotation files (for example a
    # `.ipynb_checkpoints` folder) instead of letting ET.parse fail on them.
    if not ann.lower().endswith('.xml'):
        continue

    img = {'object': []}

    tree = ET.parse(os.path.join(ANNOTATIONS_PATH, ann))
    root = tree.getroot()

    # The image path is built from the <filename> element, not from the
    # name of the annotation file itself.
    img['filename'] = os.path.join(IMAGES_PATH, root.find('filename').text)

    for size in root.findall('size'):
        img['width'] = int(size.find('width').text)
        img['height'] = int(size.find('height').text)
        img['depth'] = int(size.find('depth').text)

    for boxes in root.iter('object'):
        obj = {}
        obj['name'] = boxes.find('name').text

        # Ignore objects whose class is not in CLASSES. This must be
        # `continue`, not `break`: breaking would also discard every wanted
        # object that appears after the first unwanted one in the same file.
        if CLASSES and obj['name'] not in CLASSES:
            continue

        seen_labels[obj['name']] = seen_labels.get(obj['name'], 0) + 1

        for box in boxes.findall('bndbox'):
            obj['xmin'] = int(box.find("xmin").text)
            obj['ymin'] = int(box.find("ymin").text)
            obj['xmax'] = int(box.find("xmax").text)
            obj['ymax'] = int(box.find("ymax").text)

        img['object'].append(obj)

    all_imgs.append(img)


print(seen_labels, len(all_imgs))
all_imgs[0]

{'kangaroo': 266} 164


{'object': [{'name': 'kangaroo',
   'xmin': 233,
   'ymin': 89,
   'xmax': 386,
   'ymax': 262},
  {'name': 'kangaroo', 'xmin': 134, 'ymin': 105, 'xmax': 341, 'ymax': 253}],
 'filename': '../data/kangaroo/images/00001.jpg',
 'width': 450,
 'height': 319,
 'depth': 3}