In [1]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

In [2]:
os.chdir("/home/sergio/Documentos/VOC2012/Annotations")
xml_files = os.listdir()
print(xml_files[:10])

['2011_001728.xml', '2011_006503.xml', '2008_004545.xml', '2012_000625.xml', '2011_000404.xml', '2010_000702.xml', '2007_008256.xml', '2008_007623.xml', '2012_000363.xml', '2012_001451.xml']


In [3]:
def read_content(xml_file: str):

    tree = ET.parse(xml_file)
    root = tree.getroot()

    list_with_all_boxes = []
    list_with_all_classes = []
    for boxes in root.iter('object'):
        
        #name = root.find("name").text
        filename = root.find('filename').text

        ymin, xmin, ymax, xmax = None, None, None, None

        for names in boxes.findall("name"):
            list_with_all_classes.append(names.text)
        for box in boxes.findall("bndbox"):
            
            ymin = int(float(box.find("ymin").text))
            xmin = int(float(box.find("xmin").text))
            ymax = int(float(box.find("ymax").text))
            xmax = int(float(box.find("xmax").text))

        list_with_single_boxes = [xmin, ymin, xmax, ymax]
        list_with_all_boxes.append(list_with_single_boxes)

    return filename, list_with_all_boxes,list_with_all_classes

file_name, boxes, classes = read_content("2011_006503.xml")


In [4]:
print(file_name)
print(boxes)
print(classes)

2011_006503.jpg
[[186, 135, 250, 288], [104, 251, 321, 409], [166, 109, 216, 207]]
['person', 'person', 'person']


In [5]:
"person" in classes

True

In [6]:
kept_files = []
person_instances =0
total_instances = 0

for i in xml_files:
    file_name, boxes, classes = read_content(i)
    total_instances += len(boxes)
    if "person" in classes:
        kept_files.append(file_name)
        num_person = classes.count("person")
        person_instances +=num_person
        
print("Total images: " + str(len(xml_files)))
print("Images with person instances: " + str(len(kept_files)))
print("Person instances: " + str(person_instances))
print("Total instances: " + str(total_instances))


Total images: 17125
Images with person instances: 9583
Person instances: 17401
Total instances: 40138


### Creating the metadata file(CVS)

In [7]:
from PIL import Image, ExifTags
os.chdir("/home/sergio/Documentos/VOC2012/JPEGImages")

voc_img_metadata = {}

for i in kept_files:
    img = Image.open(i)
    width,height = img.size
    voc_img_metadata[i]={"width":width,"height":height}

In [8]:
metadata_df = pd.DataFrame(voc_img_metadata).T

In [9]:
metadata_df.head()

Unnamed: 0,width,height
2011_001728.jpg,500,375
2011_006503.jpg,375,500
2008_004545.jpg,375,500
2012_000625.jpg,500,333
2007_008256.jpg,480,423


In [10]:
metadata_df.shape

(9583, 2)

In [17]:
os.chdir("/home/sergio/Documentos/VOC2012")
metadata_df.to_csv("VOC_metadata.csv",index=True)

## Filtering the images

In [46]:
os.chdir("/home/sergio/Documentos/VOC2012")

try:
    os.mkdir("pedestrian_dataset_voc")
except:
    pass

In [66]:
from shutil import copyfile

def get_class_images(SOURCE, NEW_LOC,CLASS_NAMES):

    names_list =os.listdir(SOURCE)

    for i in names_list:
        if i in CLASS_NAMES:
                copyfile(SOURCE + "/" + i, NEW_LOC + "/" + i)

In [67]:
get_class_images("/home/sergio/Documentos/VOC2012/JPEGImages","/home/sergio/Documentos/VOC2012/pedestrian_dataset_voc",kept_files)

In [9]:
os.chdir("/home/sergio/Documentos/VOC2012/JPEGImages")
#print(os.listdir()[:5])

['2009_004581.jpg', '2011_000627.jpg', '2008_002198.jpg', '2010_002728.jpg', '2011_003942.jpg']


## Creating the Annotations file (CSV)

In [11]:
os.chdir("/home/sergio/Documentos/VOC2012/Annotations")
xml_files = os.listdir()

def read_content(xml_file: str):

    tree = ET.parse(xml_file)
    root = tree.getroot()

    list_with_all_boxes = []
    list_with_all_classes = []
    for boxes in root.iter('object'):
        
        #name = root.find("name").text
        filename = root.find('filename').text

        ymin, xmin, ymax, xmax = None, None, None, None

        for names in boxes.findall("name"):
            list_with_all_classes.append(names.text)
        for box in boxes.findall("bndbox"):
            
            ymin = int(float(box.find("ymin").text))
            xmin = int(float(box.find("xmin").text))
            ymax = int(float(box.find("ymax").text))
            xmax = int(float(box.find("xmax").text))

        list_with_single_boxes = [xmin, ymin, xmax, ymax]
        list_with_all_boxes.append(list_with_single_boxes)

    return filename, list_with_all_boxes,list_with_all_classes

In [12]:
dict_annotations = {}
num_instance = 0
for i in xml_files:
    file_name,list_bbox,list_classes = read_content(i)
    
    for index,j in enumerate(list_classes):
        if j == "person":
            dict_annotations[num_instance] = {"file_name":file_name, "x_min":list_bbox[index][1],"y_min":list_bbox[index][0],
                                              "delta_x":list_bbox[index][2]-list_bbox[index][0],
                                             "delta_y": list_bbox[index][3]-list_bbox[index][1]}
            num_instance +=1

In [13]:
annotations_df = pd.DataFrame(dict_annotations).T
annotations_df.head()

Unnamed: 0,file_name,x_min,y_min,delta_x,delta_y
0,2011_001728.jpg,62,313,116,181
1,2011_006503.jpg,135,186,64,153
2,2011_006503.jpg,251,104,217,158
3,2011_006503.jpg,109,166,50,98
4,2008_004545.jpg,173,2,23,30


In [16]:
annotations_df.shape

(17401, 5)

In [14]:
os.chdir("/home/sergio/Documentos/VOC2012")
annotations_df.to_csv("VOC_annotations.csv",index=False)

In [15]:
annotations_df.index[annotations_df["file_name"] == "2011_006503.jpg"].tolist()

[1, 2, 3]

In [None]:
os.chdir("/home/sergio/Documentos/VOC2012/Annotations")
xml_files = os.listdir()