In [29]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

from albumentations import (
    convert_bbox_to_albumentations,convert_bboxes_to_albumentations
)

In [30]:
#Leemos los xml files para cada anotación
os.chdir("/home/sergio/Documents/VOC2012/Annotations")
xml_files = os.listdir()

In [36]:
def read_content(xml_file: str):
    '''
    Función que toma un archivo xml y extrae las coordendas de los bounding boxes
    de cada imagen y las clases de cada bounding box
    '''
    tree = ET.parse(xml_file)
    root = tree.getroot()

    list_with_all_boxes = []
    list_with_all_classes = []
    for boxes in root.iter('object'):
        
        #name = root.find("name").text
        filename = root.find('filename').text

        ymin, xmin, ymax, xmax = None, None, None, None

        for names in boxes.findall("name"):
            list_with_all_classes.append(names.text)
        for box in boxes.findall("bndbox"):
            
            ymin = int(float(box.find("ymin").text))
            xmin = int(float(box.find("xmin").text))
            ymax = int(float(box.find("ymax").text))
            xmax = int(float(box.find("xmax").text))

        list_with_single_boxes = [xmin, ymin, xmax, ymax]
        list_with_all_boxes.append(list_with_single_boxes)

    return filename, list_with_all_boxes,list_with_all_classes
#Ejemplo
file_name, boxes, classes = read_content("2011_006503.xml")
print(file_name)
print(boxes)
print(classes)

2011_006503.jpg
[[186, 135, 250, 288], [104, 251, 321, 409], [166, 109, 216, 207]]
['person', 'person', 'person']


In [37]:
#Leemos toda la base de datos
kept_files = []
person_instances =0
total_instances = 0

for i in xml_files:
    file_name, boxes, classes = read_content(i)
    total_instances += len(boxes)
    if "person" in classes:
        kept_files.append(file_name)
        num_person = classes.count("person")
        person_instances +=num_person
        
print("Total images: " + str(len(xml_files)))
print("Images with person instances: " + str(len(kept_files)))
print("Person instances: " + str(person_instances))
print("Total instances: " + str(total_instances))


Total images: 17125
Images with person instances: 9583
Person instances: 17401
Total instances: 40138


## Creating the metadata file (CSV)

In [38]:
from PIL import Image, ExifTags
os.chdir("/home/sergio/Documents/VOC2012/JPEGImages")

voc_img_metadata = {}

for i in kept_files:
    img = Image.open(i)
    width,height = img.size
    voc_img_metadata[i]={"width":width,"height":height}

In [40]:
metadata_df = pd.DataFrame(voc_img_metadata).T

In [43]:
display(metadata_df.head())
print(metadata_df.shape)

Unnamed: 0,width,height
2008_005379.jpg,500,375
2012_003403.jpg,314,186
2009_003338.jpg,500,332
2012_000625.jpg,500,333
2011_004438.jpg,500,334


(9583, 2)


In [44]:
#Guardamos la tabla como un CSV
os.chdir("/home/sergio/Documents/VOC2012")
metadata_df.to_csv("VOC_metadata.csv",index=True)

## Filtering the images

Seleccionamos las imágenes que contienen la clase "Person" y las guardamos en una nueva carpeta llamada "pedestrian_dataset_voc"

In [46]:
os.chdir("/home/sergio/Documents/VOC2012")

try:
    os.mkdir("pedestrian_dataset_voc")
except:
    pass

In [66]:
from shutil import copyfile

def get_class_images(SOURCE, NEW_LOC,CLASS_NAMES):

    names_list =os.listdir(SOURCE)

    for i in names_list:
        if i in CLASS_NAMES:
                copyfile(SOURCE + "/" + i, NEW_LOC + "/" + i)

In [67]:
get_class_images("/home/sergio/Documents/VOC2012/JPEGImages","/home/sergio/Documents/VOC2012/pedestrian_dataset_voc",kept_files)

In [13]:
os.chdir("/home/sergio/Documents/VOC2012/JPEGImages")
#print(os.listdir()[:5])

## Creating the Annotations file (CSV)

Creamos un CSV de anotaciones, cada fila será una anotación, donde viene el archivo al que pertenece así como sus coordenadas del tipo (x_min,y_min,x_max,y_max) normalizados, es decir, de [0,1], que es el formato que el módulo de Albumentations

In [46]:
os.chdir("/home/sergio/Documents/VOC2012/Annotations")
xml_files = os.listdir()

def read_content(xml_file: str):

    tree = ET.parse(xml_file)
    root = tree.getroot()

    list_with_all_boxes = []
    list_with_all_classes = []
    for boxes in root.iter('object'):
        
        #name = root.find("name").text
        filename = root.find('filename').text
    
        ymin, xmin, ymax, xmax = None, None, None, None

        for names in boxes.findall("name"):
            list_with_all_classes.append(names.text)
        for box in boxes.findall("bndbox"):
            
            ymin = int(float(box.find("ymin").text))
            xmin = int(float(box.find("xmin").text))
            ymax = int(float(box.find("ymax").text))
            xmax = int(float(box.find("xmax").text))
         
            if filename in metadata_df.index.values:
            #print(filename in metadata_df.index.values)
                img_width = metadata_df.loc[filename]["width"]
                img_height = metadata_df.loc[filename]["height"]           
                [xmin,ymin,xmax,ymax]=list(convert_bbox_to_albumentations((xmin,ymin,xmax,ymax),source_format="pascal_voc",check_validity=True,rows = img_height,cols = img_width))
            
            else:
                x_center = xmin + np.floor((xmax-xmin-1)/2)
                y_center = ymin + np.floor((ymax-ymin-1)/2)
                width = xmax-xmin
                height = ymax-ymin                       
  
        list_with_single_boxes = [xmin,ymin,xmax,ymax]
        list_with_all_boxes.append(list_with_single_boxes)

    return filename, list_with_all_boxes,list_with_all_classes

In [47]:
#Creamos el diccionario que posteriormente pasaremos a CSV

dict_annotations = {}
num_instance = 0
for i in xml_files:
    file_name,list_bbox,list_classes = read_content(i)
    
    for index,j in enumerate(list_classes):
        if j == "person":
            dict_annotations[num_instance] = {"file_name":file_name, "xmin":list_bbox[index][0],"ymin":list_bbox[index][1],
                                              "xmax":list_bbox[index][2],
                                              "ymax": list_bbox[index][3]}
            num_instance +=1

In [49]:
annotations_df = pd.DataFrame(dict_annotations).T
display(annotations_df.head())
print(annotations_df.shape)

Unnamed: 0,file_name,xmin,ymin,xmax,ymax
0,2008_005379.jpg,0.644,0.00266667,1.0,1.0
1,2012_003403.jpg,0.423567,0.55914,0.484076,0.763441
2,2009_003338.jpg,0.262,0.451807,0.4,1.0
3,2009_003338.jpg,0.336,0.460843,0.518,1.0
4,2009_003338.jpg,0.554,0.36747,0.73,1.0


(17401, 5)


In [50]:
#Guardamos el dataframe como CSV
os.chdir("/home/sergio/Documents/VOC2012")
annotations_df.to_csv("VOC_annotations.csv",index=False)