In [2]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

from albumentations import (
    convert_bbox_to_albumentations,convert_bboxes_to_albumentations
)

## VOC 2012 TEST SET

In [2]:
# Collect the annotation files of the VOC 2012 split.
# Only *.xml entries are kept so that a stray non-annotation entry in the
# folder is never handed to ET.parse by the cells that iterate over xml_files.
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012/Annotations")
xml_files = [name for name in os.listdir() if name.lower().endswith(".xml")]

In [6]:
def read_content(xml_file: str):
    '''
    Parse one annotation XML file and collect its bounding boxes and classes.

    Args:
        xml_file: Path of the XML file to parse.

    Returns:
        A tuple ``(filename, boxes, classes)``:
        * ``filename`` - text of the root ``<filename>`` element.
        * ``boxes`` - one ``[xmin, ymin, xmax, ymax]`` list of ints per
          ``<object>`` element (four ``None`` values if the object has no
          ``<bndbox>`` child).
        * ``classes`` - text of every ``<name>`` child of each ``<object>``.
    '''
    root = ET.parse(xml_file).getroot()

    # Read the file name once, before the loop: it does not depend on the
    # objects, and binding it here keeps the return statement valid for
    # annotation files that contain no <object> element at all.
    filename = root.find('filename').text

    list_with_all_boxes = []
    list_with_all_classes = []
    for obj in root.iter('object'):
        list_with_all_classes.extend(name.text for name in obj.findall("name"))

        # Placeholder kept when the object carries no <bndbox> child.
        xmin = ymin = xmax = ymax = None
        for box in obj.findall("bndbox"):
            # Coordinates may be written as floats ("12.0"), hence int(float(...)).
            xmin, ymin, xmax, ymax = (
                int(float(box.find(tag).text))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            )

        list_with_all_boxes.append([xmin, ymin, xmax, ymax])

    return filename, list_with_all_boxes, list_with_all_classes
# Example: parse a single annotation file and print each returned value
# on its own line.
file_name, boxes, classes = read_content("2008_000200.xml")
print(file_name, boxes, classes, sep="\n")

2008_000200.jpg
[[119, 76, 184, 311], [266, 43, 338, 323]]
['person', 'person']


In [8]:
# Walk the whole annotation set: remember the images that contain at least
# one "person" object and count person / total instances along the way.
kept_files = []
person_instances = 0
total_instances = 0

for annotation_file in xml_files:
    file_name, boxes, classes = read_content(annotation_file)
    total_instances += len(boxes)
    if "person" not in classes:
        continue
    kept_files.append(file_name)
    num_person = classes.count("person")
    person_instances += num_person

print(f"Total images: {len(xml_files)}")
print(f"Images with person instances: {len(kept_files)}")
print(f"Person instances: {person_instances}")
print(f"Total instances: {total_instances}")


Total images: 5138
Images with person instances: 5138
Person instances: 7326
Total instances: 7330


## Creating the metadata file (CSV)

In [11]:
from PIL import Image, ExifTags
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012/JPEGImages")

# Map each kept image file name to its pixel width and height.
voc_img_metadata_test = {}

for i in kept_files:
    # The context manager closes the underlying file as soon as the size has
    # been read, instead of leaving one open handle per image to the garbage
    # collector.
    with Image.open(i) as img:
        width, height = img.size
    voc_img_metadata_test[i] = {"width": width, "height": height}

In [13]:
# One row per image file, with "width" and "height" columns.
metadata_test_df = pd.DataFrame.from_dict(voc_img_metadata_test, orient="index")

In [14]:
# Preview the first rows of the metadata table, then print its (rows, columns) shape.
display(metadata_test_df.head())
print(metadata_test_df.shape)

Unnamed: 0,width,height
2012_002663.jpg,500,375
2011_004593.jpg,500,331
2011_003690.jpg,500,335
2012_000164.jpg,500,375
2012_002706.jpg,500,250


(5138, 2)


In [15]:
# Save the metadata table as a CSV; the index (image file name) is written too.
# The frame built above is `metadata_test_df`; no `metadata_df` is defined
# before this cell, so referencing it here fails on a fresh kernel.
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012")
metadata_test_df.to_csv("VOC_metadata_test.csv",index=True)

## Filtering the images

Seleccionamos las imágenes que contienen la clase "person" y las guardamos en una nueva carpeta llamada "pedestrian_dataset_voc_test_set".

In [16]:
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012")

# Create the output folder if it is missing. exist_ok=True tolerates a folder
# left over from a previous run while still surfacing real failures
# (permissions, a regular file with the same name) that a bare `except`
# would hide.
os.makedirs("pedestrian_dataset_voc_test_set", exist_ok=True)

In [17]:
from shutil import copyfile

def get_class_images(SOURCE, NEW_LOC,CLASS_NAMES):
    """
    Copy selected files from one folder to another.

    Args:
        SOURCE: Folder whose entries are scanned.
        NEW_LOC: Existing folder that receives the copies.
        CLASS_NAMES: Iterable of file names to copy; entries of SOURCE that
            are not listed are skipped, and listed names that are absent
            from SOURCE are ignored.
    """
    # A set gives constant-time membership tests; scanning a list for every
    # directory entry is quadratic for folders with thousands of images.
    wanted = set(CLASS_NAMES)

    for name in os.listdir(SOURCE):
        if name in wanted:
            copyfile(os.path.join(SOURCE, name), os.path.join(NEW_LOC, name))

In [18]:
# Copy every image listed in kept_files from JPEGImages into the filtered folder.
get_class_images("/home/sergio/Downloads/download/VOCdevkit/VOC2012/JPEGImages","/home/sergio/Downloads/download/VOCdevkit/VOC2012/pedestrian_dataset_voc_test_set",kept_files)

In [19]:
# Make the image folder the working directory.
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012/JPEGImages")
# Optional sanity check (disabled): print(os.listdir()[:5])

## Creating the Annotations file (CSV)

Creamos un CSV de anotaciones en el que cada fila es una anotación: incluye el archivo al que pertenece y sus coordenadas en formato (x_min, y_min, x_max, y_max) normalizadas, es decir, en el intervalo [0, 1], que es el formato que utiliza el módulo Albumentations.

In [20]:
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012/Annotations")
# Only *.xml entries are kept so that a stray non-annotation entry in the
# folder is never handed to ET.parse.
xml_files = [name for name in os.listdir() if name.lower().endswith(".xml")]

# read_content() below looks image sizes up in the module-level name
# `metadata_df`; bind it explicitly to the metadata table built earlier so the
# cell does not depend on leftover kernel state.
metadata_df = metadata_test_df

def read_content(xml_file: str):
    """
    Parse one annotation XML file and return its boxes in normalized form.

    For every ``<bndbox>`` the pixel coordinates are read as ints. When the
    annotation's file name is present in the index of the module-level
    ``metadata_df``, the box is converted with
    ``convert_bbox_to_albumentations`` using that image's ``width`` and
    ``height``; otherwise the pixel coordinates are kept unchanged.

    Args:
        xml_file: Path of the XML file to parse.

    Returns:
        A tuple ``(filename, boxes, classes)``:
        * ``filename`` - text of the root ``<filename>`` element.
        * ``boxes`` - one ``[xmin, ymin, xmax, ymax]`` list per ``<object>``
          (four ``None`` values if the object has no ``<bndbox>`` child).
        * ``classes`` - text of every ``<name>`` child of each ``<object>``.
    """
    root = ET.parse(xml_file).getroot()

    # Bound before the loop so the return statement is valid even when the
    # file contains no <object> element.
    filename = root.find('filename').text

    list_with_all_boxes = []
    list_with_all_classes = []
    for obj in root.iter('object'):
        list_with_all_classes.extend(name.text for name in obj.findall("name"))

        # Placeholder kept when the object carries no <bndbox> child.
        xmin = ymin = xmax = ymax = None
        for box in obj.findall("bndbox"):
            # Coordinates may be written as floats ("12.0"), hence int(float(...)).
            ymin = int(float(box.find("ymin").text))
            xmin = int(float(box.find("xmin").text))
            ymax = int(float(box.find("ymax").text))
            xmax = int(float(box.find("xmax").text))

            # Membership on the Index itself is a hash lookup; going through
            # `.index.values` scans the whole array for every box.
            if filename in metadata_df.index:
                img_width = metadata_df.loc[filename, "width"]
                img_height = metadata_df.loc[filename, "height"]
                xmin, ymin, xmax, ymax = convert_bbox_to_albumentations(
                    (xmin, ymin, xmax, ymax),
                    source_format="pascal_voc",
                    check_validity=True,
                    rows=img_height,
                    cols=img_width,
                )
            # Images without metadata keep their pixel coordinates.

        list_with_all_boxes.append([xmin, ymin, xmax, ymax])

    return filename, list_with_all_boxes, list_with_all_classes

In [21]:
# Build the dictionary that is later turned into the annotations CSV:
# one entry per "person" box, keyed by a running instance number.

dict_annotations = {}
num_instance = 0
for annotation_file in xml_files:
    file_name, list_bbox, list_classes = read_content(annotation_file)

    for index, label in enumerate(list_classes):
        if label != "person":
            continue
        bbox = list_bbox[index]
        dict_annotations[num_instance] = {
            "file_name": file_name,
            "xmin": bbox[0],
            "ymin": bbox[1],
            "xmax": bbox[2],
            "ymax": bbox[3],
        }
        num_instance += 1

In [22]:
# One row per annotation. Building the frame row-wise with orient="index"
# lets pandas infer a dtype per column, so the coordinates are numeric;
# DataFrame(dict).T goes through mixed-type columns and leaves every column
# as object dtype.
annotations_test_df = pd.DataFrame.from_dict(dict_annotations, orient="index")
display(annotations_test_df.head())
print(annotations_test_df.shape)

Unnamed: 0,file_name,xmin,ymin,xmax,ymax
0,2012_002663.jpg,0.824,0.512,0.954,0.690667
1,2011_004593.jpg,0.428,0.432024,0.586,0.779456
2,2011_003690.jpg,0.198,0.00298507,0.82,1.0
3,2011_003690.jpg,0.206,0.310448,0.524,1.0
4,2012_000164.jpg,0.246,0.314667,0.796,1.0


(7326, 5)


In [24]:
# Save the annotations dataframe as a CSV (the integer index is not written).
os.chdir("/home/sergio/Downloads/download/VOCdevkit/VOC2012")
annotations_test_df.to_csv("VOC_annotations_test.csv",index=False)

## VOC 2007 TEST SET

In [3]:
# Collect the annotation files of the VOC 2007 test split.
# Only *.xml entries are kept so that a stray non-annotation entry in the
# folder is never handed to ET.parse by the cells that iterate over xml_files.
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/Annotations")
xml_files = [name for name in os.listdir() if name.lower().endswith(".xml")]

In [4]:
def read_content(xml_file: str):
    '''
    Parse one annotation XML file and collect its bounding boxes and classes.

    Args:
        xml_file: Path of the XML file to parse.

    Returns:
        A tuple ``(filename, boxes, classes)``:
        * ``filename`` - text of the root ``<filename>`` element.
        * ``boxes`` - one ``[xmin, ymin, xmax, ymax]`` list of ints per
          ``<object>`` element (four ``None`` values if the object has no
          ``<bndbox>`` child).
        * ``classes`` - text of every ``<name>`` child of each ``<object>``.
    '''
    root = ET.parse(xml_file).getroot()

    # Read the file name once, before the loop: it does not depend on the
    # objects, and binding it here keeps the return statement valid for
    # annotation files that contain no <object> element at all.
    filename = root.find('filename').text

    list_with_all_boxes = []
    list_with_all_classes = []
    for obj in root.iter('object'):
        list_with_all_classes.extend(name.text for name in obj.findall("name"))

        # Placeholder kept when the object carries no <bndbox> child.
        xmin = ymin = xmax = ymax = None
        for box in obj.findall("bndbox"):
            # Coordinates may be written as floats ("12.0"), hence int(float(...)).
            xmin, ymin, xmax, ymax = (
                int(float(box.find(tag).text))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            )

        list_with_all_boxes.append([xmin, ymin, xmax, ymax])

    return filename, list_with_all_boxes, list_with_all_classes
# Example: parse a single annotation file and print each returned value
# on its own line.
file_name, boxes, classes = read_content("000001.xml")
print(file_name, boxes, classes, sep="\n")

000001.jpg
[[48, 240, 195, 371], [8, 12, 352, 498]]
['dog', 'person']


In [5]:
# Walk the whole annotation set: remember the images that contain at least
# one "person" object and count person / total instances along the way.
kept_files = []
person_instances = 0
total_instances = 0

for annotation_file in xml_files:
    file_name, boxes, classes = read_content(annotation_file)
    total_instances += len(boxes)
    if "person" not in classes:
        continue
    kept_files.append(file_name)
    num_person = classes.count("person")
    person_instances += num_person

print(f"Total images: {len(xml_files)}")
print(f"Images with person instances: {len(kept_files)}")
print(f"Person instances: {person_instances}")
print(f"Total instances: {total_instances}")


Total images: 4952
Images with person instances: 2097
Person instances: 5227
Total instances: 14976


## Creating the metadata file (CSV)

In [6]:
from PIL import Image, ExifTags
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages")

# Map each kept image file name to its pixel width and height.
voc_img_metadata_test = {}

for i in kept_files:
    # The context manager closes the underlying file as soon as the size has
    # been read, instead of leaving one open handle per image to the garbage
    # collector.
    with Image.open(i) as img:
        width, height = img.size
    voc_img_metadata_test[i] = {"width": width, "height": height}

In [7]:
# One row per image file, with "width" and "height" columns.
metadata_test_df = pd.DataFrame.from_dict(voc_img_metadata_test, orient="index")

In [8]:
# Preview the first rows of the metadata table, then print its (rows, columns) shape.
display(metadata_test_df.head())
print(metadata_test_df.shape)

Unnamed: 0,width,height
001353.jpg,500,333
000166.jpg,500,375
007186.jpg,500,375
006115.jpg,500,384
007620.jpg,318,480


(2097, 2)


In [10]:
# Save the metadata table as a CSV; the index (image file name) is written too.
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007")
metadata_test_df.to_csv("VOC_metadata_test_2007.csv",index=True)

## Filtering the images

Seleccionamos las imágenes que contienen la clase "person" y las guardamos en una nueva carpeta llamada "pedestrian_dataset_voc_test_set_2007".

In [11]:
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007")

# Create the output folder if it is missing. exist_ok=True tolerates a folder
# left over from a previous run while still surfacing real failures
# (permissions, a regular file with the same name) that a bare `except`
# would hide.
os.makedirs("pedestrian_dataset_voc_test_set_2007", exist_ok=True)

In [12]:
from shutil import copyfile

def get_class_images(SOURCE, NEW_LOC,CLASS_NAMES):
    """
    Copy selected files from one folder to another.

    Args:
        SOURCE: Folder whose entries are scanned.
        NEW_LOC: Existing folder that receives the copies.
        CLASS_NAMES: Iterable of file names to copy; entries of SOURCE that
            are not listed are skipped, and listed names that are absent
            from SOURCE are ignored.
    """
    # A set gives constant-time membership tests; scanning a list for every
    # directory entry is quadratic for folders with thousands of images.
    wanted = set(CLASS_NAMES)

    for name in os.listdir(SOURCE):
        if name in wanted:
            copyfile(os.path.join(SOURCE, name), os.path.join(NEW_LOC, name))

In [14]:
# Copy every image listed in kept_files from JPEGImages into the filtered folder.
get_class_images("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages","/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/pedestrian_dataset_voc_test_set_2007",kept_files)

In [15]:
# Make the image folder the working directory and show five of its entries
# as a quick sanity check.
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages")
print(os.listdir()[:5])

['000914.jpg', '004744.jpg', '002556.jpg', '008369.jpg', '007262.jpg']


## Creating the Annotations file (CSV)

Creamos un CSV de anotaciones en el que cada fila es una anotación: incluye el archivo al que pertenece y sus coordenadas en formato (x_min, y_min, x_max, y_max) normalizadas, es decir, en el intervalo [0, 1], que es el formato que utiliza el módulo Albumentations.

In [18]:
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/Annotations")
# Only *.xml entries are kept so that a stray non-annotation entry in the
# folder is never handed to ET.parse.
xml_files = [name for name in os.listdir() if name.lower().endswith(".xml")]

# read_content() below looks image sizes up in the module-level name
# `metadata_df`, so alias the metadata table built earlier.
metadata_df=metadata_test_df

def read_content(xml_file: str):
    """
    Parse one annotation XML file and return its boxes in normalized form.

    For every ``<bndbox>`` the pixel coordinates are read as ints. When the
    annotation's file name is present in the index of the module-level
    ``metadata_df``, the box is converted with
    ``convert_bbox_to_albumentations`` using that image's ``width`` and
    ``height``; otherwise the pixel coordinates are kept unchanged.

    Args:
        xml_file: Path of the XML file to parse.

    Returns:
        A tuple ``(filename, boxes, classes)``:
        * ``filename`` - text of the root ``<filename>`` element.
        * ``boxes`` - one ``[xmin, ymin, xmax, ymax]`` list per ``<object>``
          (four ``None`` values if the object has no ``<bndbox>`` child).
        * ``classes`` - text of every ``<name>`` child of each ``<object>``.
    """
    root = ET.parse(xml_file).getroot()

    # Bound before the loop so the return statement is valid even when the
    # file contains no <object> element.
    filename = root.find('filename').text

    list_with_all_boxes = []
    list_with_all_classes = []
    for obj in root.iter('object'):
        list_with_all_classes.extend(name.text for name in obj.findall("name"))

        # Placeholder kept when the object carries no <bndbox> child.
        xmin = ymin = xmax = ymax = None
        for box in obj.findall("bndbox"):
            # Coordinates may be written as floats ("12.0"), hence int(float(...)).
            ymin = int(float(box.find("ymin").text))
            xmin = int(float(box.find("xmin").text))
            ymax = int(float(box.find("ymax").text))
            xmax = int(float(box.find("xmax").text))

            # Membership on the Index itself is a hash lookup; going through
            # `.index.values` scans the whole array for every box.
            if filename in metadata_df.index:
                img_width = metadata_df.loc[filename, "width"]
                img_height = metadata_df.loc[filename, "height"]
                xmin, ymin, xmax, ymax = convert_bbox_to_albumentations(
                    (xmin, ymin, xmax, ymax),
                    source_format="pascal_voc",
                    check_validity=True,
                    rows=img_height,
                    cols=img_width,
                )
            # Images without metadata keep their pixel coordinates.

        list_with_all_boxes.append([xmin, ymin, xmax, ymax])

    return filename, list_with_all_boxes, list_with_all_classes

In [19]:
# Build the dictionary that is later turned into the annotations CSV:
# one entry per "person" box, keyed by a running instance number.

dict_annotations = {}
num_instance = 0
for annotation_file in xml_files:
    file_name, list_bbox, list_classes = read_content(annotation_file)

    for index, label in enumerate(list_classes):
        if label != "person":
            continue
        bbox = list_bbox[index]
        dict_annotations[num_instance] = {
            "file_name": file_name,
            "xmin": bbox[0],
            "ymin": bbox[1],
            "xmax": bbox[2],
            "ymax": bbox[3],
        }
        num_instance += 1

In [20]:
# One row per annotation. Building the frame row-wise with orient="index"
# lets pandas infer a dtype per column, so the coordinates are numeric;
# DataFrame(dict).T goes through mixed-type columns and leaves every column
# as object dtype.
annotations_test_df = pd.DataFrame.from_dict(dict_annotations, orient="index")
display(annotations_test_df.head())
print(annotations_test_df.shape)

Unnamed: 0,file_name,xmin,ymin,xmax,ymax
0,001353.jpg,0.19,0.00600601,0.736,0.996997
1,001353.jpg,0.006,0.0780781,0.19,0.990991
2,000166.jpg,0.648,0.453333,0.708,0.76
3,000166.jpg,0.486,0.48,0.584,0.605333
4,000166.jpg,0.004,0.453333,0.044,0.709333


(5227, 5)


In [21]:
# Save the annotations dataframe as a CSV (the integer index is not written).
# NOTE(review): the metadata CSV of this split is written as
# "VOC_metadata_test_2007.csv" while this file name has no "_2007" suffix -
# confirm the naming is intentional.
os.chdir("/home/sergio/Downloads/VOCtest_06-Nov-2007/VOCdevkit/VOC2007")
annotations_test_df.to_csv("VOC_annotations_test.csv",index=False)