# XML2JSON
Este notebook contiene el código necesario para transformar las detecciones almacenadas en archivos XML en archivos JSON con formato VGG.

In [None]:
import json
import xml.etree.ElementTree as etree
import cv2
import os
from collections import OrderedDict
import glob
import numpy as np

# Leer todos los groundtruth y poner a negro zonas ignoradas
Antes de entrenar es necesario tapar las zonas que deben ser ignoradas porque, debido a la distancia, los coches que aparecen en ellas todavía son demasiado pequeños para ser detectados.

In [None]:
# Paint the ignored regions of a single image black
image_path = "MVI_20011/img00001.jpg"
image = cv2.imread(image_path)
tree = etree.parse('MVI_20011.xml')

# Every child of <ignored_region> describes a box through left/top/width/height
ignored_zones = tree.findall("./ignored_region/*")

for zone in ignored_zones:
    # Attributes are parsed as floats and then truncated to whole pixels
    left, top, width, height = [
        int(float(zone.get(key))) for key in ("left", "top", "width", "height")
    ]
    corners = [left, top, left + width, top + height]
    print(corners)
    # thickness=-1 asks OpenCV for a filled rectangle
    cv2.rectangle(
        img=image,
        pt1=(corners[0], corners[1]),
        pt2=(corners[2], corners[3]),
        color=(0, 0, 0),
        thickness=-1,
    )

# The masked result goes to a separate file; the source image is left untouched
cv2.imwrite("prueba.png", image)

In [None]:
# Paint the ignored regions black in every image of one sequence folder.
# NOTE: each image is overwritten in place.
gt = "MVI_20011.xml"
# The image folder is named after the annotation file, without its extension
folder = os.path.splitext(os.path.basename(gt))[0]
print(folder)

# The annotation file does not change between images, so it is parsed and the
# rectangles are computed once instead of once per image.
tree = etree.parse(gt)
ignored_boxes = []
for black_box in tree.findall("./ignored_region/*"):
    col_min = int(float(black_box.get("left")))
    row_min = int(float(black_box.get("top")))
    col_max = col_min + int(float(black_box.get("width")))
    row_max = row_min + int(float(black_box.get("height")))
    ignored_boxes.append(((col_min, row_min), (col_max, row_max)))

for image_path in glob.glob(folder + '/*.jpg'):
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None (it does not raise) when a file cannot be read
        print("Could not read image, skipping: " + image_path)
        continue

    for top_left, bottom_right in ignored_boxes:
        # thickness=-1 draws a filled rectangle
        cv2.rectangle(
            img=image,
            pt1=top_left,
            pt2=bottom_right,
            color=(0, 0, 0),
            thickness=-1)

    cv2.imwrite(image_path, image)

In [None]:
# Paint the ignored regions black across the whole dataset.
# NOTE: every image is overwritten in place.
for gt in glob.glob("Datasets/DETRAC/DETRAC-Train-Annotations-XML/*.xml"):
    # The image folder of a sequence is named after its annotation file
    folder = os.path.splitext(os.path.basename(gt))[0]
    print(folder)

    # Parse the annotation file and compute its rectangles once per sequence,
    # not once per image: the result is identical for every image in the folder.
    tree = etree.parse(gt)
    ignored_boxes = []
    for black_box in tree.findall("./ignored_region/*"):
        col_min = int(float(black_box.get("left")))
        row_min = int(float(black_box.get("top")))
        col_max = col_min + int(float(black_box.get("width")))
        row_max = row_min + int(float(black_box.get("height")))
        ignored_boxes.append(((col_min, row_min), (col_max, row_max)))

    for image_path in glob.glob('Datasets/DETRAC/Insight-MVT_Annotation_Train/' + folder + '/*.jpg'):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (it does not raise) when a file cannot be read
            print("Could not read image, skipping: " + image_path)
            continue

        for top_left, bottom_right in ignored_boxes:
            # thickness=-1 draws a filled rectangle
            cv2.rectangle(
                img=image,
                pt1=top_left,
                pt2=bottom_right,
                color=(0, 0, 0),
                thickness=-1)

        cv2.imwrite(image_path, image)

# Exportar todas las anotaciones a archivos individuales
Es necesario convertir las anotaciones de cada vídeo (un único XML) en un archivo JSON por imagen para realizar el entrenamiento.

In [None]:
# Trial run: export the annotations of a single image to a JSON file
image_path = "MVI_20011/img00001.jpg"
tree = etree.parse('MVI_20011.xml')

# JSON
image_size = os.stat(image_path).st_size    # File size in bytes
image_name = os.path.basename(image_path)   # File name without the folder

# The top-level key is the file name immediately followed by the file size
vgg_key = "{}{}".format(image_name, image_size)
vgg_main = {vgg_key: OrderedDict()}

# General data
vgg = vgg_main[vgg_key]
vgg["fileref"] = ""
vgg["size"] = image_size
vgg["filename"] = image_name
vgg["base64_img_data"] = ""
vgg["file_attributes"] = {}
vgg["regions"] = OrderedDict()


# Annotations
# The frame number is the numeric part of the file name: "img00001.jpg" -> 1
frame_num = int(os.path.splitext(image_name)[0][3:])

xml_frame = tree.findall("./frame[@num='{}']/target_list/target".format(frame_num))
for n, detection in enumerate(xml_frame):
    # Children are read by position: the first one carries the rectangle and the
    # second one the vehicle type (presumably <box> and <attribute> - confirm against the XML).
    shape = OrderedDict()
    shape["name"] = "rect"
    # Coordinates are stored as the raw attribute strings found in the XML
    shape["x"] = detection[0].get("left")
    shape["y"] = detection[0].get("top")
    shape["width"] = detection[0].get("width")
    shape["height"] = detection[0].get("height")

    attributes = OrderedDict()
    attributes["Class"] = detection[1].get("vehicle_type")

    region = OrderedDict()
    region["shape_attributes"] = shape
    region["region_attributes"] = attributes
    # Regions are keyed by their index, as a string
    vgg["regions"][str(n)] = region

# Swap only the extension; str.replace("jpg", "json") would also rewrite any
# "jpg" appearing elsewhere in the path.
json_path = os.path.splitext(image_path)[0] + ".json"
with open(json_path, "w") as f:
    json.dump(vgg_main, f)

In [None]:
# Given one ground-truth file, write one JSON file per image of its sequence
gt = "MVI_20011.xml"
# The image folder is named after the annotation file, without its extension
folder = os.path.splitext(os.path.basename(gt))[0]
print(folder)

# The annotation file is shared by every image of the folder: parse it once
tree = etree.parse(gt)

for image_path in glob.glob(folder + '/*.jpg'):
    # JSON
    image_size = os.stat(image_path).st_size    # File size in bytes
    image_name = os.path.basename(image_path)   # File name without the folder

    # The top-level key is the file name immediately followed by the file size
    vgg_key = "{}{}".format(image_name, image_size)
    vgg_main = {vgg_key: OrderedDict()}

    # General data
    vgg = vgg_main[vgg_key]
    vgg["fileref"] = ""
    vgg["size"] = image_size
    vgg["filename"] = image_name
    vgg["base64_img_data"] = ""
    vgg["file_attributes"] = {}
    vgg["regions"] = OrderedDict()

    # Annotations
    # The frame number is the numeric part of the file name: "img00001.jpg" -> 1
    frame_num = int(os.path.splitext(image_name)[0][3:])

    xml_frame = tree.findall("./frame[@num='{}']/target_list/target".format(frame_num))
    for n, detection in enumerate(xml_frame):
        # Children are read by position: the first one carries the rectangle and the
        # second one the vehicle type (presumably <box> and <attribute> - confirm against the XML).
        shape = OrderedDict()
        shape["name"] = "rect"
        # Coordinates are stored as the raw attribute strings found in the XML
        shape["x"] = detection[0].get("left")
        shape["y"] = detection[0].get("top")
        shape["width"] = detection[0].get("width")
        shape["height"] = detection[0].get("height")

        attributes = OrderedDict()
        attributes["Class"] = detection[1].get("vehicle_type")

        region = OrderedDict()
        region["shape_attributes"] = shape
        region["region_attributes"] = attributes
        # Regions are keyed by their index, as a string
        vgg["regions"][str(n)] = region

    # Swap only the extension; str.replace("jpg", "json") would also rewrite any
    # "jpg" appearing elsewhere in the path.
    json_path = os.path.splitext(image_path)[0] + ".json"
    with open(json_path, "w") as f:
        json.dump(vgg_main, f)

In [None]:
# Read every ground-truth file and export the annotations of each image to its own JSON file
for gt in glob.glob("Datasets/DETRAC/DETRAC-Train-Annotations-XML/*.xml"):
    # The image folder of a sequence is named after its annotation file
    folder = os.path.splitext(os.path.basename(gt))[0]
    print(folder)

    # The annotation file is shared by every image of the folder: parse it once
    # per sequence instead of once per image.
    tree = etree.parse(gt)

    for image_path in glob.glob('Datasets/DETRAC/Insight-MVT_Annotation_Train/' + folder + '/*.jpg'):
        # JSON
        image_size = os.stat(image_path).st_size    # File size in bytes
        image_name = os.path.basename(image_path)   # File name without the folder

        # The top-level key is the file name immediately followed by the file size
        vgg_key = "{}{}".format(image_name, image_size)
        vgg_main = {vgg_key: OrderedDict()}

        # General data
        vgg = vgg_main[vgg_key]
        vgg["fileref"] = ""
        vgg["size"] = image_size
        vgg["filename"] = image_name
        vgg["base64_img_data"] = ""
        vgg["file_attributes"] = {}
        vgg["regions"] = OrderedDict()

        # Annotations
        # The frame number is the numeric part of the file name: "img00001.jpg" -> 1
        frame_num = int(os.path.splitext(image_name)[0][3:])

        xml_frame = tree.findall("./frame[@num='{}']/target_list/target".format(frame_num))
        for n, detection in enumerate(xml_frame):
            # Children are read by position: the first one carries the rectangle and the
            # second one the vehicle type (presumably <box> and <attribute> - confirm against the XML).
            shape = OrderedDict()
            shape["name"] = "rect"
            # Coordinates are stored as the raw attribute strings found in the XML
            shape["x"] = detection[0].get("left")
            shape["y"] = detection[0].get("top")
            shape["width"] = detection[0].get("width")
            shape["height"] = detection[0].get("height")

            attributes = OrderedDict()
            attributes["Class"] = detection[1].get("vehicle_type")

            region = OrderedDict()
            region["shape_attributes"] = shape
            region["region_attributes"] = attributes
            # Regions are keyed by their index, as a string
            vgg["regions"][str(n)] = region

        # Swap only the extension; str.replace("jpg", "json") would also rewrite any
        # "jpg" appearing elsewhere in the path.
        json_path = os.path.splitext(image_path)[0] + ".json"
        with open(json_path, "w") as f:
            json.dump(vgg_main, f)

# Pruebas de lectura del archivo XML

In [None]:
# Load the sample annotation file and show the attributes of its root element.
# `tree` and `root` are reused by the exploration cells that follow.
xml_file = 'MVI_20011.xml'
tree = etree.parse(xml_file)
root = tree.getroot()
root_attributes = root.attrib
print(root_attributes)

In [None]:
# Print the attributes of each element directly under the root,
# each followed by the attributes of its own children.
for element in root:
    print(element.attrib)
    for grandchild in element:
        print(grandchild.attrib)

In [None]:
# Collect the "id" attribute of every <target> found three levels below the root
target_ids = []
for target in root.findall("./*/*/target"):
    target_ids.append(target.get("id"))
print(target_ids)

In [None]:
# Walk over every <frame>; once the loop ends, `id` holds the "num" attribute
# (a string) of the last frame visited.
for frame in root.iterfind("./frame"):
    id = frame.get("num")

In [None]:
# Image: build an empty VGG entry for a sample image and inspect it
image_path = "MVI_20011/img00001.jpg"

img = cv2.imread(image_path)
image_size = os.stat(image_path).st_size
image_name = "{}.png".format("hola")

# The top-level key is the image name immediately followed by the file size
vgg_key = "{}{}".format(image_name, image_size)

# General fields of the entry; no regions are added in this cell
vgg = OrderedDict([
    ("fileref", ""),
    ("size", image_size),
    ("filename", image_name),
    ("base64_img_data", ""),
    ("file_attributes", {}),
    ("regions", OrderedDict()),
])
vgg_main = {vgg_key: vgg}

print(vgg)

# Frame number taken from a fixed slice of the path; kept in `id` for the next cell
id = int(image_path[13:-4])
print(id)

In [None]:
# Print the rectangle and the vehicle type of every target in frame `id`
# (`tree` and `id` come from earlier cells).
xml_frame = tree.findall("./frame[@num='{}']/target_list/target".format(id))
for n, detection in enumerate(xml_frame):
    print(n)
    # The first child carries the rectangle attributes
    for key in ("left", "top", "width", "height"):
        print(detection[0].get(key))
    # The second child carries the vehicle type
    print(detection[1].get("vehicle_type"))