In [1]:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET

In [2]:
def convert_xml_to_csv(path):
    """Reads xml files with information on class name and bounding
    box and write them to a csv file
    
    Arguments:
        path = str, path where xml files are stored
    """
    
    # Inizialize class names vector and xml valies
    classes_names = []
    xml_values = []
    
    for xml_file in glob.glob(path + "/*.xml"):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        
        for member in root.findall("object"):
            classes_names.append(member[0].text)
            xml_values.append((
                root.find("filename").text,
                int(root.find("size")[0].text),
                int(root.find("size")[1].text),
                member[0].text,
                int(member.find('bndbox').find('xmin').text),
                int(member.find('bndbox').find('ymin').text),
                int(member.find('bndbox').find('xmax').text),
                int(member.find('bndbox').find('ymax').text),
            ))
            
    column_name = ["filename", "width", "height", "class",
                   "xmin", "ymin", "xmax", "ymax",]
    
    xml_df = pd.DataFrame(xml_values, columns=column_name)
    classes_names = list(set(classes_names))
    classes_names.sort()
    return xml_df, classes_names

In [3]:
input_path = 'data/images/train'


In [4]:
xml_df, classes_names = convert_xml_to_csv(input_path)


In [5]:
def create_classes_protobuf(classes_names):
    """Create a protocol buffer file storing
    the name of the classes. This is used by the
    object detection API
    
    Arguments:
        classes_names = list, classes names
    """
    
    pbtxt_content = ""
    
    for i, class_name in enumerate(classes_names):
            pbtxt_content = (
                pbtxt_content
                + "item {{\n    id: {0}\n    name: '{1}'\n}}\n\n".format(
                    i + 1, class_name
                )
            )
    
    return pbtxt_content.strip()

create_classes_protobuf(classes_names)

"item {\n    id: 1\n    name: 'f1_car'\n}"

In [6]:
label_map_path = 'label_map.pbtxt'

pbtxt_content = ""
for i, class_name in enumerate(classes_names):
            pbtxt_content = (
                pbtxt_content
                + "item {{\n    id: {0}\n    name: '{1}'\n}}\n\n".format(
                    i + 1, class_name
                )
            )
        
pbtxt_content

"item {\n    id: 1\n    name: 'f1_car'\n}\n\n"

In [7]:
pbtxt_content = pbtxt_content.strip()
pbtxt_content

with open(label_map_path, "w") as f:
    f.write(pbtxt_content)

In [None]:
xml_df, classes_names = xml_to_csv(args.inputDir)
    xml_df.to_csv(args.outputFile, index=None)
    print("Successfully converted xml to csv.")
    if args.labelMapDir:
        os.makedirs(args.labelMapDir, exist_ok=True)
        label_map_path = os.path.join(args.labelMapDir, "label_map.pbtxt")
        print("Generate `{}`".format(label_map_path))

        # Create the `label_map.pbtxt` file
        pbtxt_content = ""
        for i, class_name in enumerate(classes_names):
            pbtxt_content = (
                pbtxt_content
                + "item {{\n    id: {0}\n    name: '{1}'\n}}\n\n".format(
                    i + 1, class_name
                )
            )
        pbtxt_content = pbtxt_content.strip()
        with open(label_map_path, "w") as f:
            f.write(pbtxt_content)

In [None]:
# ""
# Usage:
# # Create train data:
# python xml_to_csv.py -i [PATH_TO_IMAGES_FOLDER]/train -o [PATH_TO_ANNOTATIONS_FOLDER]/train_labels.csv

# # Create test data:
# python xml_to_csv.py -i [PATH_TO_IMAGES_FOLDER]/test -o [PATH_TO_ANNOTATIONS_FOLDER]/test_labels.csv
# """

import os
import glob
import pandas as pd
import argparse
import xml.etree.ElementTree as ET


def xml_to_csv(path):
    """Iterates through all .xml files (generated by labelImg) in a given 
    directory and combines them in a single Pandas dataframe.

    Parameters:
    ----------
    path : {str}
        The path containing the .xml files
    Returns
    -------
    Pandas DataFrame
        The produced dataframe
    """
    classes_names = []
    xml_list = []
    for xml_file in glob.glob(path + "/*.xml"):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall("object"):
            classes_names.append(member[0].text)
            value = (
                root.find("filename").text,
                int(root.find("size")[0].text),
                int(root.find("size")[1].text),
                member[0].text,
                int(member.find('bndbox').find('xmin').text),
                int(member.find('bndbox').find('ymin').text),
                int(member.find('bndbox').find('xmax').text),
                int(member.find('bndbox').find('ymax').text),
            )
            xml_list.append(value)
    column_name = [
        "filename",
        "width",
        "height",
        "class",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
    ]
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    classes_names = list(set(classes_names))
    classes_names.sort()
    return xml_df, classes_names


def main():
    # Initiate argument parser
    parser = argparse.ArgumentParser(
        description="Sample TensorFlow XML-to-CSV converter"
    )
    parser.add_argument(
        "-i",
        "--inputDir",
        help="Path to the folder where the input .xml files are stored",
        type=str,
    )
    parser.add_argument(
        "-o", "--outputFile", help="Name of output .csv file (including path)", type=str
    )

    parser.add_argument(
        "-l",
        "--labelMapDir",
        help="Directory path to save label_map.pbtxt file is specified.",
        type=str,
        default="",
    )

    args = parser.parse_args()

    if args.inputDir is None:
        args.inputDir = os.getcwd()
    if args.outputFile is None:
        args.outputFile = args.inputDir + "/labels.csv"

    assert os.path.isdir(args.inputDir)
    os.makedirs(os.path.dirname(args.outputFile), exist_ok=True)
    xml_df, classes_names = xml_to_csv(args.inputDir)
    xml_df.to_csv(args.outputFile, index=None)
    print("Successfully converted xml to csv.")
    if args.labelMapDir:
        os.makedirs(args.labelMapDir, exist_ok=True)
        label_map_path = os.path.join(args.labelMapDir, "label_map.pbtxt")
        print("Generate `{}`".format(label_map_path))

        # Create the `label_map.pbtxt` file
        pbtxt_content = ""
        for i, class_name in enumerate(classes_names):
            pbtxt_content = (
                pbtxt_content
                + "item {{\n    id: {0}\n    name: '{1}'\n}}\n\n".format(
                    i + 1, class_name
                )
            )
        pbtxt_content = pbtxt_content.strip()
        with open(label_map_path, "w") as f:
            f.write(pbtxt_content)


if __name__ == "__main__":
    main()


In [None]:
"""
Usage:

# Create train data:
python generate_tfrecord.py --label=<LABEL> --csv_input=<PATH_TO_ANNOTATIONS_FOLDER>/train_labels.csv  --output_path=<PATH_TO_ANNOTATIONS_FOLDER>/train.record <PATH_TO_ANNOTATIONS_FOLDER>/label_map.pbtxt

# Create test data:
python generate_tfrecord.py --label=<LABEL> --csv_input=<PATH_TO_ANNOTATIONS_FOLDER>/test_labels.csv  --output_path=<PATH_TO_ANNOTATIONS_FOLDER>/test.record  --label_map <PATH_TO_ANNOTATIONS_FOLDER>/label_map.pbtxt
"""

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf
import sys

sys.path.append("../../models/research")

from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

flags = tf.app.flags
flags.DEFINE_string("csv_input", "", "Path to the CSV input")
flags.DEFINE_string("output_path", "", "Path to output TFRecord")
flags.DEFINE_string(
    "label_map",
    "",
    "Path to the `label_map.pbtxt` contains the <class_name>:<class_index> pairs generated by `xml_to_csv.py` or manually.",
)
# if your image has more labels input them as
# flags.DEFINE_string('label0', '', 'Name of class[0] label')
# flags.DEFINE_string('label1', '', 'Name of class[1] label')
# and so on.
flags.DEFINE_string("img_path", "", "Path to images")
FLAGS = flags.FLAGS


def split(df, group):
    data = namedtuple("data", ["filename", "object"])
    gb = df.groupby(group)
    return [
        data(filename, gb.get_group(x))
        for filename, x in zip(gb.groups.keys(), gb.groups)
    ]


def create_tf_example(group, path, label_map):
    with tf.gfile.GFile(os.path.join(path, "{}".format(group.filename)), "rb") as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode("utf8")
    image_format = b"jpg"
    # check if the image format is matching with your images.
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row["xmin"] / width)
        xmaxs.append(row["xmax"] / width)
        ymins.append(row["ymin"] / height)
        ymaxs.append(row["ymax"] / height)
        classes_text.append(row["class"].encode("utf8"))
        class_index = label_map.get(row["class"])
        assert (
            class_index is not None
        ), "class label: `{}` not found in label_map: {}".format(
            row["class"], label_map
        )
        classes.append(class_index)

    tf_example = tf.train.Example(
        features=tf.train.Features(
            feature={
                "image/height": dataset_util.int64_feature(height),
                "image/width": dataset_util.int64_feature(width),
                "image/filename": dataset_util.bytes_feature(filename),
                "image/source_id": dataset_util.bytes_feature(filename),
                "image/encoded": dataset_util.bytes_feature(encoded_jpg),
                "image/format": dataset_util.bytes_feature(image_format),
                "image/object/bbox/xmin": dataset_util.float_list_feature(xmins),
                "image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs),
                "image/object/bbox/ymin": dataset_util.float_list_feature(ymins),
                "image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs),
                "image/object/class/text": dataset_util.bytes_list_feature(
                    classes_text
                ),
                "image/object/class/label": dataset_util.int64_list_feature(classes),
            }
        )
    )
    return tf_example


def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), FLAGS.img_path)
    examples = pd.read_csv(FLAGS.csv_input)

    # Load the `label_map` from pbtxt file.
    from object_detection.utils import label_map_util

    label_map = label_map_util.load_labelmap(FLAGS.label_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True
    )
    category_index = label_map_util.create_category_index(categories)
    label_map = {}
    for k, v in category_index.items():
        label_map[v.get("name")] = v.get("id")

    grouped = split(examples, "filename")
    for group in grouped:
        tf_example = create_tf_example(group, path, label_map)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print("Successfully created the TFRecords: {}".format(output_path))


if __name__ == "__main__":
    tf.app.run()



In [9]:
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf
import sys

# sys.path.append("../../models/research")

from PIL import Image
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
from collections import namedtuple, OrderedDict

In [23]:
writer = tf.python_io.TFRecordWriter("data/train.record")
path = os.path.join("data/images/train")
examples = xml_df

label_map = label_map_util.load_labelmap('label_map.pbtxt')

categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True
    )

category_index = label_map_util.create_category_index(categories)

label_map = {}
for k, v in category_index.items():
    label_map[v.get("name")] = v.get("id")
    
grouped = split(examples, "filename")

for group in grouped:
    tf_example = create_tf_example(group, path, label_map)
    writer.write(tf_example.SerializeToString())
    
writer.close()

In [23]:
images_path = "data/images/train"
images_xml_df = xml_df
label_map_path = 'label_map.pbtxt'
output_path = "data/train2.record"

In [26]:
TfRecordConverter(images_path, images_xml_df, label_map_path, output_path).create_tfrecord()

TFRecords created in data/train2.record


In [25]:
class TfRecordConverter(object):
    
    def __init__(self, images_path, images_xml_df, label_map_path, output_path):
        """Class to convert images and annotations to XML
        
        Arguments:
            images_path = str, path where the images to convert are stored
            images_xml_df = pandas df with reference to files and annotations
                created with the utilis to converto_xml_to_csv function
            label_map_path = str, path where a protobuf label map file is stored
            output_path = str, path where tfr records are stored   
        """
        self.images_path = images_path
        self.xml_df = images_xml_df
        self.label_map_path = label_map_path
        self.output_path = output_path
        
    def create_tfrecord(self):
        """Main function to create TF Records"""
        
        # Inizialize the TFR Record Writer
        writer = tf.python_io.TFRecordWriter(self.output_path)
        path = os.path.join(self.images_path)
        
        # Create label map dictionary
        label_map = label_map_util.load_labelmap(self.label_map_path)

        categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)

        category_index = label_map_util.create_category_index(categories)

        label_map = {}
        for k, v in category_index.items():
            label_map[v.get("name")] = v.get("id")
        
        # Create groups
        grouped = self._split()
        
        # Create TFR Record
        for group in grouped:
            tf_example = self._create_tf_example(group, label_map)
            writer.write(tf_example.SerializeToString())
        
        writer.close()
        print('TFRecords created in {}'.format(self.output_path))
        
    
    def _split(self):
        """Convenience function that input a pandas 
        dataframe of xml annotations related to an image
        and split into several smaller dataframe for each image.
        
        To create the xml use the convert_xml_to_csv in the 
        utils package.
        
        Arguments:
            df = pandas dataframe
        """
        
        data = namedtuple("data", ["filename", "object"])
        gb = self.xml_df.groupby("filename")
        
        groups = [
            data(filename, gb.get_group(x))
            for filename, x in zip(gb.groups.keys(), gb.groups)
        ]
        
        return groups
    
    def _create_tf_example(self, group, label_map):
        """Convert an images and respective annotation
        to TFR Record (a binary file formata)
        
        Arguments:
            group = pandas data frame
            label_map = dict
        """
        
        # Serialize Image
        with tf.gfile.GFile(os.path.join(
            self.images_path, "{}".format(group.filename)), "rb") as fid:
            encoded_jpg = fid.read()
            
        encoded_jpg_io = io.BytesIO(encoded_jpg)
        image = Image.open(encoded_jpg_io)
        width, height = image.size

        filename = group.filename.encode("utf8")
        image_format = b"jpg"
        
        # check if the image format is matching with your images.
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []
        classes_text = []
        classes = []
        
        # Serialize XML annotation
        for index, row in group.object.iterrows():
            xmins.append(row["xmin"] / width)
            xmaxs.append(row["xmax"] / width)
            ymins.append(row["ymin"] / height)
            ymaxs.append(row["ymax"] / height)
            classes_text.append(row["class"].encode("utf8"))
            class_index = label_map.get(row["class"])
            assert (
                class_index is not None
            ), "class label: `{}` not found in label_map: {}".format(
                row["class"], label_map
            )
            classes.append(class_index)
        
        # Create Tf Record
        tf_example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    "image/height": dataset_util.int64_feature(height),
                    "image/width": dataset_util.int64_feature(width),
                    "image/filename": dataset_util.bytes_feature(filename),
                    "image/source_id": dataset_util.bytes_feature(filename),
                    "image/encoded": dataset_util.bytes_feature(encoded_jpg),
                    "image/format": dataset_util.bytes_feature(image_format),
                    "image/object/bbox/xmin": dataset_util.float_list_feature(xmins),
                    "image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs),
                    "image/object/bbox/ymin": dataset_util.float_list_feature(ymins),
                    "image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs),
                    "image/object/class/text": dataset_util.bytes_list_feature(
                        classes_text
                    ),
                    "image/object/class/label": dataset_util.int64_list_feature(classes),
                }
            )
        )
        return tf_example

In [19]:
def split(df, group):
    data = namedtuple("data", ["filename", "object"])
    gb = df.groupby(group)
    return [
        data(filename, gb.get_group(x))
        for filename, x in zip(gb.groups.keys(), gb.groups)
    ]

def create_tf_example(group, path, label_map):
    with tf.gfile.GFile(os.path.join(path, "{}".format(group.filename)), "rb") as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode("utf8")
    image_format = b"jpg"
    # check if the image format is matching with your images.
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row["xmin"] / width)
        xmaxs.append(row["xmax"] / width)
        ymins.append(row["ymin"] / height)
        ymaxs.append(row["ymax"] / height)
        classes_text.append(row["class"].encode("utf8"))
        class_index = label_map.get(row["class"])
        assert (
            class_index is not None
        ), "class label: `{}` not found in label_map: {}".format(
            row["class"], label_map
        )
        classes.append(class_index)

    tf_example = tf.train.Example(
        features=tf.train.Features(
            feature={
                "image/height": dataset_util.int64_feature(height),
                "image/width": dataset_util.int64_feature(width),
                "image/filename": dataset_util.bytes_feature(filename),
                "image/source_id": dataset_util.bytes_feature(filename),
                "image/encoded": dataset_util.bytes_feature(encoded_jpg),
                "image/format": dataset_util.bytes_feature(image_format),
                "image/object/bbox/xmin": dataset_util.float_list_feature(xmins),
                "image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs),
                "image/object/bbox/ymin": dataset_util.float_list_feature(ymins),
                "image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs),
                "image/object/class/text": dataset_util.bytes_list_feature(
                    classes_text
                ),
                "image/object/class/label": dataset_util.int64_list_feature(classes),
            }
        )
    )
    return tf_example

In [17]:
for group in grouped:
    print(group[1])
    break

    filename  width  height   class  xmin  ymin  xmax  ymax
133    1.png    700     700  f1_car    32   399   350   528
134    1.png    700     700  f1_car   214   395   540   501


In [18]:
xml_df.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,88.png,700,700,f1_car,114,365,467,668
1,88.png,700,700,f1_car,376,338,574,501
2,88.png,700,700,f1_car,554,289,669,393
3,63.png,700,700,f1_car,271,441,408,588
4,63.png,700,700,f1_car,166,209,208,249
