In [2]:
import numpy as np
import os
import argparse
import cv2
from imutils import paths
import json
import glob
import xml.etree.ElementTree as Et
from sklearn.model_selection import train_test_split
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
import shutil

#Selenium components
import selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

## Converting XML to CSV

In [12]:
def xml_to_csv(path):
    """Flatten the object annotations of every ``*.xml`` file in ``path``.

    Each ``<object>`` element found in a file becomes one row holding the
    image filename, the image width/height and the object's class name and
    bounding box.

    Fields are looked up by tag name (``size/width``, ``name``,
    ``bndbox/xmin`` ...). When a tag is missing, the positional index this
    function historically used is tried instead, so annotation files that
    relied on the old layout keep working.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        pandas.DataFrame with columns ``filename, width, height, class,
        xmin, ymin, xmax, ymax`` (empty if no objects were found).
    """
    def _text(parent, tag, position):
        # Tag lookup is independent of how many optional children the
        # annotation tool emitted; the index is only a legacy fallback.
        node = parent.find(tag)
        if node is None:
            node = parent[position]
        return node.text

    xml_list = []
    # sorted() makes the row order reproducible; glob order is arbitrary.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = Et.parse(xml_file).getroot()
        members = root.findall('object')
        if not members:
            # Nothing annotated in this file: skip it without touching
            # the (possibly absent) filename/size nodes.
            continue

        # Per-image values are the same for every object in the file.
        filename = root.find('filename').text
        size = root.find('size')
        width = int(_text(size, 'width', 0))
        height = int(_text(size, 'height', 1))

        for member in members:
            box = member.find('bndbox')
            if box is None:
                box = member[5]  # legacy positional layout
            xml_list.append((
                filename,
                width,
                height,
                _text(member, 'name', 0),   # class
                int(_text(box, 'xmin', 0)),
                int(_text(box, 'ymin', 1)),
                int(_text(box, 'xmax', 2)),
                int(_text(box, 'ymax', 3)),
            ))

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    return pd.DataFrame(xml_list, columns=column_name)

In [13]:
def main():
    """Convert the XML annotations of each dataset split into a labels CSV.

    For the ``train`` and ``test`` splits in turn, the annotations found in
    the split's image folder are gathered with ``xml_to_csv`` and written to
    ``./models/research/object_detection/data_label/<split>_labels.csv``.
    """
    # NOTE(review): `images_path` is not defined in any cell visible here;
    # it must already exist in the kernel (and end with a path separator,
    # since the split name is appended directly) before this runs.
    for split_name in ('train', 'test'):
        split_dir = os.path.join(os.getcwd(), images_path + '{}'.format(split_name))
        labels_df = xml_to_csv(split_dir)
        csv_target = './models/research/object_detection/data_label/{}_labels.csv'.format(split_name)
        labels_df.to_csv(csv_target, index=None)
        print('Successfully converted xml to csv.')


In [14]:
# Run the XML -> CSV conversion defined in the previous cell: one
# <split>_labels.csv file is written for 'train' and one for 'test'.
main()

Successfully converted xml to csv.
Successfully converted xml to csv.


# Generating TF_Records

In [28]:
# Adapted from
# https://github.com/datitran/raccoon_dataset/blob/master/generate_tfrecord.py

from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import tensorflow as tf
import io

from PIL import Image
from models.research.object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict


In [29]:
# Add more class labels as needed, make sure to start at 1
def class_text_to_int(row_label):
    """Map a class name to its integer id.

    Ids start at 1. Add further classes to the table below, continuing
    the numbering.

    Args:
        row_label: Class name as stored in the ``class`` column.

    Returns:
        The integer id of the class, or ``None`` when the name is not one
        of the known classes.
    """
    label_ids = {
        'huracan': 1,
        'gallardo': 2,
        'aventador': 3,
        'murcielago': 4,
    }
    # Explicit fallback: unknown labels yield None (the old code reached the
    # same result only implicitly, via a bare `None` expression).
    return label_ids.get(row_label)

In [30]:
# Output location for the generated .record files. It is used as a plain
# string prefix (data_path + '<split>.record'), so keep the trailing slash.
data_path = "./models/research/object_detection/data/"

In [31]:
def split(df, group):
    """Partition ``df`` into one record per distinct value of ``group``.

    Args:
        df: DataFrame to partition.
        group: Column name to group by.

    Returns:
        A list of ``data`` namedtuples, each with ``filename`` (the group
        key) and ``object`` (the sub-frame holding that key's rows).
    """
    record_type = namedtuple('data', ['filename', 'object'])
    grouped = df.groupby(group)
    records = []
    for key in grouped.groups:
        records.append(record_type(key, grouped.get_group(key)))
    return records

In [32]:
def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: Record with a ``filename`` attribute and an ``object``
            DataFrame whose rows provide ``xmin``, ``xmax``, ``ymin``,
            ``ymax`` and ``class`` (as produced by ``split``).
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` holding the encoded image bytes, its size,
        the box coordinates divided by the image width/height, and the class
        names together with their integer ids.

    Raises:
        ValueError: If a row's class has no id in ``class_text_to_int``.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    # The size is taken from the image file itself, not from the CSV columns.
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        class_id = class_text_to_int(row['class'])
        if class_id is None:
            # Fail with a readable message instead of an opaque protobuf
            # type error when the id list is serialized below.
            raise ValueError(
                'Unknown class {!r} in annotations for {}'.format(row['class'], group.filename))
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_id)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        # Previously the ids were collected but never stored in the record.
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

In [33]:
def main():
    """Write ``<split>.record`` TFRecord files for the test and train splits.

    For each split, the labels CSV is read, its rows are grouped per image
    with ``split``, and one serialized example per image is written to
    ``data_path + '<split>.record'``.

    NOTE: this definition replaces the XML -> CSV ``main`` defined in an
    earlier cell; re-run that cell before calling ``main()`` if the CSV
    conversion is needed again.
    """
    for i in ['test', 'train']:
        path = os.path.join(os.getcwd(),'./models/research/object_detection/images/'+i+'/' )
        examples = pd.read_csv('./models/research/object_detection/data_label/'+i+'_labels'+'.csv')
        grouped = split(examples, 'filename')
        # Open the writer only once the CSV has loaded, so an existing
        # record file is not truncated when the labels file is missing.
        writer = tf.python_io.TFRecordWriter(data_path+i+'.record')
        try:
            for group in grouped:
                tf_example = create_tf_example(group, path)
                writer.write(tf_example.SerializeToString())
        finally:
            # Always release the file handle, even if an example fails.
            writer.close()
        print('Successfully created the '+i+ ' TFRecords')

In [34]:
# Generate test.record and train.record using the TFRecord `main` defined in
# the previous cell (it shadows the earlier XML -> CSV `main`).
main()

Successfully created the test TFRecords
Successfully created the train TFRecords


## Great! Your TFRecords should now be created. Let's check.

In [35]:
# List the output directory; 'train.record' and 'test.record' should appear.
os.listdir(data_path)

['mscoco_label_map.pbtxt',
 'events.out.tfevents.1530915204.Brians-MacBook-Pro-2.local',
 'events.out.tfevents.1530916614.Brians-MacBook-Pro-2.local',
 '.DS_Store',
 'checkpoint',
 'events.out.tfevents.1530915997.Brians-MacBook-Pro-2.local',
 'events.out.tfevents.1530917111.Brians-MacBook-Pro-2.local',
 'train.record',
 'lambo_detection.pbtxt',
 'oid_object_detection_challenge_500_label_map.pbtxt',
 'events.out.tfevents.1530915300.Brians-MacBook-Pro-2.local',
 'pipeline.config',
 'ssd_mobilenet_v1_coco.config',
 'model.ckpt-0.data-00000-of-00001',
 'graph.pbtxt',
 'kitti_label_map.pbtxt',
 'pascal_label_map.pbtxt',
 'test.record',
 'model.ckpt-0.index',
 'oid_bbox_trainable_label_map.pbtxt',
 'ava_label_map_v2.1.pbtxt',
 'model.ckpt-0.meta']

# The next steps are in part 3