This notebook follows these references:  
https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/training.html
https://www.tensorflow.org/tutorials/load_data/tf_records#tfrecord_files_using_tfdata
https://github.com/tensorflow/models/blob/master/research/object_detection/README.md

In [110]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import IPython.display as display
import os
import random

# Show the installed TensorFlow version in the cell output, then fail fast on
# anything other than the exact release this notebook pins. The message names
# both the expected and the found version, because the check is stricter than
# "any 1.x".
print(tf.__version__)
assert tf.__version__ == '1.13.1', (
    'This notebook was meant to be used with tensorflow v1 '
    f'(pinned to 1.13.1), but found {tf.__version__}')

1.13.1


In [111]:
# Switch TF 1.x to eager mode: later cells call `.numpy()` on tensors and
# iterate over a dataset with a plain `for` loop.
# NOTE(review): TensorFlow expects this call at program start-up, before any
# other TF ops or tensors are created - keep this cell right after the imports.
tf.enable_eager_execution()

In [120]:
# Target height / width every image is resized to during preprocessing.
IMG_H = 160
IMG_W = 160

# Directory the generated .record files are written into.
ANNOTATIONS_PATH = pathlib.Path('workspace/training_demo/annotations')

# Root folder of the gathered images; sub-directory names are used as labels.
IMAGES_PATH = '../../../image_gathering/imgs'
data_root = pathlib.Path(IMAGES_PATH)
print(data_root)

../../../image_gathering/imgs


In [113]:
def preprocess_image(image):
  """Decode JPEG bytes into a 3-channel image resized to IMG_H x IMG_W.

  The resized pixel values are divided by 255.0 before being returned.
  """
  decoded = tf.image.decode_jpeg(image, channels=3)
  resized = tf.image.resize(decoded, [IMG_H, IMG_W])
  # Scale by 255 so 8-bit pixel values land in the [0, 1] range.
  return resized / 255.0

def load_and_preprocess_image(path):
  """Read the file at `path` and return its contents run through `preprocess_image`."""
  return preprocess_image(tf.io.read_file(path))

def load_and_preprocess_from_path_label(path, label):
  """Turn a (path, label) pair into (preprocessed image, label).

  The label is passed through unchanged.
  """
  image = load_and_preprocess_image(path)
  return image, label

def load_training_images(data_root, seed=None):
    """Build a dataset of (preprocessed image, label index) pairs from a folder tree.

    Files are looked up exactly one level below ``data_root``
    (``data_root/<label>/<file>``); each sub-directory name is a label name and
    its position in the sorted list of names is the label index.

    Args:
        data_root: ``pathlib.Path`` (or path string) of the image root folder.
        seed: optional seed for shuffling the file order. With ``None`` (the
            default) the global ``random`` state is used, so the order - and
            therefore any later split - differs from run to run.

    Returns:
        A tuple ``(image_label_ds, label_names, image_count)``: the mapped
        ``tf.data.Dataset``, the sorted label names, and the number of files.

    Raises:
        FileNotFoundError: if ``data_root`` does not exist.
        ValueError: if no files are found below ``data_root``.
    """
    data_root = pathlib.Path(data_root)
    if not data_root.exists():
        raise FileNotFoundError(f'Image directory not found: {data_root}')

    # Images' paths: keep regular files only (the glob also matches nested
    # directories) and sort before shuffling so a given seed always produces
    # the same order regardless of filesystem listing order.
    all_image_paths = sorted(
        str(path) for path in data_root.glob('*/*') if path.is_file())
    if not all_image_paths:
        # An empty list would otherwise become a non-string tensor and fail
        # later, inside the dataset map, with a much less helpful error.
        raise ValueError(f'No image files found under: {data_root}')
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(all_image_paths)

    # Labels
    label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
    label_to_index = dict((name, index) for index, name in enumerate(label_names))
    all_image_labels = [label_to_index[pathlib.Path(path).parent.name]
                        for path in all_image_paths]

    ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))
    image_label_ds = ds.map(load_and_preprocess_from_path_label)

    # Extra info
    image_count = len(all_image_paths)
    print(f'Number of images: {image_count}')
    print(f'Label names: {label_names}')
    return image_label_ds, label_names, image_count

def dataset_split(ds, image_count, test=0.05, dev=0.05, train=0.9):
    """Split `ds` into consecutive test, dev and train subsets.

    The first ``int(test * image_count)`` elements form the test set, the next
    ``int(dev * image_count)`` the dev set, and all remaining elements the
    train set. ``train`` is only used to validate that the fractions sum to 1.

    Args:
        ds: dataset-like object providing ``take(n)`` and ``skip(n)``.
        image_count: total number of elements in ``ds``.
        test, dev, train: fractions of the data for each subset.

    Returns:
        Tuple ``(ds_test, ds_dev, ds_train)``.

    Raises:
        AssertionError: if the three fractions do not sum to 1.
    """
    # Compare with a tolerance: valid fractions such as 0.7 + 0.2 + 0.1 do not
    # add up to exactly 1.0 in binary floating point.
    assert abs(test + dev + train - 1) < 1e-9, 'test + dev + train should be equal to 1'
    nt, nd = int(test*image_count), int(dev*image_count)
    # The train set takes whatever is left, so rounding never drops elements.
    ntr = image_count - nt - nd
    ds_test = ds.take(nt)
    ds_dev = ds.skip(nt).take(nd)
    ds_train = ds.skip(nt + nd).take(ntr)
    return ds_test, ds_dev, ds_train

In [114]:
# Dataset of preprocessed images
ds, labels, image_count = load_training_images(data_root)
ds_test, ds_dev, ds_train = dataset_split(ds, image_count)

Number of images: 1056
Label names: ['ethernet']


In [117]:
def _bytes_feature(value):
  """Wrap a single string / bytes value in a `tf.train.Feature` holding a `BytesList`."""
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
  """Wrap a single float / double value in a `tf.train.Feature` holding a `FloatList`."""
  float_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
  """Wrap a single bool / enum / int / uint value in a `tf.train.Feature` holding an `Int64List`."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)

def serialize_example(feature0, feature1):
  """
  Build a tf.Example message from the two inputs and return it serialized.

  `feature0` is stored as the bytes of its serialized tensor, `feature1` as an
  int64 value. Calling `.numpy()` here means this must run eagerly.
  """
  # Serialize the first input into raw bytes so it fits a bytes feature.
  feature0_bytes = tf.io.serialize_tensor(feature0).numpy()

  # Feature name -> tf.Example-compatible value, wrapped in a Features message.
  features = tf.train.Features(feature={
      'feature0': _bytes_feature(feature0_bytes),
      'feature1': _int64_feature(feature1),
  })

  return tf.train.Example(features=features).SerializeToString()

def tf_serialize_example(f0,f1):
  """Run `serialize_example` through `tf.py_function` and return a scalar string tensor."""
  serialized = tf.py_function(
    func=serialize_example,
    inp=(f0, f1),     # arguments forwarded to serialize_example
    Tout=tf.string)   # the return type is tf.string
  # Reshape to rank 0 so the result is a scalar.
  return tf.reshape(serialized, ())

def store_dataset_as_tfrecord(filename, ds):
    """Serialize every element of `ds` and write the result to a TFRecord file.

    Args:
        filename: destination file, as a string or path-like object.
        ds: dataset whose elements are accepted by `tf_serialize_example`.

    Returns:
        Whatever `TFRecordWriter.write` returns.
        NOTE(review): outside eager mode this is presumably the op that has to
        be run for the write to happen - confirm before relying on it.
    """
    # Accept pathlib.Path and friends; the writer itself expects a string.
    if isinstance(filename, os.PathLike):
        filename = os.fspath(filename)
    # Make sure the destination folder exists for plain local paths. Paths with
    # a URL-style scheme are left alone.
    if isinstance(filename, str) and '://' not in filename:
        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    ds_serialized = ds.map(tf_serialize_example)
    writer = tf.data.experimental.TFRecordWriter(filename)
    return writer.write(ds_serialized)

In [121]:
# Write the train and dev splits as TFRecord files under ANNOTATIONS_PATH.
# NOTE(review): ds_test is created by the split above but is not written here.
for record_name, split_ds in (('train.record', ds_train), ('dev.record', ds_dev)):
    store_dataset_as_tfrecord(os.path.join(ANNOTATIONS_PATH, record_name), split_ds)



In [109]:
def import_data():
    """Placeholder: not implemented yet; does nothing and returns None."""

def convert_data_to_tfrecord():
    """Placeholder: not implemented yet; does nothing and returns None."""

In [16]:
# Peek at the first (image, label) element of the dataset.
for f0, f1 in ds.take(1):
  print(f0, f1, sep='\n')
    

tf.Tensor(
[[[1.         0.9882353  0.96862745]
  [1.         0.9882353  0.96862745]
  [1.         0.99215686 0.9529412 ]
  ...
  [0.7490196  0.69411767 0.6313726 ]
  [0.7529412  0.69803923 0.63529414]
  [0.76862746 0.6862745  0.6509804 ]]

 [[1.         0.9882353  0.96862745]
  [1.         0.9882353  0.96862745]
  [1.         0.99215686 0.9529412 ]
  ...
  [0.7921569  0.73333335 0.654902  ]
  [0.77843136 0.7078431  0.6431373 ]
  [0.7862745  0.7019608  0.66862744]]

 [[1.         1.         0.9137255 ]
  [1.         1.         0.92156863]
  [1.         1.         0.90588236]
  ...
  [0.89411765 0.8039216  0.68235296]
  [0.8627451  0.78039217 0.68235296]
  [0.8666667  0.76862746 0.7019608 ]]

 ...

 [[0.5137255  0.49019608 0.47058824]
  [0.5803922  0.54901963 0.5235294 ]
  [0.6117647  0.57843137 0.5627451 ]
  ...
  [0.6431373  0.61960787 0.6117647 ]
  [0.6019608  0.57843137 0.57843137]
  [0.6627451  0.63529414 0.6431373 ]]

 [[0.6        0.5764706  0.5568628 ]
  [0.59607846 0.5647059  0