## TFRecord Decoding Function

In [1]:
import os
import numpy as np
import xml.etree.ElementTree as ET
from collections import OrderedDict
import matplotlib.pyplot as plt
import pandas as pd
import glob
import io
from collections import namedtuple
from PIL import Image
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
# Schema handed to tf.io.parse_single_example for every serialized record.
# The four 'image/...' entries directly below are parsed as scalars.
# The 'image/object/...' entries are parsed as variable-length 1-D lists:
# allow_missing=True lets a record carry any number of values, including none
# (presumably one value per annotated object -- confirm against the writer).
# TensorFlow dtypes are used throughout so the schema does not mix NumPy and
# TensorFlow type objects.
image_feature_description = {
    'image/height': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
    'image/width': tf.io.FixedLenFeature(shape=(), dtype=tf.int64),
    'image/filename': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
    'image/encoded': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
    'image/object/bbox/xmin': tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32, allow_missing=True),
    'image/object/bbox/xmax': tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32, allow_missing=True),
    'image/object/bbox/ymin': tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32, allow_missing=True),
    'image/object/bbox/ymax': tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32, allow_missing=True),
    'image/object/class/text': tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.string, allow_missing=True),
    'image/object/class/label': tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.int64, allow_missing=True),
}

In [3]:
def _parse_data(unparsed_example):
    """Parse one serialized record into a dict of tensors.

    Args:
        unparsed_example: A serialized example, as yielded by the TFRecord
            dataset this function is mapped over.

    Returns:
        The dict returned by ``tf.io.parse_single_example`` when using
        ``image_feature_description`` as the schema.
    """
    parsed_example = tf.io.parse_single_example(
        serialized=unparsed_example,
        features=image_feature_description,
    )
    return parsed_example

In [4]:
def _bytestring(parsed_example):
    """Decode the image bytes of a parsed example and gather its annotations.

    Args:
        parsed_example: Dict produced by ``_parse_data``. The keys read here
            are ``image/encoded``, ``image/height``, ``image/width``, the four
            ``image/object/bbox/*`` lists and ``image/object/class/label``.
            The dict is not modified.

    Returns:
        A dict of the form
        ``{'image': ..., 'objects': {'bbox': ..., 'label': ...}}`` where
        ``image`` is the decoded image reshaped to ``[height, width, 3]`` and
        ``bbox`` stacks the coordinate lists along the last axis in
        ``(ymin, xmin, ymax, xmax)`` order.
    """
    # Force three channels and a single frame: without this, a grayscale or
    # RGBA file (or a multi-frame GIF) decodes to a tensor whose element count
    # differs from height * width * 3 and the reshape below fails.
    image = tf.io.decode_image(
        parsed_example['image/encoded'],
        channels=3,
        expand_animations=False,
    )
    # Lay the pixels out using the height/width stored alongside the image.
    image = tf.reshape(
        image,
        [parsed_example['image/height'], parsed_example['image/width'], 3],
    )

    # One row per entry of the coordinate lists, columns (ymin, xmin, ymax, xmax).
    bbox = tf.stack(
        [
            parsed_example['image/object/bbox/ymin'],
            parsed_example['image/object/bbox/xmin'],
            parsed_example['image/object/bbox/ymax'],
            parsed_example['image/object/bbox/xmax'],
        ],
        axis=-1,
    )

    return {
        'image': image,
        'objects': {
            'bbox': bbox,
            'label': parsed_example['image/object/class/label'],
        },
    }

In [5]:
def tfrecord_decoder(tfrecord_path):
    """Build a dataset of decoded examples from a TFRecord file.

    Args:
        tfrecord_path: Passed straight to ``tf.data.TFRecordDataset``.

    Returns:
        A dataset whose records have been mapped through ``_parse_data`` and
        then ``_bytestring``, in that order.
    """
    raw_records = tf.data.TFRecordDataset(tfrecord_path)
    # Stage 1 parses each serialized record; stage 2 decodes the image and
    # assembles the output dict.
    return raw_records.map(_parse_data).map(_bytestring)

In [15]:
# Location of the TFRecord file to decode (machine-specific absolute path).
train_tfrecord_path = r"F:\Minor Data Collection\Final Image Data\Monument Original\TFRecord\train.tfrecord"
dataset = tfrecord_decoder(train_tfrecord_path)

In [44]:
import time

# Count the records by folding over the whole dataset and time the full pass.
# perf_counter is a monotonic, high-resolution clock, so the measurement cannot
# be skewed by system clock adjustments the way a difference of datetime.now()
# readings can.
start = time.perf_counter()
print(dataset.reduce(0, lambda x, _ : x + 1).numpy())
elapsed_ms = (time.perf_counter() - start) * 1000
print(f"Time difference is {elapsed_ms} ms")


5694
Time difference is 1046.9869999999999 ms


### Unused Scratch Code (commented out; relies on undefined names)

In [1]:
# feature={
#         'image/height': int64_feature(height),
#         'image/width': int64_feature(width),
#         'image/filename': bytes_feature(filename),
#         'image/source_id': bytes_feature(filename),
#         'image/encoded': bytes_feature(encoded_jpg),
#         'image/format': bytes_feature(image_format),
#         'image/object/bbox/xmin': float_list_feature(xmins),
#         'image/object/bbox/xmax': float_list_feature(xmaxs),
#         'image/object/bbox/ymin': float_list_feature(ymins),
#         'image/object/bbox/ymax': float_list_feature(ymaxs),
#         'image/object/class/text': bytes_list_feature(classes_text),
#         'image/object/class/label': int64_list_feature(classes),
#     }

In [2]:
# features = tfds.features.FeaturesDict({
#     'image/height': tfds.features.Scalar(dtype = np.int64),
#     'image/width' : tfds.features.Scalar(dtype = np.int64),
#     'image/filename': tfds.features.Scalar(dtype = tf.string),
#     'image/source_id': tfds.features.Scalar(dtype = tf.string),
#     'image/encoded' : tfds.features.Image(shape = (300, 300, 3)),
#     'image/format' : tfds.features.Scalar(dtype = tf.string),
#     'image/object/bbox': tfds.features.Sequence({
#         'xmin': tfds.features.Tensor(shape=(), dtype = np.float32),
#         'xmax': tfds.features.Tensor(shape=(), dtype = np.float32),
#         'ymin': tfds.features.Tensor(shape=(), dtype = np.float32),
#         'ymax': tfds.features.Tensor(shape=(), dtype = np.float32),
#     }),
#     'image/object/class':tfds.features.Sequence({
#         'text': tfds.features.Tensor(shape = (), dtype = tf.string),
#         'label':tfds.features.Tensor(shape = (), dtype = np.int64)
        
#     })
    
# })

In [3]:
# split_infos = [
#     tfds.core.SplitInfo(
#         name = 'monument-train',
#         shard_lengths = [506],
#         num_bytes = 0),
#     tfds.core.SplitInfo(
#         name = 'monument-test',
#         shard_lengths = [173],
#         num_bytes = 0
#     )
# ]

In [4]:
# split_infos = tfds.folder_dataset.compute_split_info(out_dir = './Monument Dataset',
# filename_template = tfds.core.ShardedFileTemplate(data_dir = './Monument Dataset', template = '{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}')
# )

In [5]:
# tfds.folder_dataset.write_metadata(
#     data_dir = './Monument Dataset',
#     features = features,
#     split_infos = split_infos,
#     filename_template = '{SPLIT}.{FILEFORMAT}-{SHARD_X_OF_Y}'
# )

In [6]:
# builder = tfds.builder_from_directory('./Monument Dataset')
# builder.info.splits['monument-train'].num_examples

In [7]:
# ds = builder.as_dataset(split = 'monument-train')

In [None]:
# feature={
#         'image/height': int64_feature(height),
#         'image/width': int64_feature(width),
#         'image/filename': bytes_feature(filename),
#         'image/source_id': bytes_feature(filename),
#         'image/encoded': bytes_feature(encoded_jpg),
#         'image/format': bytes_feature(image_format),
#         'image/object/bbox/xmin': float_list_feature(xmins),
#         'image/object/bbox/xmax': float_list_feature(xmaxs),
#         'image/object/bbox/ymin': float_list_feature(ymins),
#         'image/object/bbox/ymax': float_list_feature(ymaxs),
#         'image/object/class/text': bytes_list_feature(classes_text),
#         'image/object/class/label': int64_list_feature(classes),
#     }

#         'height': int64_feature(height),
#         'width': int64_feature(width),
#         'filename': bytes_feature(filename),
#         'image': bytes_feature(encoded_jpg),
#         'object/bbox/xmin': float_list_feature(xmins),
#         'object/bbox/xmax': float_list_feature(xmaxs),
#         'object/bbox/ymin': float_list_feature(ymins),
#         'object/bbox/ymax': float_list_feature(ymaxs),
#         'object/class/text': bytes_list_feature(classes_text),
#         'object/class/label': int64_list_feature(classes),