In [2]:
import tensorflow as tf
import os
import glob
import numpy as np
import pandas as pd
tf.version.VERSION

'2.1.0'

In [3]:
data_dir = "./ICDAR15_Demo"

imgs = glob.glob(os.path.join(data_dir,'*.jpg'))
gts = glob.glob(os.path.join(data_dir,'*.txt'))

In [4]:
def read_img(path):
    img_raw = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img_raw)
    return img

img_ds = tf.data.Dataset.from_tensor_slices(imgs)
img_ds = img_ds.map(read_img)


gt_ds = []
for path in gts:
    with open(path, encoding='utf-8') as f:
        lines = f.readlines()
        line = [line.lstrip('\ufeff').rstrip('\n').split(',')[:8] for line in lines]
        gt_ds.append(line)

gt_ds = tf.ragged.constant(gt_ds)
gt_ds = tf.data.Dataset.from_tensor_slices(gt_ds)
gt_ds = gt_ds.map(lambda x:tf.strings.to_number(x.to_tensor(),tf.float32))

In [6]:
img_ds = img_ds.map(tf.io.serialize_tensor)
gt_ds = gt_ds.map(tf.io.serialize_tensor)

In [7]:
data_ds = tf.data.Dataset.zip((img_ds, gt_ds))

In [8]:
# writer = tf.data.experimental.TFRecordWriter('ds.tfrecord')
# writer.write(img_ds)

In [9]:
def _bytes_feature(value):
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [10]:
def create_example(img_raw, gt):
    
    feature = {
        'img_raw' : _bytes_feature(img_raw),
        'gt' : _bytes_feature(gt)
    }
    
    example = tf.train.Example(features = tf.train.Features(feature = feature))
    return example.SerializeToString()

def create_example_map(img_raw, gt):
    
    tf_string = tf.py_function(create_example, inp=(img_raw, gt), Tout=tf.string)
    
    return tf.reshape(tf_string, ())

In [11]:
data_ds = data_ds.map(create_example_map)

In [12]:
filename = 'test.tfrecord'
writer = tf.data.experimental.TFRecordWriter(filename)
writer.write(data_ds)

In [13]:
filenames = [filename]
raw_dataset = tf.data.TFRecordDataset(filenames)
raw_dataset

<TFRecordDatasetV2 shapes: (), types: tf.string>

In [23]:
for raw_record in raw_dataset.take(1):
    print(repr(raw_record))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [14]:
feature_description = {
    'img_raw': tf.io.FixedLenFeature([], tf.string, default_value=''),
    'gt': tf.io.FixedLenFeature([], tf.string, default_value='')
}

def _parse_function(example_proto):
    return tf.io.parse_single_example(example_proto, feature_description)

parsed_dataset = raw_dataset.map(_parse_function)
parsed_dataset

<MapDataset shapes: {gt: (), img_raw: ()}, types: {gt: tf.string, img_raw: tf.string}>

In [33]:
for item in parsed_dataset.take(1):
    st = item.get('gt')
    tf.io.parse_tensor(st, tf.float32)
    
    it = item.get('img_raw')
    tf.io.parse_tensor(it, tf.uint8)

<tf.Tensor: shape=(7, 8), dtype=float32, numpy=
array([[377., 117., 463., 117., 465., 130., 378., 130.],
       [493., 115., 519., 115., 519., 131., 493., 131.],
       [374., 155., 409., 155., 409., 170., 374., 170.],
       [492., 151., 551., 151., 551., 170., 492., 170.],
       [376., 198., 422., 198., 422., 212., 376., 212.],
       [494., 190., 539., 189., 539., 205., 494., 206.],
       [374.,   1., 494.,   0., 492.,  85., 372.,  86.]], dtype=float32)>

<tf.Tensor: shape=(720, 1280, 3), dtype=uint8, numpy=
array([[[191, 152, 170],
        [185, 146, 164],
        [184, 145, 163],
        ...,
        [142, 115, 134],
        [142, 114, 136],
        [143, 115, 138]],

       [[190, 151, 169],
        [184, 148, 162],
        [184, 145, 163],
        ...,
        [142, 115, 132],
        [142, 114, 136],
        [143, 115, 138]],

       [[190, 154, 168],
        [183, 150, 161],
        [183, 147, 161],
        ...,
        [142, 115, 132],
        [142, 114, 136],
        [143, 115, 137]],

       ...,

       [[ 79,  69,  57],
        [ 81,  71,  59],
        [ 85,  75,  63],
        ...,
        [ 33,  28,  25],
        [ 31,  26,  23],
        [ 30,  25,  22]],

       [[ 78,  69,  52],
        [ 80,  71,  54],
        [ 85,  76,  61],
        ...,
        [ 35,  27,  24],
        [ 31,  26,  22],
        [ 30,  25,  21]],

       [[ 77,  69,  50],
        [ 80,  72,  53],
        [ 84,  75,  58],
        ...,
        [ 35,  27,  