In [2]:
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
import sys
import os
import time
import sklearn
import numpy as np
import pandas as pd

%matplotlib inline

In [6]:
# TFRecord file format, from the outermost message inwards:
#   tf.train.Example
#     -> tf.train.Features: {"key": tf.train.Feature}
#       -> tf.train.Feature: tf.train.BytesList / FloatList / Int64List

# BytesList stores raw bytes, so every title is UTF-8 encoded first.
favoriate_books = [
    title.encode("utf-8") for title in ("python", "deep_learning")
]
favoriate_books_bytelist = tf.train.BytesList(value=favoriate_books)
print(favoriate_books_bytelist)

hours_floatList = tf.train.FloatList(value=[9.0, 12.5, 8.0, 5.5])
print(hours_floatList)

age_int64list = tf.train.Int64List(value=[42])
print(age_int64list)

# Wrap each typed list in a tf.train.Feature and collect them under string keys.
features = tf.train.Features(feature={
    "favoriate_books": tf.train.Feature(bytes_list=favoriate_books_bytelist),
    "hours": tf.train.Feature(float_list=hours_floatList),
    "age": tf.train.Feature(int64_list=age_int64list),
})
print(features)

value: "python"
value: "deep_learning"

value: 9.0
value: 12.5
value: 8.0
value: 5.5

value: 42

feature {
  key: "age"
  value {
    int64_list {
      value: 42
    }
  }
}
feature {
  key: "favoriate_books"
  value {
    bytes_list {
      value: "python"
      value: "deep_learning"
    }
  }
}
feature {
  key: "hours"
  value {
    float_list {
      value: 9.0
      value: 12.5
      value: 8.0
      value: 5.5
    }
  }
}



In [7]:
# Wrap the feature map in a tf.train.Example message and show its text form.
example = tf.train.Example(features=features)
print(example)

# Serialize the message to bytes; this is the payload written to TFRecord files below.
serialized_example = example.SerializeToString()
print(serialized_example)


features {
  feature {
    key: "age"
    value {
      int64_list {
        value: 42
      }
    }
  }
  feature {
    key: "favoriate_books"
    value {
      bytes_list {
        value: "python"
        value: "deep_learning"
      }
    }
  }
  feature {
    key: "hours"
    value {
      float_list {
        value: 9.0
        value: 12.5
        value: 8.0
        value: 5.5
      }
    }
  }
}

b'\n[\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x00\x00\x10A\x00\x00HA\x00\x00\x00A\x00\x00\xb0@\n,\n\x0ffavoriate_books\x12\x19\n\x17\n\x06python\n\rdeep_learning\n\x0c\n\x03age\x12\x05\x1a\x03\n\x01*'


In [8]:
# Location of the uncompressed TFRecord demo file.
output_dir = "tfrecord_basic"

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create
# race of os.path.exists() followed by os.mkdir().
os.makedirs(output_dir, exist_ok=True)

filename = "test.tfrecords"
filename_fullpath = os.path.join(output_dir, filename)

# Write the same serialized Example three times.
with tf.io.TFRecordWriter(filename_fullpath) as writer:
    for i in range(3):
        writer.write(serialized_example)
        

In [9]:
# Read the file back without parsing: each element printed below is a scalar
# string tensor containing one serialized Example.
dataset = tf.data.TFRecordDataset(filenames=[filename_fullpath])

for serialized_example_tensor in dataset:
    print(serialized_example_tensor)

tf.Tensor(b'\n[\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x00\x00\x10A\x00\x00HA\x00\x00\x00A\x00\x00\xb0@\n,\n\x0ffavoriate_books\x12\x19\n\x17\n\x06python\n\rdeep_learning\n\x0c\n\x03age\x12\x05\x1a\x03\n\x01*', shape=(), dtype=string)
tf.Tensor(b'\n[\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x00\x00\x10A\x00\x00HA\x00\x00\x00A\x00\x00\xb0@\n,\n\x0ffavoriate_books\x12\x19\n\x17\n\x06python\n\rdeep_learning\n\x0c\n\x03age\x12\x05\x1a\x03\n\x01*', shape=(), dtype=string)
tf.Tensor(b'\n[\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x00\x00\x10A\x00\x00HA\x00\x00\x00A\x00\x00\xb0@\n,\n\x0ffavoriate_books\x12\x19\n\x17\n\x06python\n\rdeep_learning\n\x0c\n\x03age\x12\x05\x1a\x03\n\x01*', shape=(), dtype=string)


In [13]:
# Parsing schema: the two variable-length features come back as SparseTensors,
# while "age" is declared as a fixed-length scalar (shape []).
expect_features = {
    "favoriate_books": tf.io.VarLenFeature(dtype=tf.string),
    "hours": tf.io.VarLenFeature(dtype=tf.float32),
    "age": tf.io.FixedLenFeature([], dtype=tf.int64),
}

# Decode every serialized record into a dict of tensors and print it.
dataset = tf.data.TFRecordDataset(filenames=[filename_fullpath])
for serialized_example_tensor in dataset:
    example = tf.io.parse_single_example(
        serialized=serialized_example_tensor,
        features=expect_features,
    )
    print(example)
    

{'favoriate_books': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f8fc86ebbe0>, 'hours': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f8fc868f1d0>, 'age': <tf.Tensor: id=90, shape=(), dtype=int64, numpy=42>}
{'favoriate_books': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f8ff6201080>, 'hours': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f8fc86eb358>, 'age': <tf.Tensor: id=99, shape=(), dtype=int64, numpy=42>}
{'favoriate_books': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f8fc868f470>, 'hours': <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x7f8fc8524320>, 'age': <tf.Tensor: id=108, shape=(), dtype=int64, numpy=42>}


In [14]:
# Same parsing schema as before, repeated so this cell can run on its own.
expect_features = {
    "favoriate_books": tf.io.VarLenFeature(dtype=tf.string),
    "hours": tf.io.VarLenFeature(dtype=tf.float32),
    "age": tf.io.FixedLenFeature([], dtype=tf.int64),
}

dataset = tf.data.TFRecordDataset(filenames=[filename_fullpath])
for serialized_example_tensor in dataset:
    example = tf.io.parse_single_example(
        serialized=serialized_example_tensor,
        features=expect_features,
    )
    # The sparse string feature is densified (b"" fills any gaps) so the
    # individual titles can be iterated and decoded for printing.
    books = tf.sparse.to_dense(
        example["favoriate_books"], default_value=b""
    )
    for book in books:
        title_bytes = book.numpy()
        print(title_bytes.decode("UTF-8"))
    

python
deep_learning
python
deep_learning
python
deep_learning


In [15]:

# Target path for the compressed copy. The ".zip" suffix is only a file name:
# the records themselves are written with GZIP compression (see options).
filename_fullpath_zip = filename_fullpath + ".zip"

options = tf.io.TFRecordOptions(compression_type="GZIP")

# Write the same serialized Example three times, this time compressed.
with tf.io.TFRecordWriter(filename_fullpath_zip, options=options) as writer:
    for i in range(3):
        writer.write(serialized_example)
        

In [17]:
# The reader must be given the same compression type the file was written with.
dataset_zip = tf.data.TFRecordDataset(
    filenames=[filename_fullpath_zip],
    compression_type="GZIP",
)

# Parse each record with the schema defined in an earlier cell and print the titles.
for serialized_example_tensor in dataset_zip:
    example = tf.io.parse_single_example(
        serialized=serialized_example_tensor,
        features=expect_features,
    )
    books = tf.sparse.to_dense(
        example["favoriate_books"], default_value=b""
    )
    for book in books:
        title_bytes = book.numpy()
        print(title_bytes.decode("UTF-8"))
    

python
deep_learning
python
deep_learning
python
deep_learning


In [20]:
%save setup_enviroment 2

File `setup_enviroment.py` exists. Overwrite (y/[N])?  y
The following commands were written to file `setup_enviroment.py`:
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
import sys
import os
import time
import sklearn
import numpy as np
import pandas as pd

get_ipython().run_line_magic('matplotlib', 'inline')
