In [1]:
#| default_exp trf_record

# TFRecord Files Converter

Step 3 - Create a TFRecord file
In this step you need to convert the original dataset (with the splits) into a TFRecord format, which is documented here:
https://www.tensorflow.org/tutorials/load_data/tfrecord 

Please create a Python script that reads the original files and creates a single tf.train.Example record for each input image.

The image should be stored either as a tf.train.BytesList feature, whose content is the raw bytes of a [H, W, C=13] uint16 array (use numpy’s tobytes method), or as a tf.train.Int64List feature.

The labels should be stored in a format similar to the image, as an array of [H,W,C]
Optional: Add a colab to visualize the contents of the TFRecord file.


In [2]:
#|hide
from nbdev.showdoc import *

In [3]:
#|export
import tensorflow as tf
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


from flood_exercise import utils_func as util
from flood_exercise import const_vals as CONST




In [13]:
#|export


class ConvertTRF():
  """Convert each raster returned by `util.load_list_paths` into its own TFRecord file.

  Every raster is written as a single `tf.train.Example` that holds one
  `FloatList` feature per band, keyed by the band description, with the
  band's pixels flattened (NumPy default C order).  The conversion runs
  immediately on construction.

  NOTE(review): the image height/width are not stored in the record, so a
  reader has to know them in order to reshape the flattened bands.
  """


  def __init__(self,
    path_to_tif : str , #path to GeoTIFF files
    path_save_trf : str , # path to the folder that will store the result trf Records files
    ):
    """List the input files and convert each of them straight away."""

    self.list_of_tif = util.load_list_paths(path = path_to_tif , filter_file = False)
    self.path_save_trf = path_save_trf

    for tif in self.list_of_tif:
      self._convert_img_to_feature_(path_to_img = tif)



  def _convert_img_to_feature_(self, 
                               path_to_img : str , #path to specific image
                               ):
    """Serialize one raster into a TFRecord file inside `self.path_save_trf`.

    Raises `ValueError` when two bands would end up under the same feature
    key, because a later band would silently overwrite an earlier one.
    """
    # Output name: last path component of the input, cut at the first
    # occurrence of CONST.SPLIT_TILES_NAMES_STR3.
    file_name = path_to_img.split(CONST.SPLIT_TILES_NAMES_STR1)[-1].split(CONST.SPLIT_TILES_NAMES_STR3)[0]
    # Built once so the writer and the log message can never disagree.
    out_path = self.path_save_trf + CONST.SPLIT_TILES_NAMES_STR1 + file_name + CONST.TRF_FILE_SUFFIX

    # Keep the dataset open only while reading; everything below works on
    # the in-memory copies.
    with rasterio.open(path_to_img) as src:
      bands = src.descriptions
      arr = src.read()

    # A band without a description comes back as None, which cannot be used
    # as a feature key; fall back to a positional name (1-based).
    band_names = [
      name if name is not None else f'band_{idx}'
      for idx, name in enumerate(bands, start=1)
    ]
    if len(set(band_names)) != len(band_names):
      raise ValueError(f'duplicate band descriptions in {path_to_img}: {band_names}')

    # One row per band, pixels flattened; replaces the former DataFrame
    # round-trip while producing the same per-band value lists.
    flat = arr.reshape(arr.shape[0], -1)

    bands_dict = {
      name: tf.train.Feature(float_list=tf.train.FloatList(value=pixels.tolist()))
      for name, pixels in zip(band_names, flat)
    }

    example = tf.train.Example(features=tf.train.Features(feature=bands_dict))

    with tf.io.TFRecordWriter(out_path) as tfrecord_writer:
      tfrecord_writer.write(example.SerializeToString())

    # Reported only after the writer has been closed successfully.
    print(f'saved tfrecord at :  {out_path}')


In [14]:
# Constructing ConvertTRF performs the conversion for every listed file.
# NOTE(review): these are absolute, machine-specific paths - adjust before running elsewhere.
TIF_DIR = r"D:\git\flood_exercise\S2"
TRF_DIR = r"D:\git\flood_exercise\RESULTS\trfRecords"

instance = ConvertTRF(path_to_tif=TIF_DIR, path_save_trf=TRF_DIR)




saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Nigeria_417184_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Mekong_1396181_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Mekong_1191208_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Mekong_1248200_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\India_900498_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Mekong_1149855_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\India_91379_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\India_943439_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Nigeria_952958_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Mekong_293769_S2Hand.tfrecord
saved tfrecord at :  D:\git\flood_exercise\RESULTS\trfRecords\Nigeria_22088_S2