# Why Use TFRecords?
- to use TPUs more efficiently
- separate data preparation from model crafting
- prepare data once then focus on optimizing training workflow

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

# geospatial libraries
import geopandas as gpd
import rasterio as rs
from rasterio import features as feat
from rasterio.plot import show

# polygon creation
from shapely.geometry import Point, Polygon

# plotting
import matplotlib.pyplot as plt

# Loading split
- read more on image tiling of this dataset [here](https://www.kaggle.com/sandhiwangiyana/sn6-splitting-image-tiles/)
- you can't create a validation split by choosing random image_ids, since most tiles overlap each other -> data leak
- the best way is to choose an area on the map and grab the tiles that have minimal overlap with other tiles
- in my notebook above, I split the region into 10 areas and grouped the tiles that are >50% inside each area. Some data leakage might still happen, but we should also consider that some portion of each image is the `no-data` region (black part)

In [None]:
# Load the precomputed tile splits; SPLIT_SET is iterated later as a sequence of image-id groups.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code when the file is untrusted -- only use it with a .npy file you trust.
SPLIT_SET = np.load('../input/spacenet-6-image-splits/SN6_10_splits.npy', allow_pickle=True)
print(f'num of splits: {len(SPLIT_SET)}')

# TFRecords
- a TFRecord file contains a sequence of examples. Each example holds the data features and any additional labels that we want to store with them
- in this notebook, every example stores the optical PS-RGB image, the annotation mask, the image id and the total number of buildings
- the raster and mask will be stored as BytesList features, therefore we need to encode both images as byte strings

In [None]:
# ---- global params (read by the helper functions at call time) ----

# folder that holds one sub-folder per image mode plus the geojson annotations
ROOT_DIR = '../input/spacenet-6-multisensor-allweather-mapping/AOI_11_Rotterdam/'

# image mode to process; also used as sub-folder name and file-name infix
MODE = 'PS-RGB'

# target size used when reading rasters and resizing masks
IMAGE_SIZE = (768, 768)
IMAGE_CH = 3

# choose which SAR channel to extract, here I take HH,VV,VH
# (passed as `indexes=` to raster.read for SAR-Intensity images)
SAR_CH = [1, 4, 3]

In [None]:
def get_image_path(image_id):
    """Return the path of the .tif tile for `image_id` in the current MODE.

    ROOT_DIR and MODE are read from module level at call time, so changing
    MODE switches both the sub-folder and the file-name infix.
    """
    tile_name = f'SN6_Train_AOI_11_Rotterdam_{MODE}_{image_id}.tif'
    return f'{ROOT_DIR}{MODE}/{tile_name}'

def norm(plane):
    """Scale a float array into the 0..255 range and cast it to uint8.

    The divisor is 92.88 unless the array maximum is larger, in which case
    the maximum is used instead. This keeps every scaled value at or below
    255, so the largest values do not wrap around when cast to uint8.

    Used only for SAR images since their values are floats.

    Parameters:
    -----------
    plane: numpy array of any size and dimension
    """
    peak = plane.max()
    divisor = 92.88
    if peak > divisor:
        divisor = peak
    scaled = plane / divisor * 255
    return scaled.astype(np.uint8)

## Load binary mask
- 1: buildings, 0: background
- annotations are obtained from respective geojson files

In [None]:
def get_binary_mask(image_id, raster):
    """Rasterize the building footprints of one tile into a binary mask.

    Parameters:
    -----------
    image_id: tile id used to locate the matching geojson annotation file
    raster: opened rasterio dataset of the tile; its height, width and
        transform define the grid the geometries are burned into

    Returns:
    --------
    mask: boolean array of shape (raster.height, raster.width),
        True on building pixels and False on background
    num_buildings: number of rows (building geometries) in the geojson file
    """
    # get geojson file for a given tile
    geo_path = f'{ROOT_DIR}geojson_buildings/SN6_Train_AOI_11_Rotterdam_Buildings_{image_id}.geojson'
    gdf = gpd.read_file(geo_path)
    num_buildings = gdf.shape[0]

    # both branches use the raster's own grid so callers always get the same
    # shape and dtype, whether or not the tile contains buildings
    mask_shape = (raster.height, raster.width)

    # tiles without buildings are special-cased (rasterizing an empty
    # geometry set is reported to error) and get an all-background mask
    if num_buildings == 0:
        return np.zeros(mask_shape, dtype=bool), num_buildings

    mask = feat.geometry_mask(
        gdf.geometry,
        out_shape=mask_shape,
        transform=raster.transform,
        invert=True  # makes pixel buildings == 1
    )
    return mask, num_buildings

## Load image with mask
- both the image and the mask are encoded as PNG
- you could use JPEG to get a smaller file size for the image, but note that it only supports up to 3 channels
- you can add any preprocessing at this stage, but during training (using a TPU) you can only do preprocessing with numpy or tensorflow

In [None]:
def get_mask_string(mask):
    """Encode a 2-D binary mask as PNG at IMAGE_SIZE resolution.

    The mask is cast to uint8, given a trailing channel axis, resized with
    nearest-neighbour interpolation (so values stay 0/1) and PNG-encoded.
    """
    with_channel = np.expand_dims(mask.astype(np.uint8), axis=2)  # (rows, cols, 1)
    resized = tf.image.resize(with_channel, size=IMAGE_SIZE, method='nearest')
    return tf.io.encode_png(resized)

def get_image_mask(image_id):
    """Read one tile and return its image and mask as PNG-encoded strings.

    Takes an image id, ex: '20190822065725_20190822065959_tile_7283'.
    The image is read from the folder selected by the module-level MODE.

    returns:
        image (string), mask (string), num_building
    """
    # open the raster in a context manager so the file handle is released
    # after each tile instead of leaking one handle per processed image
    with rs.open(get_image_path(image_id)) as raster:
        # grab binary mask while the raster (size + transform) is available
        mask, num_building = get_binary_mask(image_id, raster)

        if MODE == 'SAR-Intensity':
            # read desired channels and size, then normalize floats to uint8
            image = norm(raster.read(indexes=SAR_CH, out_shape=IMAGE_SIZE))
        else:
            # if non sar image, convert to uint8 when reading
            image = raster.read(out_dtype='uint8', out_shape=IMAGE_SIZE)

    mask = get_mask_string(mask)

    # rasterio reads band-first arrays; move the channel axis last, then
    # encode to png (encoding doesn't support float, hence uint8 above)
    img = rs.plot.reshape_as_image(image)
    img = tf.io.encode_png(img)

    return img, mask, num_building

# Preview image-mask

In [None]:
# encode one sample tile, then decode it again to check image and mask visually
image, mask, num_building = get_image_mask('20190822065725_20190822065959_tile_7283')

# left: decoded image, right: decoded mask titled with the building count
f,(ax1,ax2) = plt.subplots(1,2,figsize=(10,5))
ax1.imshow(tf.io.decode_png(image))
ax2.imshow(tf.io.decode_png(mask))
ax2.set_title(f'buildings: {num_building}')
plt.show()

# Create TFRecord

In [None]:
# TFRecord data type
def _bytes_feature(value):
    """Wrap a single string / bytes value in a Feature holding a BytesList."""
    eager_tensor_type = type(tf.constant(0))
    # BytesList won't unpack a string from an EagerTensor, so tensors
    # (intended for the encoded image data) are converted with .numpy() first
    raw = value.numpy() if isinstance(value, eager_tensor_type) else value
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))

def _int64_feature(value):
    """Wrap bool / enum / int / uint values in a Feature holding an Int64List.

    `value` is handed to Int64List unchanged, so a single number has to be
    passed as a one-element list.
    """
    int_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int_list)

In [None]:
def create_tfrecord():
    """Write one .tfrec file per split in SPLIT_SET for the current MODE.

    Each file is written to the working directory and named
    '{MODE}{split number}-{number of images}.tfrec'. Every example stores
    the encoded image, the encoded mask, the image id and the building count.
    Progress is printed for every 50th image of a split.
    """
    # report both dimensions so non-square sizes are shown correctly
    print(f'using {IMAGE_SIZE[0]}x{IMAGE_SIZE[1]} resolution on {MODE} images')

    num_splits = len(SPLIT_SET)

    # create tfrecords for each split
    for n, image_ids in enumerate(SPLIT_SET, start=1):
        print(f'writing split {n} of {num_splits}')
        fn = f'{MODE}{n}-{len(image_ids)}.tfrec'

        with tf.io.TFRecordWriter(fn) as writer:
            for k, image_id in enumerate(image_ids):
                image_str, mask_str, num_building = get_image_mask(image_id)

                feature = {
                    'image': _bytes_feature(image_str),
                    'mask': _bytes_feature(mask_str),
                    'file_name': _bytes_feature(tf.compat.as_bytes(image_id)),
                    'building': _int64_feature([num_building]),  # single value needs to be arrayed
                }

                # write tfrecords
                example = tf.train.Example(features=tf.train.Features(feature=feature))
                writer.write(example.SerializeToString())

                # report each 50th image
                if k % 50 == 0:
                    print(k)

In [None]:
# create tf records for optical RGB
# MODE is a module-level setting that get_image_path, get_image_mask and
# create_tfrecord read at call time, so it must be set before each run
MODE = 'PS-RGB'
create_tfrecord()

# create tf records for SAR
# switching MODE selects the SAR-Intensity folder and the SAR branch
# (channel selection + norm) inside get_image_mask
MODE = 'SAR-Intensity'
create_tfrecord()

# Load TFRecord
Here's how you load and preview data from TFRecord

In [None]:
# Feature spec used by parse_example to decode a serialized example;
# the keys and types mirror the feature dict written in create_tfrecord.
tfrec_format = {
    'image': tf.io.FixedLenFeature([], tf.string),  # PNG-encoded image
    'mask': tf.io.FixedLenFeature([], tf.string),  # PNG-encoded binary mask
    'file_name': tf.io.FixedLenFeature([], tf.string),  # image id
    'building': tf.io.FixedLenFeature([], tf.int64),  # number of buildings in the tile
}

def parse_example(feature):
    """Parse one serialized example into (image, mask, data).

    `image` and `mask` are the PNG-decoded tensors; `data` is a dict with
    the 'file_name' string and the 'num_building' count cast to int32.
    """
    parsed = tf.io.parse_single_example(feature, tfrec_format)

    # dict with file name and building count
    data = {
        'file_name': tf.cast(parsed['file_name'], tf.string),
        'num_building': tf.cast(parsed['building'], tf.int32),
    }

    # decode image and mask
    image = tf.image.decode_png(parsed['image'])
    mask = tf.image.decode_png(parsed['mask'])

    return image, mask, data

In [None]:
filenames = tf.io.gfile.glob('./*.tfrec')

# load 1 file as TF Dataset
dataset = tf.data.TFRecordDataset(filenames[0])

# take 1 example and parse
for example in dataset.take(1):
    image, mask, data = parse_example(example)

    # plot example
    f,(ax1,ax2) = plt.subplots(1,2,figsize=(10,5))
    ax1.imshow(image)
    # file_name is a bytes tensor: decode it so the title is not shown as b'...'
    ax1.set_title(data['file_name'].numpy().decode())
    ax2.imshow(mask)
    # use the plain number; formatting the tensor itself prints 'tf.Tensor(...)'
    ax2.set_title(f'buildings: {data["num_building"].numpy()}')
    plt.show()