In [15]:
import tensorflow as tf
import os

In [16]:
# Directory the source images are read from.
train_image_path = 'data/train_v2/'

# CSV file handed to csv_dataset (columns: ImageId, EncodedPixels).
csv_path = 'data_p/s.csv'

# Output directories: copied images and serialized masks.
image_path = 'data_p/s/image/'
mask_path = 'data_p/s/mask/'

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(image_path, exist_ok=True)
os.makedirs(mask_path, exist_ok=True)

In [17]:
def csv_dataset(path):
    """Build a single-pass, unshuffled CSV dataset over ``path``.

    Rows are exposed under the column names ``ImageId`` and ``EncodedPixels``
    and are delivered in batches of one.
    """
    reader_options = dict(
        batch_size=1,  # required
        column_names=['ImageId', 'EncodedPixels'],
        num_epochs=1,
        shuffle=False,
    )
    return tf.data.experimental.make_csv_dataset(path, **reader_options)

In [18]:
def rle_to_mask(encoded_pixels, image_height=768, image_width=768):
    """Decode a run-length-encoded string into a 2-D float32 mask.

    The encoding is a whitespace-separated list of ``start length`` pairs,
    where ``start`` is a 1-based index into the flattened image. The flat
    buffer is filled column by column, which is why the result is reshaped to
    ``(image_width, image_height)`` and then transposed.

    Args:
        encoded_pixels: Scalar string (or scalar string tensor) with the pairs.
            An empty string yields an all-zero mask.
        image_height: Number of rows of the returned mask.
        image_width: Number of columns of the returned mask.

    Returns:
        A ``tf.float32`` tensor of shape ``(image_height, image_width)`` holding
        1.0 inside the encoded runs and 0.0 everywhere else.
    """
    num_pixels = image_height * image_width
    mask = tf.zeros([num_pixels], dtype=tf.float32)

    # Parse the tokens into an (n_runs, 2) table of [start, length]; the
    # reshape fails loudly if the token count is odd.
    tokens = tf.strings.split(encoded_pixels)
    pairs = tf.reshape(tf.strings.to_number(tokens, out_type=tf.int32), (-1, 2))
    starts = pairs[:, 0] - 1  # RLE starts are 1-based
    limits = starts + pairs[:, 1]

    # Expand every run into its pixel indices at once and scatter in a single
    # op, instead of copying the whole mask once per run.
    indices = tf.ragged.range(starts, limits).flat_values
    mask = tf.tensor_scatter_nd_update(
        mask,
        indices=tf.expand_dims(indices, axis=1),
        updates=tf.ones_like(indices, dtype=tf.float32),
    )

    # Column-major buffer: rows of the reshaped tensor are image columns, so
    # reshape to (width, height) first; the transpose then yields (height, width).
    return tf.transpose(tf.reshape(mask, (image_width, image_height)))

def process_image(file):
    """Copy one image from ``train_image_path`` to ``image_path``.

    Returns the raw file contents that were read.
    """
    source = train_image_path + file
    destination = image_path + file
    contents = tf.io.read_file(source)
    tf.io.write_file(destination, contents)
    return contents

def process_mask(rle, file):
    """Decode ``rle`` into a mask, write it serialized under ``mask_path``, and return it."""
    decoded = rle_to_mask(rle)
    tf.io.write_file(mask_path + file, tf.io.serialize_tensor(decoded))
    return decoded

def process_batch(csv_item):
    """Process one CSV row: copy its image and write its mask.

    Returns the ``(image contents, mask)`` pair for the row.
    """
    image_id = csv_item['ImageId']
    image = process_image(image_id)
    mask = process_mask(csv_item['EncodedPixels'], image_id)
    return image, mask

In [19]:
csv = csv_dataset(csv_path)

# Use map for its side effects (copying images, writing masks) and let tf.data
# run the per-row work in parallel; output order stays deterministic by default.
# NOTE(review): parallel writes assume each ImageId appears on a single CSV
# row; repeated ids would race on the same mask file. TODO confirm.
r = csv.unbatch().map(process_batch, num_parallel_calls=tf.data.AUTOTUNE)
# Iterate the dataset so every row is actually processed; results are discarded.
for batch in r.batch(32):
    pass