In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import tifffile as tiff
import cv2
import os
from tqdm.notebook import tqdm
import tensorflow as tf
import gc
import rasterio
from rasterio.windows import Window
from tensorflow.data import Dataset

In [4]:
from model import utils

In [2]:
# Tiling / filtering configuration used by the rest of the notebook.
tile_size = 1024  # presumably the tile edge length in pixels — TODO confirm against usage
s_th = 40  # saturation blanking threshold
# Threshold for the minimum number of pixels. It grows with the square of
# (tile_size // 256), i.e. 1000 per 256-sized unit of area; 16000 for tile_size = 1024.
p_th = 1000 * (tile_size // 256) ** 2

In [6]:
def encodings_to_mask(encodings, shape):
    """Decode run-length encodings into a single labelled mask.

    Each encoding is a whitespace-separated string of ``start length`` pairs.
    Starts are 1-based offsets into a flat buffer of ``np.prod(shape)`` pixels.
    Pixels covered by the m-th encoding (0-based) are set to ``m + 1``; where
    encodings overlap, the later one wins.

    Args:
        encodings: iterable of encoding strings. Missing entries (``None`` or
            a float NaN, which is what pandas yields for empty cells) are
            skipped but still consume their label index.
        shape: shape the flat buffer is reshaped to before being transposed.

    Returns:
        ``np.uint8`` array equal to ``buffer.reshape(shape).T``; for a 2-D
        ``shape`` the result therefore has shape ``shape[::-1]``. 0 marks
        pixels not covered by any encoding.
    """
    image = np.zeros(np.prod(shape), dtype=np.uint8)
    for m, encoding in enumerate(encodings):
        # `np.float` was removed in NumPy 1.24 and now raises AttributeError.
        # The builtin `float` also matches np.float64, so NaNs read from a
        # DataFrame are still detected.
        if encoding is None or (isinstance(encoding, float) and np.isnan(encoding)):
            continue

        split = encoding.split()
        for i in range(0, len(split), 2):
            # Convert the 1-based start to a 0-based slice offset.
            start, length = int(split[i]) - 1, int(split[i + 1])
            image[start: start + length] = 1 + m
    return image.reshape(shape).T


def mask_to_encodings(mask, n=1):
    """Run-length encode each label of a mask.

    The mask is transposed and flattened, then for every label ``1..n`` the
    runs of pixels equal to that label are written as a space-separated string
    of ``start length`` pairs with 1-based starts. This is the format that
    ``encodings_to_mask`` decodes.

    Args:
        mask: NumPy array of integer labels.
        n: number of labels to encode; labels ``1`` through ``n`` are emitted
            in order.

    Returns:
        List of length ``n``. Entry ``i - 1`` is the encoding string for label
        ``i``, or ``np.nan`` when the label does not occur in the mask.
    """
    pixels = mask.T.flatten()
    encodings = []
    for label in range(1, n + 1):
        hits = pixels == label
        if not hits.any():
            encodings.append(np.nan)
            continue

        # Pad with zeros so runs touching either end still produce a
        # rising and a falling edge.
        padded = np.concatenate([[0], hits.astype(np.int8), [0]])
        # 1-based positions of every transition: start, end, start, end, ...
        runs = np.where(padded[1:] != padded[:-1])[0] + 1
        # Turn each (start, end) pair into (start, length). Without this step
        # the output holds end positions and cannot be decoded back.
        runs[1::2] -= runs[::2]
        encodings.append(' '.join(str(x) for x in runs))
    return encodings

In [5]:
# Read the CSV at utils.TRAIN_PATH and index the rows by the 'id' column.
# NOTE(review): the 'encoding' column looks like run-length "start length"
# pairs, presumably the input for encodings_to_mask — confirm.
df_masks = pd.read_csv(utils.TRAIN_PATH).set_index('id')
df_masks.head()  # preview the first rows

Unnamed: 0_level_0,encoding
id,Unnamed: 1_level_1
2f6ecfcdf,296084587 4 296115835 6 296115859 14 296147109...
8242609fa,96909968 56 96941265 60 96972563 64 97003861 6...
aaa6a05cc,30989109 59 31007591 64 31026074 68 31044556 7...
cb2d976f4,78144363 5 78179297 15 78214231 25 78249165 35...
b9a3865fc,61271840 4 61303134 13 61334428 22 61365722 30...
