In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor
from scipy.stats.mstats import gmean

In [2]:
def RLenc(img, order='F', format=True):
    """
    Run-length encode a binary mask.

    img is a 2-D mask of shape (r, c); every non-zero pixel counts as foreground
    order is the flattening order handed to reshape ('F' = down-then-right)
    format selects the return type: True -> submission string, False -> raw runs

    returns 'start length start length ...' (1-based starts) when format is True,
    otherwise a list of (start, length) tuples
    """
    flat = img.reshape(img.shape[0] * img.shape[1], order=order)

    runs = []      # collected (start, length) pairs
    run_len = 0    # length of the foreground run currently being scanned
    start = 1      # 1-based position where the current/next run begins

    for px in flat:
        if px != 0:
            # still inside a foreground run
            run_len += 1
            continue
        if run_len:
            # a background pixel closes the open run
            runs.append((start, run_len))
            start += run_len
            run_len = 0
        # step over the background pixel itself
        start += 1

    # the data may end in the middle of a run
    if run_len:
        runs.append((start, run_len))

    if not format:
        return runs
    return ' '.join('{} {}'.format(s, n) for s, n in runs)

In [3]:
def find_empty_images(path):
    """
    List the files in a directory whose decoded image contains only zeros.

    path is a directory; every entry in it is handed to cv2.imread

    Entries that cv2.imread cannot decode (it returns None for them instead of
    raising) are skipped rather than crashing the scan.

    returns a list of file names (not full paths), sorted by name
    """
    empty_list = []
    # os.listdir order is arbitrary; sort so repeated runs give the same list
    for img_fp in sorted(os.listdir(path)):
        img = cv2.imread(os.path.join(path, img_fp))
        if img is None:
            # not a readable image: `(None != 0).sum()` would raise AttributeError
            continue
        if not img.any():
            empty_list.append(img_fp)
    return empty_list

In [4]:
def merge_preds(image_id, res_dir):
    """
    Combine the per-fold predictions of one image with a geometric mean.

    image_id is the file stem; '<image_id>.npy' is loaded from every entry of res_dir
    res_dir is the directory whose entries are treated as fold sub-directories

    returns the [0, 0, :, :] slice of the geometric mean taken across folds
    (presumably each stored mask is shaped (1, 1, H, W) -- TODO confirm)
    """
    fname = image_id + '.npy'
    fold_masks = [
        np.load(os.path.join(res_dir, fold_name, fname))
        for fold_name in os.listdir(res_dir)
    ]
    merged = gmean(fold_masks)
    return merged[0, 0, :, :]

In [5]:
# File names (looked up later as '<id>.png') of test images that get an empty mask;
# stored as a set for fast membership checks.
# NOTE(review): presumably this file was produced with find_empty_images -- confirm.
test_empty = set(np.load('./data/test_empty.npy').tolist())

In [6]:
# Load the sample submission; it is used as the template that gets filled in with predictions.
sample_subm = pd.read_csv('./data/sample_submission.csv')

In [7]:
# Preview the template: one row per test image with columns `id` and `rle_mask`.
sample_subm.head()

Unnamed: 0,id,rle_mask
0,155410d6fa,1 1
1,78b32781d1,1 1
2,63db2a476a,1 1
3,17bfcdb967,1 1
4,7ea0fd3c88,1 1


In [8]:
# Fill the rle_mask column (column 1) of the template row by row.
for i in tqdm(range(len(sample_subm))):
    image_id = sample_subm.iloc[i, 0]

    if (image_id + '.png') not in test_empty:
        # Merge the fold predictions and bring them to the 101x101 submission size.
        mask = cv2.resize(merge_preds(image_id, './results/'), (101, 101))

        # Binarise in place at 0.5 (the two assignments touch disjoint pixels).
        mask[mask < 0.5] = 0
        mask[mask >= 0.5] = 1

        rle_mask = RLenc(mask)
        sample_subm.iloc[i, 1] = rle_mask
    else:
        # Images listed in test_empty are submitted with an empty mask.
        sample_subm.iloc[i, 1] = ''


100%|██████████| 18000/18000 [26:24<00:00, 11.36it/s]


In [12]:
# Write the filled-in submission; index=False keeps the pandas index out of the file.
# NOTE(review): the output path has no .csv extension -- confirm this is intended.
sample_subm.to_csv('./data/subm_1', index=False)

In [13]:
# Read the written file back to check that it parses as CSV.
subm = pd.read_csv('./data/subm_1')

In [14]:
# Preview the re-loaded submission to eyeball the generated run-length strings.
subm.head()

Unnamed: 0,id,rle_mask
0,155410d6fa,1 1009 1011 98 1112 97 1213 96 1314 95 1415 95...
1,78b32781d1,58 44 159 44 259 45 359 46 459 47 559 48 658 5...
2,63db2a476a,7269 4 7363 11 7461 14 7560 16 7659 18 7758 20...
3,17bfcdb967,4253 5 4352 8 4451 10 4499 6 4523 4 4547 16 45...
4,7ea0fd3c88,2 5 25 7 102 6 126 7 203 5 229 5 305 2 331 3 5...


In [8]:
# Sanity check on the resized mask size.
# NOTE(review): relies on the global `mask` left over from the submission loop; it is
# undefined on a fresh kernel unless that loop has run.
mask.shape

(101, 101)

In [3]:
def create_pred(image_id, res_dir='./results/', size=(101, 101), threshold=0.5):
    """
    Build the submission entry for a single test image.

    image_id is the image name without extension
    res_dir is the directory of per-fold predictions handed to merge_preds
    size is the dsize given to cv2.resize, i.e. (width, height)
    threshold is the cut-off: values >= threshold become 1, values below become 0

    The defaults are the values that used to be hard-coded, so calling
    create_pred(image_id) behaves as before.

    returns (image_id, rle_mask); rle_mask is '' for images listed in test_empty
    """
    # Known-empty images get an empty mask without loading any predictions.
    if image_id + '.png' in test_empty:
        return image_id, ''

    mask = merge_preds(image_id, res_dir)
    mask = cv2.resize(mask, size)

    # Evaluate both comparisons before modifying the array so the outcome does not
    # depend on how the threshold relates to the 0/1 values being written.
    above = mask >= threshold
    below = mask < threshold
    mask[above] = 1
    mask[below] = 0

    rle_mask = RLenc(mask)
    return image_id, rle_mask


In [None]:
# Parallel variant of the submission loop: create_pred runs in worker processes.
# map() needs the full list of ids. The global `image_id` is a single id string left
# over from the sequential loop, and mapping over it would iterate its characters.
image_ids = sample_subm.iloc[:, 0].tolist()

res = []
with ProcessPoolExecutor(max_workers=4) as executor:
    # executor.map yields results in input order, so `res` lines up with image_ids.
    for image_id, rle_mask in tqdm(executor.map(create_pred, image_ids), total=len(image_ids)):
        res.append([image_id, rle_mask])