In [24]:
from matplotlib import pyplot as plt
from matplotlib import image as mpimg
import os
from tqdm import tqdm
from skimage.transform import resize
from scipy import ndimage
from skimage.morphology import label
import numpy as np
import pandas as pd
%matplotlib inline

In [25]:
# Target height/width for images and masks (presumably in pixels).
# NOTE(review): no active code in the visible cells reads these four values.
# The "heigth" spelling is kept because cells outside this view may reference the names.
image_heigth = 256
image_width = 256

mask_heigth = 256
mask_width = 256

def get_images_masks(root_dir):
    """Load every image found under ``root_dir`` together with its combined mask.

    The expected layout is ``<root_dir>/<image_id>/images/<image_id>.png`` plus
    any number of mask files inside ``<root_dir>/<image_id>/masks/``.

    Parameters
    ----------
    root_dir : str
        Directory holding one sub-directory per image. Paths are built with
        ``os.path.join``, so a trailing path separator is optional.

    Returns
    -------
    list of dict
        One dict per image sub-directory with the keys ``'image_id'`` (the
        sub-directory name), ``'image'`` (the array returned by
        ``mpimg.imread``) and ``'masks'`` (the element-wise sum of every mask
        file, accumulated on a zero array with the image's height and width).
    """
    images = []
    for image_id in tqdm(os.listdir(root_dir)):
        image_dir = os.path.join(root_dir, image_id)
        # Stray files next to the image folders (OS metadata, archives, ...)
        # have no images/ or masks/ sub-directory, so skip them.
        if not os.path.isdir(image_dir):
            continue

        image_file = os.path.join(image_dir, 'images', image_id + '.png')
        image = mpimg.imread(image_file)
        #image = resize(image, (image_heigth, image_width), mode='constant', preserve_range=True)
        masks = np.zeros((image.shape[0], image.shape[1]))
        masks_dir = os.path.join(image_dir, 'masks')

        for mask_name in os.listdir(masks_dir):
            mask = mpimg.imread(os.path.join(masks_dir, mask_name))
            #mask = resize(mask, (mask_heigth,mask_width), mode='constant', preserve_range=True)
            masks = np.add(masks, mask)

        images.append({'image_id': image_id, 'image': image, 'masks': masks})

    return images

def plot_images(rows,cols,images):
    """Show ``images`` on a ``rows`` x ``cols`` grid of subplots, filled row by row.

    Parameters
    ----------
    rows, cols : int
        Grid dimensions passed to ``plt.subplots``.
    images : sequence
        Images accepted by ``Axes.imshow``. When fewer than ``rows * cols``
        images are given, the remaining cells are left blank instead of
        raising ``IndexError``; images beyond ``rows * cols`` are not drawn.
    """
    fig, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(30,30), squeeze=False)
    # squeeze=False guarantees a 2-D array of axes; ravel() walks it row by row.
    for image_index, axis in enumerate(ax.ravel()):
        if image_index < len(images):
            axis.imshow(images[image_index])
        else:
            # No image left for this cell: hide the empty frame and ticks.
            axis.axis('off')

In [26]:
# Load every image and its combined mask from the stage-1 training folder.
images = get_images_masks("./input/stage1_train/")

100%|██████████| 670/670 [01:01<00:00, 10.84it/s]


In [27]:
def separate_masks(combined_masks):
    """Split a combined mask into one 0/1 integer mask per labelled region.

    ``combined_masks`` is passed to ``label``; for every label id from 1 up to
    the largest id found, an integer array that is 1 where the labelled image
    equals that id and 0 elsewhere is produced.

    Returns the list of those arrays, ordered by label id (empty when the
    largest label id is 0).
    """
    components = label(combined_masks)
    component_count = np.max(components)
    return [
        (components == component_id).astype(int)
        for component_id in range(1, component_count + 1)
    ]

# def separate_masks(combined_masks):
#     labeled_masks = ndimage.label(combined_masks)
#     masks = []
#     for i in range(1,labeled_masks[1]+1):
#         mask = (labeled_masks[0] == i).astype(int)
#         masks.append(mask)
    
#     return masks


def identify_postive_pixels(mask):
    """Return the flat indices of all entries of ``mask`` that are greater than 0.

    The mask is transposed before flattening, so for a 2-D mask the indices
    run down each column first (column-major order). Indices are 0-based and
    returned as a list in ascending order.
    """
    column_major = mask.T.flatten()
    positive_positions = np.where(column_major > 0)[0]
    return list(positive_positions)


def encode_postive_pixels(postive_pixels):
    """Run-length encode a list of 0-based pixel indices.

    Parameters
    ----------
    postive_pixels : sequence of int
        0-based flat pixel indices, expected in ascending order (which is how
        ``identify_postive_pixels`` produces them).

    Returns
    -------
    str
        Space-separated ``start length`` pairs with 1-based starts, e.g.
        ``'1 3 7 1'`` for ``[0, 1, 2, 6]``. Empty input gives ``''``. The
        string has no trailing space.
    """
    runs = []  # list of [1-based start, length]
    previous = None
    for pixel in postive_pixels:
        # Comparing with the previous index replaces a list membership test
        # per pixel, which made the encoding quadratic in the mask size.
        if previous is not None and pixel == previous + 1:
            runs[-1][1] += 1
        else:
            runs.append([pixel + 1, 1])
        previous = pixel

    return ' '.join(str(value) for run in runs for value in run)

def create_submission_dataFrame(test_images,resized_masks):
    """Build a submission table with one row per separated mask.

    Parameters
    ----------
    test_images : sequence of dict
        Each entry must provide an ``'image_id'`` key.
    resized_masks : sequence
        ``resized_masks[i]`` is the combined mask belonging to
        ``test_images[i]``; it is split with ``separate_masks``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'ImageId'`` and ``'EncodedPixels'``. Each row pairs an image
        id with the run-length encoding of one of its separated masks.
    """
    submission = []
    for index in tqdm(range(len(test_images))):
        image_id = test_images[index]['image_id']
        for mask in separate_masks(resized_masks[index]):
            encoded_pixels = encode_postive_pixels(identify_postive_pixels(mask))
            submission.append([image_id, encoded_pixels])

    # Hand the rows to pandas directly: a NumPy string array would pad every
    # cell to the longest encoding, and an empty row list could not be shaped
    # into the two expected columns.
    return pd.DataFrame(submission, columns=['ImageId','EncodedPixels'])

In [28]:
# Collect the combined mask of every loaded image, in the same order as `images`.
masks = [image['masks'] for image in images]

In [29]:
# Encode the loaded masks with this notebook's own encoder and write the
# result to my_encoded_pixels.csv (without the DataFrame index column).
stage1_train = create_submission_dataFrame(images,masks)
stage1_train.to_csv('my_encoded_pixels.csv',index=False)

100%|██████████| 670/670 [05:18<00:00,  2.10it/s]


In [30]:
# Feed the reference encoder below with the ids and combined masks loaded
# above: `preds_test_upsampled` is just another name for `masks`.
test_ids = [image['image_id'] for image in images]
preds_test_upsampled = masks


# Run-length encoding stolen from https://www.kaggle.com/rakhlin/fast-run-length-encoding-python
def rle_encoding(x):
    """Run-length encode the entries of ``x`` that are equal to 1.

    ``x`` is transposed and flattened first, so a 2-D array is scanned column
    by column. The result is a flat list ``[start, length, start, length, ...]``
    where each start is the 1-based flat position of a run of consecutive
    matching entries. An array without any matching entry gives ``[]``.
    """
    foreground = np.where(x.T.flatten() == 1)[0]
    encoded = []
    last_seen = -2  # sentinel: never adjacent to a valid first index
    for position in foreground:
        starts_new_run = position > last_seen + 1
        if starts_new_run:
            # Open a run with length 0; the increment below counts this pixel.
            encoded.append(position + 1)
            encoded.append(0)
        encoded[-1] += 1
        last_seen = position
    return encoded

def prob_to_rles(x, cutoff=0.5):
    """Yield one run-length encoding per labelled region of ``x > cutoff``.

    ``x`` is thresholded with ``cutoff``, the boolean result is passed to
    ``label``, and for each label id from 1 to the largest id the matching
    region is encoded with ``rle_encoding``. Nothing is yielded when the
    largest label id is 0.
    """
    regions = label(x > cutoff)
    last_region = regions.max()
    region_id = 1
    while region_id <= last_region:
        yield rle_encoding(regions == region_id)
        region_id += 1
        
        
        

# Encode every mask with the reference encoder. `rles` receives one entry per
# labelled region and `new_test_ids` repeats the image id once per region, so
# both lists stay aligned row for row.
new_test_ids = []
rles = []
for n, id_ in enumerate(test_ids):
    rle = list(prob_to_rles(preds_test_upsampled[n]))
    rles.extend(rle)
    new_test_ids.extend([id_] * len(rle))
    
    
# Create submission DataFrame
sub = pd.DataFrame()
sub['ImageId'] = new_test_ids
# Each run-length list becomes a single space-separated string.
sub['EncodedPixels'] = pd.Series(rles).apply(lambda x: ' '.join(str(y) for y in x))
sub.to_csv('stolen_encoded_pixels.csv', index=False)

In [None]:
ImageId = '04acab7636c4cf61d288a5962f15fa456b7bde31a021e5deedfbf51288e4001e'

# Take the first loaded entry with this id. next() over a generator is used
# because filter() returns an iterator on Python 3, which cannot be indexed.
# Raises StopIteration when no entry matches.
the_image = next(image for image in images if image['image_id'] == ImageId)

In [None]:
# Combined mask of the selected image.
the_image_mask = the_image['masks']

In [None]:
# Display the combined mask of the selected image.
plt.imshow(the_image_mask)

In [None]:
# Label the combined mask with SciPy. ndimage.label returns a pair:
# (labelled array, number of features found).
the_image_masked_scipy = ndimage.label(the_image_mask)

In [None]:
# Number of features SciPy found: the second element of ndimage.label's result.
# (The variable assigned in the previous cell is `the_image_masked_scipy`.)
print(the_image_masked_scipy[1])

In [None]:
# Label the same combined mask with skimage's `label` for comparison.
the_image_skimage = label(the_image_mask)

In [None]:
# Largest label id in the skimage result.
np.max(the_image_skimage)