In [1]:
import pathlib
import imageio
import numpy as np

# Gather every training PNG (one per `<sample>/images/` folder) and sort the
# paths so that the index used below always refers to the same file.
training_paths = pathlib.Path('D:/Projects/starik/stage1_train').glob(
    '*/images/*.png'
)
training_sorted = sorted(training_paths)

# Pick one sample image to walk through the pipeline step by step.
im_path = training_sorted[45]
im = imageio.imread(str(im_path))

In [3]:
from skimage.color import rgb2gray

print('Original image shape: {}'.format(im.shape))

# The sample image carries a fourth (alpha) channel. Recent scikit-image
# releases refuse anything but 3-channel input to rgb2gray, while older ones
# silently ignored the extra channel, so drop it explicitly to get the same
# result on every version.
im_rgb = im[..., :3] if im.ndim == 3 else im
im_gray = rgb2gray(im_rgb)
print('New image shape: {}'.format(im_gray.shape))

Original image shape: (520, 696, 4)
New image shape: (520, 696)


In [4]:
import matplotlib.pyplot as plt

# Show the original image next to its grayscale version.
fig, (ax_original, ax_gray) = plt.subplots(1, 2, figsize=(10, 4))

ax_original.imshow(im)
ax_original.axis('off')
ax_original.set_title('Original Image')

ax_gray.imshow(im_gray, cmap='gray')
ax_gray.axis('off')
ax_gray.set_title('Grayscale')

fig.tight_layout()
plt.show()


In [5]:
from skimage.filters import threshold_otsu

# Split the image into two classes with Otsu's threshold: 1 above, 0 below.
thresh_val = threshold_otsu(im_gray)
mask = (im_gray > thresh_val).astype(int)

# The larger class is treated as background. If the 1s are in the majority,
# flip the mask so that 1 always marks the smaller class.
if np.count_nonzero(mask == 1) > np.count_nonzero(mask == 0):
    mask = 1 - mask


In [6]:
# Left: intensity histogram with the Otsu threshold marked.
# Right: the mask drawn semi-transparently over the grayscale image.
fig, (ax_hist, ax_masked) = plt.subplots(1, 2, figsize=(10, 4))

im_pixels = im_gray.flatten()
ax_hist.hist(im_pixels, bins=50)
ax_hist.vlines(thresh_val, 0, 100000, linestyle='--')
ax_hist.set_ylim([0, 50000])
ax_hist.set_title('Histogram')

# NaN where the mask is 0, so only the masked pixels are drawn by the overlay.
mask_for_display = np.where(mask, mask, np.nan)
ax_masked.imshow(im_gray, cmap='gray')
ax_masked.imshow(mask_for_display, cmap='rainbow', alpha=0.5)
ax_masked.axis('off')
ax_masked.set_title('Masked')

plt.show()


In [7]:
from scipy import ndimage
# Connected-component labelling: every separate blob in the mask gets its own
# integer id (1..nlabels); 0 stays background.
labels, nlabels = ndimage.label(mask)

# Build one binary mask per component. `label_mask` is kept as a plain loop
# variable (not hidden inside a comprehension) because the last mask is read
# again further down in the notebook.
label_arrays = []
for label_num in range(1, nlabels + 1):
    label_mask = (labels == label_num).astype(int)
    label_arrays.append(label_mask)

print('{} objects was found.'.format(nlabels))

76 objects was found.


In [8]:
from matplotlib.colors import ListedColormap

# A palette of 256 random RGB colours to colour the labelled objects.
color_map = ListedColormap(np.random.rand(256, 3))

# Background (label 0) becomes NaN so the grayscale image shows through.
labels_for_display = np.where(labels > 0, labels, np.nan)

fig, ax = plt.subplots()
ax.imshow(im_gray, cmap='gray')
ax.imshow(labels_for_display, cmap=color_map)
ax.axis('off')
ax.set_title('Separate cells ({} nuclei)'.format(nlabels))
plt.show()


In [9]:
# Remove objects whose bounding box covers fewer than 10 pixels, then relabel.
for label_ind, label_coords in enumerate(ndimage.find_objects(labels)):
    cell = im_gray[label_coords]

    # np.prod replaces np.product, which was removed in NumPy 2.0.
    if np.prod(cell.shape) < 10:
        # NOTE: the number printed is the 0-based position in find_objects();
        # the label id actually cleared from the mask is label_ind + 1.
        print('Dropping label {}. Too small.'.format(label_ind))
        mask = np.where(labels == label_ind + 1, 0, mask)

# Relabel so that the ids are contiguous again after the removals.
labels, nlabels = ndimage.label(mask)
print('There are now {} separate components / objects detected.'.format(nlabels))


Dropping label 4. Too small.
Dropping label 5. Too small.
Dropping label 7. Too small.
Dropping label 8. Too small.
Dropping label 9. Too small.
Dropping label 10. Too small.
Dropping label 14. Too small.
Dropping label 15. Too small.
Dropping label 16. Too small.
Dropping label 19. Too small.
Dropping label 21. Too small.
Dropping label 22. Too small.
Dropping label 23. Too small.
Dropping label 60. Too small.
Dropping label 61. Too small.
Dropping label 72. Too small.
There are now 60 separate components / objects detected.


In [10]:
# Preview the grayscale crops of the first six labelled objects.
fig, axes = plt.subplots(1, 6, figsize=(10, 6))

first_six = ndimage.find_objects(labels)[0:6]
for label_id, (ax, bbox) in enumerate(zip(axes, first_six), start=1):
    cell = im_gray[bbox]
    ax.imshow(cell, cmap='gray')
    ax.axis('off')
    ax.set_title('Label #{}\nSize: {}'.format(label_id, cell.shape))

plt.tight_layout()
plt.show()


In [11]:
# Bounding box of the second labelled object (index 1 in find_objects()).
object_slices = ndimage.find_objects(labels)
two_cell_indices = object_slices[1]

# Crop the mask to that box and apply a binary opening with 8 iterations.
cell_mask = mask[two_cell_indices]
cell_mask_opened = ndimage.binary_opening(cell_mask, iterations=8)


In [12]:
# Four panels: grayscale crop, its mask, the opened mask, and the crop with
# the opened mask applied.
two_cell_gray = im_gray[two_cell_indices]
panels = [
    (two_cell_gray, 'Original object'),
    (mask[two_cell_indices], 'Original mask'),
    (cell_mask_opened, 'Opened mask'),
    (two_cell_gray * cell_mask_opened, 'Opened object'),
]

fig, axes = plt.subplots(1, 4, figsize=(12, 4))
for ax, (panel, title) in zip(axes, panels):
    ax.imshow(panel, cmap='gray')
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

In [13]:
def rle_encoding(x):
    """Run-length encode the pixels of ``x`` that equal 1.

    The mask is scanned column by column (top to bottom, then left to right).
    Each run of consecutive 1-pixels is written as ``start length``, where
    ``start`` is the 1-based position of the run's first pixel in that scan.

    Args:
        x: 2-D array; pixels equal to 1 are encoded.

    Returns:
        The space-separated ``start length`` pairs as one string; an empty
        string when no pixel equals 1.
    """
    # Transposing before flattening gives the down-then-right pixel order.
    foreground = np.where(x.T.flatten() == 1)[0]

    encoded = []
    previous = -2  # guarantees that the first foreground pixel opens a run
    for position in foreground:
        if position > previous + 1:
            # Gap since the last pixel: open a new run (1-based start, length 0).
            encoded.append(position + 1)
            encoded.append(0)
        encoded[-1] += 1
        previous = position

    return " ".join(str(value) for value in encoded)

# `label_mask` is the variable left over from the per-label loop in an earlier
# cell, i.e. the mask of the last label produced there.
print('RLE Encoding for the current mask is: {}'.format(rle_encoding(label_mask)))

RLE Encoding for the current mask is: 210075 6 210593 8 211112 9 211631 10 212150 11 212669 12 213189 12 213709 12 214228 13 214748 13 215268 13 215788 13 216308 13 216828 13 217348 13 217869 12 218389 12 218909 12 219430 11 219950 11 220471 10 220991 10 221512 9 222033 8 222554 7 223075 6 223596 5 224117 4 224639 2


In [14]:
import pandas as pd

def analyze_image(im_path, min_pixels=10):
    """Segment one image and return its objects as run-length encoded rows.

    The image is converted to grayscale, thresholded with Otsu's method,
    split into connected components, and every component with more than
    ``min_pixels`` pixels is run-length encoded.

    Args:
        im_path: ``pathlib.Path`` of the image; the image id is taken from the
            directory two levels above the file (``<id>/images/<file>``).
        min_pixels: components with this many pixels or fewer are skipped.

    Returns:
        A DataFrame with the columns ``ImageId`` and ``EncodedPixels``, one
        row per kept component (empty, with those columns, if none is kept).
    """
    im_id = im_path.parts[-3]
    im = imageio.imread(str(im_path))
    # Drop an alpha channel if present: recent scikit-image versions only
    # accept 3-channel input in rgb2gray, older ones ignored the extra channel.
    if im.ndim == 3:
        im = im[..., :3]
    im_gray = rgb2gray(im)

    thresh_val = threshold_otsu(im_gray)
    mask = np.where(im_gray > thresh_val, 1, 0)
    # Treat the larger class as background: flip if the 1s are the majority.
    if np.sum(mask == 0) < np.sum(mask == 1):
        mask = np.where(mask, 0, 1)
    labels, nlabels = ndimage.label(mask)

    # Collect plain records and build the frame once; DataFrame.append no
    # longer exists in pandas 2.x and copied the whole frame on every row.
    records = []
    for label_num in range(1, nlabels + 1):
        label_mask = np.where(labels == label_num, 1, 0)
        if label_mask.sum() > min_pixels:
            records.append({'ImageId': im_id,
                            'EncodedPixels': rle_encoding(label_mask)})

    return pd.DataFrame(records, columns=['ImageId', 'EncodedPixels'])


def analyze_list_of_images(im_path_list):
    """Run ``analyze_image`` on every path and stack the results.

    Args:
        im_path_list: iterable of image paths accepted by ``analyze_image``.

    Returns:
        One DataFrame holding the rows of all images with a fresh 0..n-1
        index; an empty DataFrame when there is nothing to combine.
    """
    frames = [analyze_image(im_path) for im_path in im_path_list]
    # Skip images that produced no rows so they cannot affect the result.
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame()

    # A single concat replaces DataFrame.append in a loop, which was removed
    # in pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(frames, ignore_index=True)

In [15]:
# Segment every test image and write the run-length encoded objects to
# submission.csv (without the index column).
testing = pathlib.Path('D:/Projects/starik/stage1_test').glob('*/images/*.png')
test_image_paths = [image_path for image_path in testing]
df = analyze_list_of_images(test_image_paths)
df.to_csv('submission.csv', index=None)

In [17]:
import cv2
import math
import os
import json

In [18]:
# --- Configuration ---------------------------------------------------------

# Seed for the random parts of the analysis.
RANDOM_SEED = 75

# Input data: one sub-directory per image id.
TRAIN_PATH = 'D:/Projects/starik/stage1_train'
TEST_PATH = 'D:/Projects/starik/stage1_test'

# Directory for generated files.
OUTPUT_PATH = 'D:/Projects/starik/Output'

# OpenCV contour retrieval mode.
CONTOUR_EXTRACT_MODE = cv2.RETR_TREE


In [19]:
# Every entry of the train / test directory is taken as an image id.
# os.listdir already returns a fresh list, so no copy is needed.
train_ids = os.listdir(TRAIN_PATH)
test_ids = os.listdir(TEST_PATH)

In [20]:
# One row per image id, tagged with the split it belongs to.
# pd.concat replaces DataFrame.append (removed in pandas 2.0), and
# ignore_index=True gives unique row labels instead of restarting at 0 for
# the test rows.
df2 = pd.concat(
    [
        pd.DataFrame({'id': train_ids, 'train_or_test': 'train'}),
        pd.DataFrame({'id': test_ids, 'train_or_test': 'test'}),
    ],
    ignore_index=True,
)

print(df2.groupby(['train_or_test']).count())

                id
train_or_test     
test            65
train          670


In [21]:
# Build the image path of every row. Columns are read by name rather than by
# position (x[0] / x[1]): positional access on a label-indexed row is
# deprecated in pandas and silently depends on the column order.
df2['path'] = [
    'D:/Projects/starik/stage1_{}/{}/images/{}.png'.format(split, image_id, image_id)
    for image_id, split in zip(df2['id'], df2['train_or_test'])
]

In [22]:
from sklearn.cluster import KMeans

def centroid_histogram(clt):
    """Return the share of samples that fell into each cluster.

    Args:
        clt: fitted clustering object exposing the per-sample cluster ids as
            ``labels_``.

    Returns:
        Float array with one entry per distinct label; the entries sum to 1.
    """
    # One unit-wide bin per distinct label: edges 0, 1, ..., k.
    distinct_labels = len(np.unique(clt.labels_))
    bin_edges = np.arange(0, distinct_labels + 1)
    counts, _ = np.histogram(clt.labels_, bins=bin_edges)

    # Turn the raw counts into fractions of the total.
    fractions = counts.astype("float")
    fractions /= fractions.sum()
    return fractions


In [23]:
def get_image_info(path, clusters=2):
    """Summarise an image by its size and its dominant colours.

    The pixels are clustered in RGB space with KMeans. Of the first and the
    last cluster, the one holding the larger share of pixels is reported as
    background ("bg") and the other as foreground ("fg"). With the default
    ``clusters=2`` these are the only two clusters.

    Args:
        path: image file path passed to ``cv2.imread``.
        clusters: number of KMeans clusters.

    Returns:
        ``pd.Series`` with 13 values, in this order: height, width,
        bg red/green/blue, bg mean colour, fg red/green/blue, fg mean colour,
        bg pixel share, fg pixel share, and whether the foreground is darker
        than the background (``fg_color < bg_color``).

    Raises:
        IOError: if OpenCV cannot read the image.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Could not read image: {}'.format(path))

    height, width, _ = image.shape
    # With default flags cv2.imread yields a 3-channel BGR image, so the
    # matching conversion is BGR -> RGB (there is no alpha channel to drop).
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pixels = image.reshape((-1, 3))

    # Seed the clustering so repeated runs report the same colours.
    clt = KMeans(n_clusters=clusters, random_state=RANDOM_SEED)
    clt.fit(pixels)
    hist = centroid_histogram(clt)

    # Background is whichever of the first/last cluster has more pixels.
    bg_idx, fg_idx = 0, clusters - 1
    if hist[bg_idx] < hist[fg_idx]:
        bg_idx, fg_idx = clusters - 1, 0

    bg_red, bg_green, bg_blue = clt.cluster_centers_[bg_idx]
    fg_red, fg_green, fg_blue = clt.cluster_centers_[fg_idx]

    # Mean over the three channels of each cluster centre.
    bg_color = sum(clt.cluster_centers_[bg_idx]) / 3
    fg_color = sum(clt.cluster_centers_[fg_idx]) / 3

    return (pd.Series([height, width,
                       bg_red, bg_green, bg_blue, bg_color,
                       fg_red, fg_green, fg_blue, fg_color,
                       hist[bg_idx], hist[fg_idx],
                       fg_color < bg_color]))

In [47]:
from io import StringIO

# Per-image colour statistics are slow to compute, so they are cached on disk.
# Despite the .csv name, the cache holds JSON (a JSON string that itself
# contains the records JSON); the format is kept so existing caches still load.
image_info = os.path.join(OUTPUT_PATH, 'info.csv')

if os.path.isfile(image_info):
    with open(image_info, 'r') as datafile:
        data = json.load(datafile)
    # Wrap the text in StringIO: passing literal JSON to read_json is deprecated.
    df = pd.read_json(StringIO(data), orient='records')
    data = None
else:
    names = ['height', 'width',
             'bg_red', 'bg_green', 'bg_blue', 'bg_color',
             'fg_red', 'fg_green', 'fg_blue', 'fg_color',
             'bg_color_pct', 'fg_color_pct', 'invert']

    # Start from the id/path table (`df2`). `df` from the submission cell has
    # no 'path' column, so reading df['path'] fails on a fresh run.
    # reset_index gives unique row labels so the join below is one-to-one.
    df = df2.reset_index(drop=True)
    info = df['path'].apply(get_image_info)
    info.columns = names
    df = df.join(info)
    df['shape'] = ['{:04d}x{:04d}'.format(int(h), int(w))
                   for h, w in zip(df['height'], df['width'])]

    # Make sure the output directory exists before writing the cache.
    os.makedirs(OUTPUT_PATH, exist_ok=True)
    with open(image_info, 'w') as outfile:
        json.dump(df.to_json(orient='records'), outfile)
