# Preprocessing input images and annotations

In [1]:
%matplotlib inline

import os
import random
import shutil
import warnings

import joblib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats
import skimage.filters
import skimage.io
import skimage.morphology
import skimage.segmentation

In [2]:
# When True, the processing loops below print dtype/shape information and
# show plots for every file they handle.
debug = False

# Parameters

In [3]:
# Smallest accepted nucleus area in pixels: a nucleus is assumed to cover
# at least 10 x 10 pixels.
min_nucleus_size = 10 * 10

# Convert the gray-scale TIF images to PNG?
transform_images_to_PNG = True

# Width of the boundary class in pixels (minimum 2 pixels)
boundary_size = 4

In [4]:
# Root folder of the data set; every other path is derived from it
dir_root = '/data1/image-segmentation/BBBC022/'

# Raw inputs: images and their annotations
dir_raw_images = os.path.join(dir_root, 'raw_images/')
dir_raw_annotations = os.path.join(dir_root, 'raw_annotations/')

# Text files listing the file names of each split (one name per line)
path_files_training = os.path.join(dir_root, 'unet/training.txt')
path_files_validation = os.path.join(dir_root, 'unet/validation.txt')
path_files_test = os.path.join(dir_root, 'unet/test.txt')

In [5]:
# Where the results are written

## one folder per data split
dir_training = dir_root + 'unet/split/training/'
dir_validation = dir_root + 'unet/split/validation/'
dir_test = dir_root + 'unet/split/test/'

## three-class boundary labels (targets)
dir_boundary_labels = dir_root + 'unet/y/'

## normalized 8-bit input images
dir_images_normalized_8bit = dir_root + 'unet/x/'

# Make sure all output folders exist; folders that are already present are kept
for output_dir in (
    dir_training,
    dir_validation,
    dir_test,
    dir_boundary_labels,
    dir_images_normalized_8bit,
):
    os.makedirs(output_dir, exist_ok=True)

## Create Output Targets: Three Class Boundary

In [6]:
# Build one three-channel label image per raw annotation file:
#   channel 0 = background, channel 1 = nucleus interior, channel 2 = boundary.
# Requires `skimage.morphology` to be imported explicitly (see import cell).
filelist = sorted(os.listdir(dir_raw_annotations))

# The boundary found by find_boundaries is dilated once for every step of
# range(2, boundary_size, 2); the count does not depend on the file.
n_dilations = len(range(2, boundary_size, 2))

# run over all raw annotations
for filename in filelist:

    # GET ANNOTATION
    annot = skimage.io.imread(dir_raw_annotations + filename)

    # keep only the first channel; a single-channel (2-D) file is used as-is
    if annot.ndim == 3:
        annot = annot[:, :, 0]

    # relabel connected components to prepare for the size filter below
    annot = skimage.morphology.label(annot)

    # filter small objects, e.g. micronuclei
    annot = skimage.morphology.remove_small_objects(annot, min_size=min_nucleus_size)

    # find boundaries and widen them
    boundaries = skimage.segmentation.find_boundaries(annot)
    for _ in range(n_dilations):
        boundaries = skimage.morphology.binary_dilation(boundaries)
    boundaries = boundaries.astype(bool)

    # THREE-CLASS LABEL

    # Write the label directly as 8-bit (0 / 255) so that saving does not
    # depend on an implicit float -> uint8 conversion inside imsave.
    foreground = annot != 0
    label_binary = np.zeros(annot.shape + (3,), dtype=np.uint8)
    label_binary[~foreground & ~boundaries, 0] = 255
    label_binary[foreground & ~boundaries, 1] = 255
    label_binary[boundaries, 2] = 255

    # save it under the same file name as the annotation
    skimage.io.imsave(dir_boundary_labels + filename, label_binary)

    if debug:
        print(annot.dtype, annot.shape)

        # plot original annotation
        plt.imshow(annot)
        plt.colorbar()
        plt.show()

        # plot boundary labels
        plt.imshow(label_binary)
        plt.colorbar()
        plt.show()

  .format(dtypeobj_in, dtypeobj_out))
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)


## Image Preprocessing

In [7]:
# Normalize every raw image to 8 bit and store it as PNG. When the
# conversion is switched off, the raw image folder is used as the input
# folder for the split step instead.
if transform_images_to_PNG:

    filelist = sorted(os.listdir(dir_raw_images))

    # intensities are clipped to the [100 - percentile, percentile] range
    percentile = 99.9

    # run over all raw images
    for filename in filelist:

        # load image
        img = skimage.io.imread(dir_raw_images + filename)

        if debug:
            print("BEFORE")
            print(img.dtype, img.shape)
            plt.imshow(img)
            plt.show()
            plt.hist(img.flatten())
            plt.show()

        # IMAGE

        # normalize to [0,1]
        high = np.percentile(img, percentile)
        low = np.percentile(img, 100 - percentile)

        if high > low:
            img = np.clip(img.astype(np.float64), low, high)
            img = (img - low) / (high - low)  # float64, cast to 8 bit below
        else:
            # constant image: avoid dividing by zero and write an all-zero image
            img = np.zeros(img.shape, dtype=np.float64)
        img = skimage.img_as_ubyte(img)

        # replace the extension, whatever its length, by '.png'
        stem = os.path.splitext(filename)[0]
        skimage.io.imsave(dir_images_normalized_8bit + stem + '.png', img)

        if debug:
            print("AFTER")
            print(img.dtype, img.shape)
            plt.imshow(img)
            plt.show()
            plt.hist(img.flatten())
            plt.show()
else:
    dir_images_normalized_8bit = dir_raw_images

  .format(dtypeobj_in, dtypeobj_out))


## Splitting Up
- Split the data into training, validation, and test sets

In [8]:
def _read_file_list(path):
    """Return the lines of the text file at `path` without line endings."""
    with open(path) as handle:
        return handle.read().splitlines()


# File names belonging to each split, as listed in the split files
training_files = _read_file_list(path_files_training)
validation_files = _read_file_list(path_files_validation)
test_files = _read_file_list(path_files_test)

In [9]:
# One row per split: (file names, split folder, image sub-folder, label sub-folder).
# Training files go directly into x/ and y/; validation and test files go
# into an additional 0/ sub-folder.
split_layout = (
    (training_files, dir_training, 'x/', 'y/'),
    (validation_files, dir_validation, 'x/0/', 'y/0/'),
    (test_files, dir_test, 'x/0/', 'y/0/'),
)

# create the image folders first, then the label folders
for _, split_dir, image_sub, _ in split_layout:
    os.makedirs(split_dir + image_sub, exist_ok=True)

for _, split_dir, _, label_sub in split_layout:
    os.makedirs(split_dir + label_sub, exist_ok=True)

# copy each image together with its label into the folder of its split
for split_files, split_dir, image_sub, label_sub in split_layout:
    for filename in split_files:
        shutil.copyfile(dir_images_normalized_8bit + filename, split_dir + image_sub + filename)
        shutil.copyfile(dir_boundary_labels + filename, split_dir + label_sub + filename)