# Preprocessing input images and annotations

In [3]:
%matplotlib inline

import csv
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib 
import random
import scipy.stats
import shutil

import skimage.filters
import skimage.io
import skimage.morphology
import skimage.segmentation

import utils.data_augmentation
import utils.data_split
import warnings

In [5]:
# When True, the processing cells print array dtypes/shapes and show
# diagnostic plots for every file they handle.
debug = False

# Parameters

In [4]:
# Minimum object size; passed as `min_size` to
# skimage.morphology.remove_small_objects when the annotations are filtered.
# NOTE(review): the previous comment assumed nuclei of at least 10 by 10
# pixels, which does not match the value 3 -- confirm the intended threshold.
min_nucleus_size = 3

# Transform gray scale TIF images to PNG. When False, the raw image folder is
# used directly as the folder of network inputs.
transform_images_to_PNG = False

# Pixels of the boundary (min 2 pixels); the boundary mask is dilated once for
# every 2 pixels above the minimum.
boundary_size = 2

In [6]:
# Root folder that holds all data of this experiment
dir_root = '/home/cells2numbers/2017_08_unet/data/'

# Folders with the raw inputs
dir_raw_images = os.path.join(dir_root, 'raw_images/')
dir_raw_annotations = os.path.join(dir_root, 'raw_annotations/')

# Text files listing the members of each split
path_files_training = os.path.join(dir_root, 'training.txt')
path_files_validation = os.path.join(dir_root, 'validation.txt')
path_files_test = os.path.join(dir_root, 'test.txt')

# When True, the split files are (re)written by this cell
create_split_files = True

if create_split_files:
    file_list = os.listdir(dir_raw_images)

    # NOTE(review): the meaning of the two fractions and the order of the
    # returned lists are defined by utils.data_split -- confirm there.
    list_training, list_test, list_validation = utils.data_split.create_image_lists(
        dir_raw_images, .5, .25
    )

    # write one path file per split
    for split_path, split_list in (
        (path_files_training, list_training),
        (path_files_test, list_test),
        (path_files_validation, list_validation),
    ):
        utils.data_split.write_path_files(split_path, split_list)

In [7]:
# Output directories

## one folder per split
dir_training = os.path.join(dir_root, 'unet/split/training/')
dir_validation = os.path.join(dir_root, 'unet/split/validation/')
dir_test = os.path.join(dir_root, 'unet/split/test/')

## boundary experiment: target labels
dir_boundary_labels = os.path.join(dir_root, 'unet/y/')

## input data, normalized and 8 bit
dir_images_normalized_8bit = os.path.join(dir_root, 'unet/x/')

# Create every output directory; directories that already exist are kept
for output_dir in (
    dir_training,
    dir_validation,
    dir_test,
    dir_boundary_labels,
    dir_images_normalized_8bit,
):
    os.makedirs(output_dir, exist_ok=True)

## Create Output Targets: Three Class Boundary

In [None]:
filelist = sorted(os.listdir(dir_raw_annotations))

# process every raw annotation file
for filename in filelist:

    # GET ANNOTATION
    annot = skimage.io.imread(dir_raw_annotations + filename)

    # multi-channel annotations: keep only the first channel
    if annot.ndim == 3:
        annot = annot[..., 0]

    # relabel the annotation so that small objects can be filtered afterwards
    annot = skimage.morphology.label(annot)

    # filter small objects, e.g. micronuclei
    annot = skimage.morphology.remove_small_objects(annot, min_size=min_nucleus_size)

    # find boundaries and thicken them once for every 2 pixels above the minimum
    boundaries = skimage.segmentation.find_boundaries(annot)
    for _ in range(2, boundary_size, 2):
        boundaries = skimage.morphology.binary_dilation(boundaries)

    # BINARY LABEL

    # one mask per class
    on_boundary = boundaries == 1
    off_boundary = boundaries == 0
    background = (annot == 0) & off_boundary
    interior = (annot != 0) & off_boundary

    # channel 0: background, channel 1: object interior, channel 2: boundary
    label_binary = np.stack([background, interior, on_boundary], axis=-1).astype(np.float64)

    # save it; the 0/1 float image is expected to be converted to the range
    # 0 to 255 on saving -- TODO confirm for the installed scikit-image version
    skimage.io.imsave(dir_boundary_labels + filename, label_binary)

    if debug:
        print(annot.dtype, annot.shape)

        # show the labelled annotation first, then the three class target
        for shown in (annot, label_binary):
            plt.figure(figsize=(15,15))
            plt.imshow(shown)
            plt.colorbar()
            plt.show()

## Image Preprocessing

In [8]:
# Normalize every raw image to 8 bit and store it as PNG, or -- when the
# transformation is switched off -- use the raw images directly as input.
if transform_images_to_PNG:

    # intensities outside the [100 - percentile, percentile] percentiles are
    # clipped before rescaling so that a few outliers do not compress the range
    percentile = 99.9

    filelist = sorted(os.listdir(dir_raw_images))

    # run over all raw images
    for filename in filelist:

        # load image
        img = skimage.io.imread(dir_raw_images + filename)

        if debug:
            print("BEFORE")
            print(img.dtype, img.shape)
            plt.imshow(img)
            plt.show()
            plt.hist(img.flatten())
            plt.show()

        # IMAGE

        # clip to the percentile range
        high = np.percentile(img, percentile)
        low = np.percentile(img, 100 - percentile)
        img = np.clip(img, low, high)

        # normalize to [0,1]; a constant image has no range to rescale and is
        # mapped to zeros instead of dividing by zero
        value_range = high - low
        if value_range > 0:
            img = (img - low) / value_range
        else:
            img = np.zeros(img.shape, dtype=np.float64)

        # the normalized image is float64, thus cast to 8 bit
        img = skimage.img_as_ubyte(img)

        # replace the extension, whatever its length, by '.png'
        filename_png = os.path.splitext(filename)[0] + '.png'
        skimage.io.imsave(dir_images_normalized_8bit + filename_png, img)

        if debug:
            print("AFTER")
            print(img.dtype, img.shape)
            plt.imshow(img)
            plt.show()
            plt.hist(img.flatten())
            plt.show()
else:
    dir_images_normalized_8bit = dir_raw_images

# Augment images (optional) 
* data augmentation using affine transformations 
* `n_points` x `n_points` points are distributed evenly over the image
* `distort` is the distortion setting passed to the deformation together with these points
* `n_augmentations` augmented images are calculated for each image


In [12]:
# Augmentation takes a lot of time but only has to be computed once.
augment_images = False

# augmentation parameters (passed to utils.data_augmentation.deform)
n_points = 10
distort = 5

# number of augmented images generated for each input image
n_augmentations = 10

# marker inserted into the file names of augmented images
augmentation_tag = '_aug_'

# running numbers of the augmented copies: 1 .. n_augmentations
augmentation_indices = range(1, n_augmentations + 1)


def augmented_filename(filename, n):
    """Return `filename` with '_aug_NNN' inserted in front of its extension.

    `n` is the running number of the augmented copy; it is zero padded to
    three digits.
    """
    stem, extension = os.path.splitext(filename)
    return stem + augmentation_tag + '{:03d}'.format(n) + extension


if augment_images:

    filelist = sorted(os.listdir(dir_images_normalized_8bit))

    # run over all input images
    for filename in filelist:

        # Augmented files are written into the folders that are listed here,
        # so skip the results of an earlier run instead of augmenting them again.
        if augmentation_tag in filename:
            continue

        # only images whose boundary labels were calculated can be augmented
        if not pathlib.Path(dir_boundary_labels + filename).is_file():
            continue

        # load image
        x = skimage.io.imread(dir_images_normalized_8bit + filename)
        # load annotation
        y = skimage.io.imread(dir_boundary_labels + filename)

        for n in augmentation_indices:
            # augment image and annotation with the same deformation call
            x_augmented, y_augmented = utils.data_augmentation.deform(x, y, points=n_points, distort=distort)

            # Save the deformed arrays, not the unchanged originals.
            # NOTE(review): confirm that deform returns arrays in a dtype that
            # imsave can write without further conversion.
            filename_augmented = augmented_filename(filename, n)
            skimage.io.imsave(dir_images_normalized_8bit + filename_augmented, x_augmented)
            skimage.io.imsave(dir_boundary_labels + filename_augmented, y_augmented)

refdataA_001_aug_001.png
refdataA_001_aug_002.png
refdataA_001_aug_003.png
refdataA_001_aug_004.png
refdataA_001_aug_005.png
refdataA_001_aug_006.png
refdataA_001_aug_007.png
refdataA_001_aug_008.png
refdataA_001_aug_009.png
refdataA_002_aug_001.png
refdataA_002_aug_002.png
refdataA_002_aug_003.png
refdataA_002_aug_004.png
refdataA_002_aug_005.png
refdataA_002_aug_006.png
refdataA_002_aug_007.png
refdataA_002_aug_008.png
refdataA_002_aug_009.png
refdataA_003_aug_001.png
refdataA_003_aug_002.png
refdataA_003_aug_003.png
refdataA_003_aug_004.png
refdataA_003_aug_005.png
refdataA_003_aug_006.png
refdataA_003_aug_007.png
refdataA_003_aug_008.png
refdataA_003_aug_009.png
refdataA_004_aug_001.png
refdataA_004_aug_002.png
refdataA_004_aug_003.png
refdataA_004_aug_004.png
refdataA_004_aug_005.png
refdataA_004_aug_006.png
refdataA_004_aug_007.png
refdataA_004_aug_008.png
refdataA_004_aug_009.png
refdataA_005_aug_001.png
refdataA_005_aug_002.png
refdataA_005_aug_003.png
refdataA_005_aug_004.png


refdataA_038_aug_001.png
refdataA_038_aug_002.png
refdataA_038_aug_003.png
refdataA_038_aug_004.png
refdataA_038_aug_005.png
refdataA_038_aug_006.png
refdataA_038_aug_007.png
refdataA_038_aug_008.png
refdataA_038_aug_009.png
refdataA_039_aug_001.png
refdataA_039_aug_002.png
refdataA_039_aug_003.png
refdataA_039_aug_004.png
refdataA_039_aug_005.png
refdataA_039_aug_006.png
refdataA_039_aug_007.png
refdataA_039_aug_008.png
refdataA_039_aug_009.png
refdataA_040_aug_001.png
refdataA_040_aug_002.png
refdataA_040_aug_003.png
refdataA_040_aug_004.png
refdataA_040_aug_005.png
refdataA_040_aug_006.png
refdataA_040_aug_007.png
refdataA_040_aug_008.png
refdataA_040_aug_009.png
refdataA_041_aug_001.png
refdataA_041_aug_002.png
refdataA_041_aug_003.png
refdataA_041_aug_004.png
refdataA_041_aug_005.png
refdataA_041_aug_006.png
refdataA_041_aug_007.png
refdataA_041_aug_008.png
refdataA_041_aug_009.png
refdataA_042_aug_001.png
refdataA_042_aug_002.png
refdataA_042_aug_003.png
refdataA_042_aug_004.png


refdataA_076_aug_001.png
refdataA_076_aug_002.png
refdataA_076_aug_003.png
refdataA_076_aug_004.png
refdataA_076_aug_005.png
refdataA_076_aug_006.png
refdataA_076_aug_007.png
refdataA_076_aug_008.png
refdataA_076_aug_009.png
refdataA_077_aug_001.png
refdataA_077_aug_002.png
refdataA_077_aug_003.png
refdataA_077_aug_004.png
refdataA_077_aug_005.png
refdataA_077_aug_006.png
refdataA_077_aug_007.png
refdataA_077_aug_008.png
refdataA_077_aug_009.png
refdataA_078_aug_001.png
refdataA_078_aug_002.png
refdataA_078_aug_003.png
refdataA_078_aug_004.png
refdataA_078_aug_005.png
refdataA_078_aug_006.png
refdataA_078_aug_007.png
refdataA_078_aug_008.png
refdataA_078_aug_009.png
refdataA_079_aug_001.png
refdataA_079_aug_002.png
refdataA_079_aug_003.png
refdataA_079_aug_004.png
refdataA_079_aug_005.png
refdataA_079_aug_006.png
refdataA_079_aug_007.png
refdataA_079_aug_008.png
refdataA_079_aug_009.png
refdataA_080_aug_001.png
refdataA_080_aug_002.png
refdataA_080_aug_003.png
refdataA_080_aug_004.png


refdataA_115_aug_001.png
refdataA_115_aug_002.png
refdataA_115_aug_003.png
refdataA_115_aug_004.png
refdataA_115_aug_005.png
refdataA_115_aug_006.png
refdataA_115_aug_007.png
refdataA_115_aug_008.png
refdataA_115_aug_009.png
refdataA_116_aug_001.png
refdataA_116_aug_002.png
refdataA_116_aug_003.png
refdataA_116_aug_004.png
refdataA_116_aug_005.png
refdataA_116_aug_006.png
refdataA_116_aug_007.png
refdataA_116_aug_008.png
refdataA_116_aug_009.png
refdataA_117_aug_001.png
refdataA_117_aug_002.png
refdataA_117_aug_003.png
refdataA_117_aug_004.png
refdataA_117_aug_005.png
refdataA_117_aug_006.png
refdataA_117_aug_007.png
refdataA_117_aug_008.png
refdataA_117_aug_009.png
refdataA_118_aug_001.png
refdataA_118_aug_002.png
refdataA_118_aug_003.png
refdataA_118_aug_004.png
refdataA_118_aug_005.png
refdataA_118_aug_006.png
refdataA_118_aug_007.png
refdataA_118_aug_008.png
refdataA_118_aug_009.png
refdataA_119_aug_001.png
refdataA_119_aug_002.png
refdataA_119_aug_003.png
refdataA_119_aug_004.png


KeyboardInterrupt: 

## Splitting Up
- Split up in training, validation and test sets

In [None]:
def _read_file_list(path):
    """Return the lines of the text file at `path`, without line endings."""
    with open(path) as handle:
        return handle.read().splitlines()


# one list of entries per split, read from the split files
training_files = _read_file_list(path_files_training)
validation_files = _read_file_list(path_files_validation)
test_files = _read_file_list(path_files_test)

In [None]:
# Validation and test files go into a '0' sub folder because Keras thinks
# everything is a classification task organized in directories. Training files
# are placed directly in 'x/' and 'y/'.

# (files of the split, folder of the split, image sub folder, label sub folder)
split_layout = (
    (training_files, dir_training, 'x/', 'y/'),
    (validation_files, dir_validation, 'x/0/', 'y/0/'),
    (test_files, dir_test, 'x/0/', 'y/0/'),
)

# create the image folders first, then the label folders
for _, split_dir, image_subdir, _ in split_layout:
    os.makedirs(split_dir + image_subdir, exist_ok=True)

for _, split_dir, _, label_subdir in split_layout:
    os.makedirs(split_dir + label_subdir, exist_ok=True)

# copy every image together with its boundary label into the folder of its split
for split_files, split_dir, image_subdir, label_subdir in split_layout:
    for filename in split_files:
        shutil.copyfile(dir_images_normalized_8bit + filename, split_dir + image_subdir + filename)
        shutil.copyfile(dir_boundary_labels + filename, split_dir + label_subdir + filename)