In [1]:
## universal segmentor, run notebook before training
## Limitations:
## All input images must have the same size


In [2]:
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from glob import glob
from tqdm import tqdm
import tifffile as tif
import os
import pickle
import random
import shutil

In [3]:
import elasticdeform
import albumentations
import Augmentor

In [4]:
import imageio
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

In [5]:
from csbdeep.utils import Path, download_and_extract_zip_file

In [6]:
import requests
from zipfile import ZipFile
import os

In [26]:
import skimage.morphology
import skimage.transform
from scipy.ndimage.morphology import binary_dilation

## functions

In [7]:
def label_split(label, augS):
    """Carve a background gap between objects that touch or lie close together.

    Every object (label value > 0) is dilated ``augS["boundary_width"]`` times.
    Each pixel that ends up covered by more than one dilated object is set to 0.

    Parameters
    ----------
    label : numpy.ndarray
        Label image in which 0 is background. It is modified in place.
    augS : dict
        Augmentation settings; only the key ``"boundary_width"`` is read here.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in, with the gap pixels zeroed.
        An image without any object is returned unchanged.
    """
    boundary_width = augS["boundary_width"]
    # pixels reached by at least one dilated object so far
    covered = np.zeros(label.shape, dtype=bool)
    # pixels reached by two or more dilated objects
    contested = np.zeros(label.shape, dtype=bool)
    # iterate over all labels
    for object_id in np.unique(label):
        if object_id > 0:
            grown = binary_dilation(label == object_id, iterations=boundary_width)
            # record the overlap before merging the new object into the coverage
            contested |= covered & grown
            covered |= grown
    label[contested] = 0
    return label

In [8]:
def augment_batch(im, ma, names, aug_settings, silent=True):
    """Create randomly augmented copies of every image/mask pair of a batch.

    For each image, ``aug_settings["aug_per_img"]`` different pipelines are
    drawn at random from the pipelines listed under
    ``aug_settings["pre_defined_pipeline"]``. Each drawn pipeline is applied
    ``aug_settings["itter"]`` times. When ``aug_settings["clean_it"]`` is set
    the result is passed through ``clean_border`` (crop the border, enlarge
    again) to avoid edge effects. When the entries of ``aug_settings["crop"]``
    sum to more than 0 the result is additionally passed to ``crop_image``.

    Parameters
    ----------
    im : iterable of images, iterated along its first axis
    ma : array indexed as ``ma[i, :, :, :]`` to obtain the mask of image ``i``
    names : sequence of file names, one per image
    aug_settings : dict with the keys described above plus ``"aug_sets"``
        and ``"border"``
    silent : bool, print the chosen pipeline id and pipeline when False

    Returns
    -------
    (list, list, list)
        Augmented images, augmented masks and the new file names, in which
        ``".tif"`` is replaced by ``"_<pipeline id>_<iteration>.tif"``.
    """
    crop = aug_settings["crop"]
    # pipelines are addressed by their position in the pre-defined list
    pipelines = [aug_settings["aug_sets"][key]
                 for key in aug_settings["pre_defined_pipeline"]]
    aug_per_img = aug_settings["aug_per_img"]
    repeats = aug_settings["itter"]
    border = aug_settings["border"]
    clean_it = aug_settings["clean_it"]

    images_aug, masks_aug, names_aug = [], [], []
    for idx, image in enumerate(im):
        segmap = SegmentationMapsOnImage(ma[idx, :, :, :], shape=image.shape)
        chosen_ids = random.sample(range(len(pipelines)), aug_per_img)
        for aug_id in chosen_ids:
            pipeline = pipelines[aug_id]
            if not silent:
                print(aug_id)
                print(pipeline)
            for it in range(repeats):
                new_image, new_segmap = pipeline(image=image, segmentation_maps=segmap)
                new_mask = new_segmap.get_arr()
                if clean_it:
                    new_image, new_mask = clean_border(new_image, new_mask, border)
                if sum(crop) > 0:
                    new_image, new_mask = crop_image(new_image, new_mask, crop)
                images_aug.append(new_image)
                masks_aug.append(new_mask)
                suffix = "_{:04d}_{:03d}.tif".format(aug_id, it)
                names_aug.append(names[idx].replace(".tif", suffix))
    return images_aug, masks_aug, names_aug

In [9]:
def load_batch(IMG_path, MASK_path, start_id, batch_size, augS, segmaps_per_img=1):
    """Read ``batch_size`` consecutive image/mask pairs into two arrays.

    The first image and the first mask of the lists are read only to obtain
    the shape and dtype of the batch arrays, so all files must share the size
    of that first pair. Masks are written to channel 0 of the mask batch. When
    ``augS["add_gap"]`` is set each mask is first passed through
    ``label_split``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray, list)
        Image batch, mask batch (with a trailing axis of length
        ``segmaps_per_img``) and the file names of the loaded images.
    """
    # template pair: defines shape and dtype of the batch arrays
    template_img = imageio.imread(IMG_path[0])
    template_mask = imageio.imread(MASK_path[0])
    img_batch = np.zeros((batch_size,) + template_img.shape,
                         dtype=template_img.dtype)
    mask_batch = np.zeros((batch_size,) + template_mask.shape + (segmaps_per_img,),
                          dtype=template_mask.dtype)
    im_names = []
    for offset in range(batch_size):
        img_file = IMG_path[start_id + offset]
        img_batch[offset, :, :] = imageio.imread(img_file)
        current_mask = imageio.imread(MASK_path[start_id + offset])
        if augS["add_gap"]:  # separate neighbouring objects by a gap
            current_mask = label_split(current_mask, augS)
        mask_batch[offset, :, :, 0] = current_mask
        im_names.append(os.path.basename(img_file))
    return img_batch, mask_batch, im_names

In [10]:
def save_batch(img_aug, mask_aug, names_new, path):
    """Write a batch of augmented images and masks below ``path``.

    Image ``i`` is written to ``<path>/images/<names_new[i]>`` and its mask to
    ``<path>/masks/<names_new[i]>``. Nothing is returned.
    """
    for idx, augmented_image in enumerate(img_aug):
        target_name = names_new[idx]
        imageio.imwrite(os.path.join(path, "images", target_name), augmented_image)
        imageio.imwrite(os.path.join(path, "masks", target_name), mask_aug[idx])

In [11]:
def clean_border(im, ma, border):
    """Cut ``border`` pixels off every side and enlarge to the original size.

    Used after augmentation to remove edge effects. The image and its
    segmentation map go through the same crop + resize pipeline.

    Parameters
    ----------
    im : numpy.ndarray
        Image whose first two axes are (height, width); further axes
        (e.g. channels) are allowed.
    ma : numpy.ndarray
        Mask belonging to ``im``; wrapped in ``SegmentationMapsOnImage``.
    border : int
        Number of pixels removed on each side before enlarging again.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The processed image and the processed mask array.

    Raises
    ------
    ValueError
        If ``2 * border`` is not smaller than both image dimensions.
    """
    # numpy shape order is (rows, columns) == (height, width)
    height, width = im.shape[:2]
    crop_height = height - (2 * border)
    crop_width = width - (2 * border)
    if crop_height <= 0 or crop_width <= 0:
        raise ValueError(
            "border={} leaves no pixels of a {}x{} (height x width) image".format(
                border, height, width))
    # define pipeline: centre crop, then scale back to the input size
    normP = iaa.Sequential([
        iaa.CropToFixedSize(width=crop_width, height=crop_height, position="center"),
        iaa.Resize({"width": width, "height": height})
        ], random_order=False)
    # create segmap
    segmap_norm = SegmentationMapsOnImage(ma, shape=im.shape)
    images_aug, segmaps_aug = normP(image=im, segmentation_maps=segmap_norm)
    masks_aug = segmaps_aug.get_arr()
    return images_aug, masks_aug

In [12]:
def crop_image(im, ma, crop_dims):
    """Apply a centred fixed-size crop to an image and its mask.

    Parameters
    ----------
    im : image array
    ma : mask array belonging to ``im``
    crop_dims : pair ``(height, width)`` handed to ``iaa.CropToFixedSize``

    Returns
    -------
    (array, array)
        The cropped image and the cropped mask array.
    """
    target_height, target_width = crop_dims
    # single-step pipeline so image and mask receive the same crop
    cropper = iaa.Sequential(
        [iaa.CropToFixedSize(width=target_width, height=target_height, position="center")],
        random_order=False,
    )
    wrapped_mask = SegmentationMapsOnImage(ma, shape=im.shape)
    cropped_image, cropped_segmap = cropper(image=im, segmentation_maps=wrapped_mask)
    return cropped_image, cropped_segmap.get_arr()

In [13]:
def make_folder(tms, new_name, crop):
    """Create the output folder tree for an augmented dataset.

    Below ``<tms["root"]>/<new_name>`` the folders ``train/images`` and
    ``train/masks`` are created. The test data is never augmented:

    * if the entries of ``crop`` sum to more than 0, empty ``test/images`` and
      ``test/masks`` folders are created (the cropped files are written later);
    * otherwise ``<tms["path"]>/test`` is copied as a whole, provided the
      source exists and the destination does not exist yet.

    The function is safe to call repeatedly; folders that already exist are
    left untouched.

    Parameters
    ----------
    tms : dict
        Settings with the keys ``"root"`` (output root) and ``"path"``
        (folder of the source dataset).
    new_name : str
        Name of the new dataset folder inside ``tms["root"]``.
    crop : sequence of int
        Crop size; a sum of 0 means "no cropping".
    """
    new_folder = os.path.join(tms["root"], new_name)
    for sub in ("images", "masks"):
        os.makedirs(os.path.join(new_folder, "train", sub), exist_ok=True)
    # copy test & validate data (will not be augmented)
    for copy_only in ["test"]:
        dst = os.path.join(new_folder, copy_only)
        if sum(crop) > 0:  # means crop image, just make folder and crop later
            for sub in ("images", "masks"):
                os.makedirs(os.path.join(dst, sub), exist_ok=True)
        else:
            src = os.path.join(tms["path"], copy_only)
            # copytree refuses to overwrite, so skip an existing destination
            if os.path.exists(src) and not os.path.exists(dst):
                print("Copy ",src, " to ", dst)
                shutil.copytree(src, dst)
    return

In [14]:
def download_from_owncloud(url, outfile=None):
    """Download a shared file and store it on disk.

    ``?dl=0`` / ``?dl=1`` are stripped from ``url`` and ``?raw=1`` is appended
    when it is not already part of the URL. The response body is streamed to
    ``outfile`` in chunks.

    Parameters
    ----------
    url : str
        Share link of the file.
    outfile : str, optional
        Target file. When empty, the last component of the URL path is used;
        if that is empty as well, nothing is downloaded.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. No file is created then.
    """
    url = url.replace('?dl=0', '')
    url = url.replace('?dl=1', '')
    if '?raw=1' not in url:
        url += "?raw=1"
    if not outfile:
        # fall back to the file name contained in the URL path
        from urllib.parse import urlparse
        outfile = os.path.basename(urlparse(url).path)
    if not outfile:
        return
    print("Downloading...",outfile)
    with requests.get(url, allow_redirects=True, stream=True) as r:
        # fail before opening the target so an error page is never saved
        r.raise_for_status()
        with open(outfile, 'wb') as target:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                target.write(chunk)
    return

In [15]:
def unzip_file(outfile):
    """Extract the zip archive ``outfile`` into the directory that holds it."""
    destination = os.path.dirname(outfile)
    with ZipFile(outfile, 'r') as archive:
        print(' extracting to', destination)
        archive.extractall(str(destination))
    print("Done")

In [16]:
def download_data(DD, load_me, main_root, keep_zip=False):
    """Download one dataset archive into ``main_root`` and unpack it there.

    Parameters
    ----------
    DD : dict
        Maps dataset names to their download URLs.
    load_me : str
        Name of the dataset; also used as the name of the zip file.
    main_root : str
        Folder that receives ``<load_me>.zip`` and the extracted content.
        It is created when missing.
    keep_zip : bool
        Keep the downloaded archive after extraction (default: delete it).
    """
    # define url
    url = DD[load_me]
    # make folder
    new_folder = os.path.join(main_root)
    os.makedirs(new_folder, exist_ok=True)
    print("Make folder")
    # download
    outfile = os.path.join(new_folder,"{}.zip".format(load_me))
    print("Start download")
    download_from_owncloud(url,outfile)
    # unzip
    print("Unzip")
    unzip_file(outfile)
    # cleanup
    if not keep_zip:
        print("Deleting..",outfile)
        os.remove(outfile)
    return
    

## variables

In [17]:
# Load the augmentation configuration: a pickled 4-item sequence.
# NOTE(review): pickle.load can execute arbitrary code - only load settings
# files that come from a trusted source.
file_path = "./my_runs/augment_settings_xl.pkl"
with open(file_path,'rb') as infile:  # closed even if unpickling fails
    parameter = pickle.load(infile)
print("Loading processing pipeline from",file_path)
aug_sets,pre_defined_pipelines,data_main_GT,Datasets_Download = parameter

Loading processing pipeline from ./my_runs/augment_settings_xl.pkl


In [18]:
# Dataset selection; set "Download_Data" to True to fetch the data first.
trainModelSettings = {
    "root": data_main_GT,
    "data": "DSB2018_FL_Nuc_Subset",
    "Download_Data": False,
}

In [19]:
# Fetch and unpack the selected dataset only when this was requested.
if trainModelSettings["Download_Data"]:
    download_data(
        Datasets_Download,
        trainModelSettings["data"],
        trainModelSettings["root"],
        keep_zip=False,
    )

Make folder
Start download
Downloading... /mnt/ag-microscopy/SampleDataML/OpSeF_XL_Data/GT/DSB2018_FL_Nuc_Subset.zip
Unzip
 extracting to /mnt/ag-microscopy/SampleDataML/OpSeF_XL_Data/GT
Done
Deleting.. /mnt/ag-microscopy/SampleDataML/OpSeF_XL_Data/GT/DSB2018_FL_Nuc_Subset.zip


In [20]:
# Augmentation recipe (pipelines are drawn at random for every image).
augment_setting = {
    "Aug_Pipeline_Name": "Basic_Nuc",  # see full documentation in OpSeF_XL_Configure_002
    "Tag": "512AllGap",
}
# name of the output dataset: <data>_<pipeline name>_<tag>
augment_setting["new name"] = "{}_{}_{}".format(
    trainModelSettings["data"],
    augment_setting["Aug_Pipeline_Name"],
    augment_setting["Tag"],
)
augment_setting.update({
    "aug_sets": aug_sets,
    # currently available sets: Basic_Nuc, Light_Nuc, Heavy_Nuc, Versatile_Nuc
    "pre_defined_pipeline": pre_defined_pipelines[augment_setting["Aug_Pipeline_Name"]],
    "batch_size": 10,       # number of images loaded and processed together
    "itter": 3,             # each selected pipeline is applied this many times per image
    "aug_per_img": 3,       # number of pipelines run for each image of the batch
    "border": 50,           # border width handed to clean_border
    "clean_it": True,       # run clean_border on every augmented image
    "crop": [512, 512],     # enter [0] or [0,0] to skip cropping
    "add_gap": True,        # makes gap between cells
    "boundary_width": 2,    # define gap width
})

In [21]:
# Input folder of the dataset and output folders of the augmented copy.
trainModelSettings.update({
    "path": os.path.join(trainModelSettings["root"], trainModelSettings["data"]),
    "path_out": os.path.join(trainModelSettings["root"], augment_setting["new name"], "train"),
    "path_out_test": os.path.join(trainModelSettings["root"], augment_setting["new name"], "test"),
})

In [22]:
# Bundle both settings dictionaries and persist them for the training notebook.
parameter = [augment_setting,trainModelSettings]

# file_name is where the file is written from this notebook's folder;
# file_name_load is the path reported to the user
# (presumably the same file seen from the project root - TODO confirm).
file_name = "./my_runs/Parameter_Augment_{}.pkl".format(augment_setting["new name"])
file_name_load = "./Train/Augment/my_runs/Parameter_Augment_{}.pkl".format(augment_setting["new name"])
with open(file_name,'wb') as outfile:  # closed even if pickling fails
    pickle.dump(parameter,outfile)
# report only after the file has actually been written
print("The parameters for augmentation were stored in this file: \n",file_name_load)

The parameters for augmentation were stored in this file: 
 ./Train/Augment/my_runs/Parameter_Augment_DSB2018_FL_Nuc_Subset_Basic_Nuc_512AllGap.pkl


## main code

In [23]:
# Collect the training images and masks; both lists are sorted so that
# entry i of one list belongs to entry i of the other.
IMG_path = sorted(glob('{}/train/images/*.tif'.format(trainModelSettings["path"])))
MASK_path = sorted(glob('{}/train/masks/*.tif'.format(trainModelSettings["path"])))
# every image needs exactly one mask with the same file name
assert len(IMG_path) == len(MASK_path), \
    "Found {} train images but {} train masks".format(len(IMG_path), len(MASK_path))
assert all(os.path.split(img_file)[1] == os.path.split(mask_file)[1]
           for img_file, mask_file in zip(IMG_path, MASK_path)), \
    "File names of train images and masks do not match"

In [24]:
# Create the output folders (test data is copied or prepared there, never
# augmented) and reset the cursor and counter used by the batch loop.
make_folder(
    trainModelSettings,
    augment_setting["new name"],
    augment_setting["crop"],
)
start_id = batch = 0

In [27]:
# Walk through the training set batch by batch: load, augment, save.
# start_id and batch continue from their current values.
while start_id < len(IMG_path):
    print("Running Batch: ", batch)
    if len(IMG_path) - start_id > augment_setting["batch_size"]:
        n_images = augment_setting["batch_size"]
    else:
        # final batch: take whatever is left
        rest = len(IMG_path) - start_id
        print("Running the last images: ",rest)
        n_images = rest
    X,Y,N = load_batch(IMG_path,MASK_path,start_id,n_images,augment_setting)
    Xnew,Ynew,Nnew = augment_batch(X,Y,N,augment_setting)
    save_batch(Xnew,Ynew,Nnew,trainModelSettings["path_out"])
    start_id += augment_setting["batch_size"]
    batch += 1

Running Batch:  0
Running Batch:  1
Running Batch:  2
Running Batch:  3
Running Batch:  4
Running Batch:  5
Running Batch:  6
Running Batch:  7
Running the last images:  7


In [29]:
# When cropping is enabled the test set is not copied; instead every test
# image and mask is cropped and written to the new dataset here.
if sum(augment_setting["crop"]) > 0:
    # load test images
    # NOTE: this rebinds IMG_path / MASK_path from the train to the test files
    IMG_path = sorted(glob('{}/test/images/*.tif'.format(trainModelSettings["path"])))
    MASK_path = sorted(glob('{}/test/masks/*.tif'.format(trainModelSettings["path"])))
    # every image needs exactly one mask with the same file name
    assert len(IMG_path) == len(MASK_path), \
        "Found {} test images but {} test masks".format(len(IMG_path), len(MASK_path))
    assert all(os.path.split(img_file)[1] == os.path.split(mask_file)[1]
               for img_file, mask_file in zip(IMG_path, MASK_path)), \
        "File names of test images and masks do not match"
    # make sure the target folders exist even if they were not created earlier
    test_image_dir = os.path.join(trainModelSettings["path_out_test"],"images")
    test_mask_dir = os.path.join(trainModelSettings["path_out_test"],"masks")
    os.makedirs(test_image_dir, exist_ok=True)
    os.makedirs(test_mask_dir, exist_ok=True)
    for img,mask in zip(IMG_path,MASK_path):
        im_new,mask_new = crop_image(imageio.imread(img),imageio.imread(mask),augment_setting["crop"])
        # image and mask are stored under the file name of the image
        im_name = os.path.join(test_image_dir,os.path.split(img)[1])
        mask_name = os.path.join(test_mask_dir,os.path.split(img)[1])
        imageio.imwrite(im_name,im_new)
        imageio.imwrite(mask_name,mask_new)