# Analyze data using ilastik
This notebook shows how to load the images contained in a dataset
from OMERO as numpy arrays and analyze them in ilastik.
It assumes that an ilastik project is linked to the dataset.
The 5D numpy array is in the order expected by the ilastik project.
The order might need to be adjusted depending on the ilastik project.

### Import required packages

In [1]:
import numpy
import os
import subprocess
from itertools import product
# from functools import reduce
# from operator import mul

import tempfile
import omero.clients
from omero.gateway import BlitzGateway
import omero
from getpass import getpass

### Create a connection to an OMERO server

In [10]:
HOST = 'workshop.openmicroscopy.org'
PORT = 4064

# Server coordinates are kept apart from the credentials, which are asked for
# interactively (user name first, then the password) and never stored here
server = dict(host=HOST, port=PORT)
conn = BlitzGateway(
    input("Username: "),
    getpass("OMERO Password: "),
    **server
)
# Left as the last expression so the notebook displays whether it succeeded
conn.connect()

True

### Enter the dataset ID

In [3]:
# ID of the OMERO dataset that is fetched with conn.getObject and whose
# attachments are searched for ilastik projects
dataset_id = 6205
# Text value of the tag that marks the images exported for training
training_dataset_tag = 'training_dataset'  # tag id 127509
# Text value of the tag that marks the images used for validation
validation_dataset_tag = 'validation_dataset'  # tag id 127510

### Helper function: Load the ilastik project linked to the dataset

In [4]:
def load_model(dataset_id, path):
    """
    Download the ilastik project files attached to a dataset.

    Every file annotation on the dataset whose file name ends with ".ilp"
    is written into ``path`` under its original name.

    :param dataset_id: ID of the OMERO Dataset carrying the attachments
    :param path: directory the files are written to (it is not created here)
    :return: list with the local path of each downloaded project file
    """
    # TODO: select the ilastik projects by namespace instead of by file name
    annotated_dataset = conn.getObject("Dataset", dataset_id)
    downloaded = []
    for annotation in annotated_dataset.listAnnotations():
        # Only file attachments can hold a project
        if not isinstance(annotation, omero.gateway.FileAnnotationWrapper):
            continue
        original_name = annotation.getFile().getName()
        if not original_name.endswith(".ilp"):
            continue
        target = os.path.join(path, original_name)
        # Stream the attachment chunk by chunk into the local file
        with open(str(target), 'wb') as handle:
            handle.writelines(annotation.getFileInChunks())
        downloaded.append(target)
    return downloaded

### Helper function: load images labelled with a specific tag

In [5]:
def get_tagged_images(dataset, tag):
    """
    Collect the images of a dataset that carry a given tag.

    :param dataset: dataset wrapper whose child images are inspected
    :param tag: text value the tag annotation must have
    :return: list of the matching images, in the order the dataset lists them
    """
    def carries_tag(candidate):
        # any() stops at the first annotation that is a tag with the wanted text
        return any(
            annotation.OMERO_TYPE == omero.model.TagAnnotationI
            and annotation.getTextValue() == tag
            for annotation in candidate.listAnnotations()
        )

    return [child for child in dataset.listChildren() if carries_tag(child)]

### Helper function: load an Image as 5D-numpy array: order TZCYX

In [6]:
def load_numpy_array(image):
    """
    Load all the planes of an image into one 5D numpy array, order TZCYX.

    The planes are requested with Z varying slowest and T fastest, stacked,
    and then rearranged so that the leading axes of the result are
    (T, Z, C). The two trailing axes and the dtype are taken from the planes
    themselves (planes are assumed to be 2D (Y, X) numpy arrays, as yielded
    by getPlanes), so non-square images are supported.

    :param image: image wrapper providing getSizeT, getSizeZ, getSizeC and
                  getPrimaryPixels
    :return: numpy array of shape (sizeT, sizeZ, sizeC) + plane shape
    """
    size_t = image.getSizeT()
    size_z = image.getSizeZ()
    size_c = image.getSizeC()

    # (z, c, t) indices with z as the slowest and t as the fastest varying
    zct_list = list(product(range(size_z), range(size_c), range(size_t)))
    pixels = image.getPrimaryPixels()

    # Stacking without a preallocated buffer lets numpy pick up the dtype and
    # the 2D shape of the planes instead of guessing them
    stack = numpy.stack(list(pixels.getPlanes(zct_list)))

    # The planes were stacked in Z, C, T order: unfold them in that order...
    stack = stack.reshape((size_z, size_c, size_t) + stack.shape[1:])
    # ...and only then move the axes to T, Z, C, Y, X
    return numpy.ascontiguousarray(stack.transpose(2, 0, 1, 3, 4))

In [7]:
def plane_gen(data):
    """
    Yield the 2D planes of an array one at a time.

    The first three axes of ``data`` are walked as Z, C and T, with Z
    varying slowest and T fastest (for z.. for c.. for t..), which is the
    plane order handed to createImageFromNumpySeq further down.
    """
    yield from (
        data[z_index][c_index][t_index]
        for z_index in range(data.shape[0])  # all Z sections
        for c_index in range(data.shape[1])  # all channels
        for t_index in range(data.shape[2])  # all time-points
    )

### Export images to build an Ilastik model
This section gets the images tagged as part of the training set and saves them in an ilastik-friendly format.

First we get the dataset...

In [11]:
# Get the dataset to analyze
dataset = conn.getObject('Dataset', dataset_id)
# Fail with a clear message instead of an AttributeError on None when the
# dataset cannot be retrieved
if dataset is None:
    raise ValueError(f'Dataset {dataset_id} could not be loaded from OMERO')
# getName() is the accessor used for names everywhere else in this notebook
print(dataset.getName())

Saved training image as /run/media/julio/DATA/Quentin/training_dataset/20181012_S2Rplus_FISH_DAPI_X1-A488_X2-A555_X3-A647_Mark_and_Find_001_Position001__DAPI.npy
Saved training image as /run/media/julio/DATA/Quentin/training_dataset/20181012_S2Rplus_FISH_DAPI_X1-A488_X2-A555_X3-A647_Mark_and_Find_001_Position001__DAPI_ch1.npy
Saved training image as /run/media/julio/DATA/Quentin/training_dataset/20181012_S2Rplus_FISH_DAPI_X1-A488_X2-A555_X3-A647_Mark_and_Find_001_Position001__DAPI_ch2.npy
Saved training image as /run/media/julio/DATA/Quentin/training_dataset/20181012_S2Rplus_FISH_DAPI_X1-A488_X2-A555_X3-A647_Mark_and_Find_001_Position001__DAPI_ch3.npy
Saved training image as /run/media/julio/DATA/Quentin/training_dataset/20181012_S2Rplus_FISH_DAPI_X1-A488_X2-A555_X3-A647_Mark_and_Find_001_Position002__DAPI.npy
Saved training image as /run/media/julio/DATA/Quentin/training_dataset/20181012_S2Rplus_FISH_DAPI_X1-A488_X2-A555_X3-A647_Mark_and_Find_001_Position002__DAPI_ch1.npy
Saved traini

Then we get the images associated with that dataset that are tagged as training_dataset.

In [None]:
# Get the training images
training_images = get_tagged_images(dataset, training_dataset_tag)

# Create a directory to save the images
# NOTE(review): this absolute path is specific to one machine;
# tempfile.mkdtemp() is a portable alternative
training_path = '/run/media/julio/DATA/Quentin/training_dataset'
# exist_ok avoids the separate existence check and tolerates re-running the cell
os.makedirs(training_path, exist_ok=True)

# Loop through every image to save it as a numpy array
for image in training_images:
    image_data = load_numpy_array(image)

    # Turn the image name into a flat file name without separators or brackets
    filename = os.path.splitext(image.getName())[0]
    filename = filename.replace('/', '_')
    filename = filename.replace('.lif [', '_')
    filename = filename.replace(']', '')
    training_file = os.path.join(training_path, filename)

    # Channel 0 on its own: training data for the nucleus (DAPI) model
    numpy.save(f'{training_file}_DAPI', image_data[:, :, 0, ...])
    print(f'Saved training image as {training_file}_DAPI.npy')

    # Every other channel is saved together with channel 0. The channel axis
    # is axis 2 of the loaded array; using its real length instead of a fixed
    # 4 avoids an IndexError on images with fewer channels
    for ch in range(1, image_data.shape[2]):
        numpy.save(f'{training_file}_DAPI_ch{ch}', image_data[:, :, (0, ch), ...])
        print(f'Saved training image as {training_file}_DAPI_ch{ch}.npy')

## Create an Ilastik model using the exported training images

TODO: We should propose here to open the virtual desktop
Instructions to train a model

## Validate the models
You should now have created four models: one to find the nucleus and one for each channel.
These models should have been uploaded as attachments to the dataset so we can use them from now on.

In [None]:
# Create a temporary directory to save model and images
# (mkdtemp creates the directory itself, so no existence check is needed)
validation_path = tempfile.mkdtemp()

# Load the model linked to the dataset
model_files = load_model(dataset_id, validation_path)

# Load the images tagged for validation
validation_images = get_tagged_images(dataset, validation_dataset_tag)

# Create a new dataset where to upload the generated images
validation_dataset = omero.model.DatasetI()
validation_name = f'ilastik_validation_probabilities_from_dataset_{dataset_id}'
validation_dataset.setName(omero.rtypes.rstring(validation_name))
validation_description = f'ilastik validation probabilities from dataset {dataset_id}. This dataset is for validation'
validation_dataset.setDescription(omero.rtypes.rstring(validation_description))
validation_dataset = conn.getUpdateService().saveAndReturnObject(validation_dataset)

# We link the new dataset to the same project as the original dataset.
# A dataset that is not in any project has no parent: in that case the new
# dataset is simply left unlinked instead of failing on None
source_project = dataset.getParent()
if source_project is not None:
    link = omero.model.ProjectDatasetLinkI()
    link.setParent(source_project._obj)
    link.setChild(validation_dataset)
    conn.getUpdateService().saveObject(link)
else:
    print('The source dataset is not in a project: the validation dataset was not linked')

print('These are the models attached to the dataset')
for model_file in model_files:
    print(model_file)
print('These are the validation images')
for validation_image in validation_images:
    print(validation_image.getName())

### Load each image as a 5D-numpy array and analyze.
Now that everything is set up, we can go through the validation images and verify the results.
We will have to specify which model we want to run for each channel. There are 4 channels:
- DAPI = channel_0
- A488 = channel_1
- A555 = channel_2
- A647 = channel_3

In [25]:
# Specify here which model to use to segment each channel in the same order as they appear in the image
# NOTE(review): the Ch3 model is listed before the Ch2 model; confirm this is
# the intended channel-to-model mapping
model_files = [
    'Nuclei_model_v2.ilp',
    'Ch1_model_v2.ilp',
    'Ch3_model_v2.ilp',
    'Ch2_model_v2.ilp',
    ]

for image in validation_images:
    # Turn the image name into a flat file name without separators or brackets
    filename = os.path.splitext(image.getName())[0]
    filename = filename.replace('/', '_')
    filename = filename.replace('.lif [', '_')
    filename = filename.replace(']', '')
    tmp_file = os.path.join(validation_path, filename)
    image_data = load_numpy_array(image)

    for channel, model_file in enumerate(model_files):
        # Channel 0 is exported on its own, every other channel together
        # with channel 0 (numpy.save appends the .npy extension)
        if channel == 0:
            tmp_file_channel = f'{tmp_file}_DAPI'
            numpy.save(tmp_file_channel, image_data[:, :, 0, ...])
        else:
            tmp_file_channel = f'{tmp_file}_DAPI_Ch{channel}'
            numpy.save(tmp_file_channel, image_data[:, :, (0, channel), ...])

        model_path = os.path.join(validation_path, model_file)
        # run each ilastik model headless
        print(f'running ilastik using {model_file} and {image.getName()}')
        # NOTE(review): the output file name format and the zctyx output axis
        # order are currently disabled, so ilastik decides where the result is
        # written and in which axis order
        cmd = ['/opt/python-apps/ilastik-release/run_ilastik.sh',
               '--headless',
               f'--project={model_path}',
               '--export_source=Probabilities',
               '--output_format=numpy',
               # f'--output_filename_format={{dataset_dir}}/{{nickname}}_Probabilities.npy',
               # '--output_axis_order=zctyx',
               f'{tmp_file_channel}.npy']
        try:
            subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
        except subprocess.CalledProcessError as e:
            print(f'Error: {e.output}')
            print(f'Command: {e.cmd}')
            print()
            # ilastik failed for this model: there is no fresh probabilities
            # file to upload, so move on to the next model
            continue

        # List the directory the probabilities file is loaded from below
        print('List of outputs:')
        for x in os.listdir(validation_path):
            print(x)

        # Save the probabilities file to the image
        omero_name = os.path.splitext(os.path.basename(tmp_file_channel))[0] + '_Probabilities'
        print(f'Saving Probabilities as an Image in OMERO as {omero_name}')
        output_data = numpy.load(f'{tmp_file_channel}_Probabilities.npy')
        print(f'old shape = {output_data.shape}')
        # A 4D result gets a singleton axis inserted at position 2 so that
        # plane_gen can index three leading axes
        if len(output_data.shape) == 4:
            output_data = output_data.reshape(output_data.shape[:2] + (1,) + output_data.shape[2:])
        print(f'new shape = {output_data.shape}')
        desc = f'ilastik probabilities from Image {image.getId()} and model {model_file}'
        # NOTE(review): plane_gen walks axis 0 as Z while sizeZ is taken from
        # axis 1, and sizeC/sizeT are left at their defaults; confirm this
        # against the axis order ilastik actually writes
        conn.createImageFromNumpySeq(zctPlanes=plane_gen(output_data),
                                     imageName=omero_name,
                                     sizeZ=output_data.shape[1],
                                     description=desc,
                                     dataset=validation_dataset)

print("done")

z:236 t:1 c:2 y:275 x:271
Downloading image B1_C1.tif
Image converted
running ilastik headless using /tmp/tmpInvF2D/pixel-class-wednesday.ilp on file B1_C1.tif
Saving Probabilities as an Image in OMERO
done


### Close the connection to the OMERO server

In [11]:
# Close the connection that was opened with conn.connect() at the top of the notebook
conn.close()



### License
Copyright (C) 2019 University of Dundee. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details. You should have received a copy of the GNU General
Public License along with this program; if not, write to the
Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.