# Cell/particle Counting and scoring stained objects

This notebook demonstrates how to process plates associated with the paper ['Integration of biological data by kernels on graph nodes allows prediction of new genes involved in mitotic chromosome condensation.'](http://dx.doi.org/10.1091/mbc.E13-04-0221) using [CellProfiler](http://cellprofiler.org/).
We use the example pipeline [Cell/particle counting, and scoring the percentage of stained objects](http://cellprofiler.org/examples/#PercentPositive). This pipeline is for two-channel images.
Metadata are loaded from IDR and binary data from S3.

In [None]:
# When running the notebook on an Apple M1 machine,
# you might have to run this cell after uncommenting the two lines below
#import os
#os.environ["JAVA_HOME"]="/full/path/to/conda_env"

### Import Packages

In [2]:
import os
import sys

# Set JAVA_HOME without hard-coding a machine-specific absolute path:
# - keep any JAVA_HOME the user has already exported;
# - otherwise fall back to the prefix of the Python environment running this kernel
#   (assumes the kernel runs inside the conda environment that provides Java --
#   set JAVA_HOME explicitly beforehand if that is not the case).
os.environ.setdefault("JAVA_HOME", sys.prefix)

In [3]:
# %tb

# Import Cell Profiler Dependencies
import cellprofiler
import cellprofiler_core.preferences as cpprefs
import cellprofiler.modules as cpm
import cellprofiler_core.pipeline as cpp
cpprefs.set_headless()

# Inject Image module used to inject OMERO image planes into Cell Profiler Pipeline
from cellprofiler_core.modules.injectimage import InjectImage

# Import OMERO Python BlitzGateway
import omero
from omero.gateway import BlitzGateway

# Import Numpy
import numpy as np

# Import Python System Packages
import os
import tempfile
import pandas
import warnings

import zarr
import s3fs
import dask.array as da

# Import Matplotlib
import matplotlib

### Set CellProfiler Output Directory

In [4]:
# Create a fresh temporary folder, normalise the case of its path, and register it
# as the default output directory; the exported CSV is read back from it later on.
new_output_directory = tempfile.mkdtemp()
new_output_directory = os.path.normcase(new_output_directory)
cpprefs.set_default_output_directory(new_output_directory)

### Connect to IDR

In [5]:
# Open a session on IDR with the public read-only account
conn = BlitzGateway(
    host='ws://idr.openmicroscopy.org/omero-ws',
    username='public',
    passwd='public',
    secure=True,
)
# connect() reports whether the session could be established
connected = conn.connect()
print(connected)
# Enable the client keep-alive (argument: 60) for the long-running cells below
conn.c.enableKeepAlive(60)

True


### Fetch from IDR the Plate that contains the Images to be analysed

In [6]:
# Look up the plate on IDR and keep the metadata needed by the cells below
plate_id = 422
plate = conn.getObject("Plate", plate_id)
plate_name = plate.getName()

# Materialise the acquisitions generator so it can be indexed and measured later
plate_acquisitions = [acquisition for acquisition in plate.listPlateAcquisitions()]
row_labels = plate.getRowLabels()
print('Plate Name: ', plate_name)

# The screen name is the part of the parent's name before the first "/"
parent_name = plate.getParent().getName()
screen_name = parent_name.split("/")[0]

Plate Name:  plate1_1_013


In [7]:
# Load pipeline and inspect modules
pipeline = cpp.Pipeline()
pipeline.load("./pipelines/ExamplePercentPositive.cppipe")

# Remove first 4 modules: Images, Metadata, NamesAndTypes, Groups...
# (replaced by InjectImage module below)
for i in range(4):
    print('Remove module: ', pipeline.modules()[0].module_name)
    pipeline.remove_module(1)

print('Pipeline modules:')
for module in pipeline.modules():
    print(module.module_num, module.module_name)

Remove module:  Images
Remove module:  Metadata
Remove module:  NamesAndTypes
Remove module:  Groups
Pipeline modules:
1 IdentifyPrimaryObjects
2 IdentifyPrimaryObjects
3 RelateObjects
4 FilterObjects
5 MeasureObjectIntensity
6 OverlayOutlines
7 DisplayDataOnImage
8 ClassifyObjects
9 CalculateMath
10 ExportToSpreadsheet


### Load dask array from S3

In [16]:
def load_dask_array_from_s3(run='0', column='1', name='A', field='Field_1', resolution='0'):
    """Open one Zarr array stored on S3 and return it as a dask array.

    The object path is assembled from the notebook-level ``screen_name``,
    ``plate_name`` and ``plate_id`` followed by the arguments, in the order
    ``run/name/column/field/resolution``. The path is printed before the
    store is opened.

    Parameters
    ----------
    run : path segment placed right after ``<plate_id>.zarr``
        (the caller passes the plate acquisition name).
    column : column segment of the path.
    name : row segment of the path (the caller passes the upper-cased row label).
    field : field segment of the path.
    resolution : last path segment, selecting the resolution level.

    Returns
    -------
    The dask array built by ``da.from_zarr``; by convention its shape is
    (t, c, z, y, x).
    """
    endpoint = 'https://uk1s3.embassy.ebi.ac.uk/'
    zarr_path = 'idr/share/community-call-2020-10-29/%s/%s/%s.zarr/%s/%s/%s/%s/%s' % (
        screen_name, plate_name, plate_id, run, name, column, field, resolution)

    # Anonymous access against the custom S3 endpoint
    filesystem = s3fs.S3FileSystem(anon=True, client_kwargs={'endpoint_url': endpoint})
    print(zarr_path)

    remote_store = s3fs.S3Map(root=zarr_path, s3=filesystem, check=False)
    # Wrap the remote store in an LRU cache capped at 2048 MiB
    max_cache_bytes = 2048 * 2**20
    lru_store = zarr.LRUStoreCache(remote_store, max_size=max_cache_bytes)
    return da.from_zarr(lru_store)

### Run Cell Profiler Pipeline on the plate

In [17]:
warnings.filterwarnings('ignore')

Nuclei = pandas.DataFrame()
files = list()

# create list from generator
wells = list(plate.listChildren())
# use the first 5 wells only
wells = wells[0:5]
well_count = len(wells)
run = 0
if len(plate_acquisitions) > 0:
    run = plate_acquisitions[0].getName()
for count, well in enumerate(wells):
    print('Well: %s/%s' % (count + 1, well_count), 'row:', well.row, 'column:', well.column)
    # Load a single Image per Well
    row_label = row_labels[well.row]
    print(row_label)
    image = well.getImage(0)
    %time data = load_dask_array_from_s3(run, well.column+1, row_label.upper())
    size_c = image.getSizeC()

    # For each Image in OMERO, we copy pipeline and inject image modules
    pipeline_copy = pipeline.copy()

    # Inject image for each Channel (pipeline only handles 2 channels)
    for c in range(0, size_c):

        %time plane = data[0, c, 0, :, :]
        image_name = image.getName()

        # Name of the channel expected in the pipeline
        if c == 0:
            image_name = 'OrigBlue'
        if c == 1:
            image_name = 'OrigGreen'

        inject_image_module = InjectImage(image_name, plane)
        inject_image_module.set_module_num(1)
        pipeline_copy.add_module(inject_image_module)

    m = pipeline_copy.run()

    # Results obtained as CSV from Cell Profiler
    path = new_output_directory + '/Nuclei.csv'
    f = pandas.read_csv(path, index_col=None, header=0)
    f['Image'] = image.getId()
    f['Well'] = well.getId()
    f['Cell_Count'] = len(f.index)
    files.append(f)

Nuclei = pandas.concat(files, ignore_index=True)

Well: 1/5 row: 4 column: 10
E
idr/share/community-call-2020-10-29/idr0002-heriche-condensation/plate1_1_013/422.zarr/Run 422/E/11/Field_1/0
CPU times: user 69 ms, sys: 38.5 ms, total: 108 ms
Wall time: 516 ms
CPU times: user 761 µs, sys: 395 µs, total: 1.16 ms
Wall time: 1.99 ms
CPU times: user 489 µs, sys: 6 µs, total: 495 µs
Wall time: 514 µs
Well: 2/5 row: 7 column: 5
H
idr/share/community-call-2020-10-29/idr0002-heriche-condensation/plate1_1_013/422.zarr/Run 422/H/6/Field_1/0
CPU times: user 4.83 ms, sys: 377 µs, total: 5.2 ms
Wall time: 156 ms
CPU times: user 355 µs, sys: 1 µs, total: 356 µs
Wall time: 359 µs
CPU times: user 177 µs, sys: 1e+03 ns, total: 178 µs
Wall time: 179 µs
Well: 3/5 row: 2 column: 0
C
idr/share/community-call-2020-10-29/idr0002-heriche-condensation/plate1_1_013/422.zarr/Run 422/C/1/Field_1/0
CPU times: user 6.23 ms, sys: 408 µs, total: 6.64 ms
Wall time: 159 ms
CPU times: user 563 µs, sys: 14 µs, total: 577 µs
Wall time: 595 µs
CPU times: user 253 µs, sys: 1

### Calculate statistics

In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) of the collected measurements
Nuclei.describe()

Unnamed: 0,ImageNumber,ObjectNumber,Children_PH3PosNuclei_Count,Children_PH3_Count,Classify_PH3Neg,Classify_PH3Pos,Intensity_IntegratedIntensityEdge_OrigBlue,Intensity_IntegratedIntensityEdge_OrigGreen,Intensity_IntegratedIntensity_OrigBlue,Intensity_IntegratedIntensity_OrigGreen,...,Location_MaxIntensity_X_OrigBlue,Location_MaxIntensity_X_OrigGreen,Location_MaxIntensity_Y_OrigBlue,Location_MaxIntensity_Y_OrigGreen,Location_MaxIntensity_Z_OrigBlue,Location_MaxIntensity_Z_OrigGreen,Number_Object_Number,Image,Well,Cell_Count
count,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,...,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0
mean,1.0,14.314961,0.023622,0.03937,1.0,0.0,1.045768,2.252023,30.13423,34.567484,...,732.787402,732.464567,496.291339,495.543307,0.0,0.0,14.314961,179728.795276,67098.795276,27.629921
std,0.0,8.981198,0.15247,0.292813,0.0,0.0,0.148972,0.704518,9.795543,14.555114,...,359.218743,358.773701,274.348438,273.6233,0.0,0.0,8.981198,21.572447,21.572447,7.061862
min,1.0,1.0,0.0,0.0,1.0,0.0,0.689632,0.748363,8.53756,7.082658,...,43.0,43.0,21.0,25.0,0.0,0.0,1.0,179708.0,67078.0,14.0
25%,1.0,7.0,0.0,0.0,1.0,0.0,0.958999,1.768528,23.958961,25.997826,...,433.0,441.0,218.5,229.5,0.0,0.0,7.0,179720.0,67090.0,21.0
50%,1.0,13.0,0.0,0.0,1.0,0.0,1.055955,2.277424,28.993927,34.271229,...,789.0,781.0,528.0,536.0,0.0,0.0,13.0,179720.0,67090.0,30.0
75%,1.0,21.0,0.0,0.0,1.0,0.0,1.134928,2.71146,36.147074,44.674411,...,1005.0,1012.5,702.5,689.0,0.0,0.0,21.0,179729.0,67099.0,36.0
max,1.0,36.0,1.0,3.0,1.0,0.0,1.465461,3.857771,78.811536,99.760373,...,1316.0,1315.0,990.0,1006.0,0.0,0.0,36.0,179769.0,67139.0,36.0


### Close the connection to the server

In [11]:
# Close the IDR connection opened earlier in the notebook
conn.close()

### License (BSD 2-Clause)
Copyright (C) 2020-2023 University of Dundee. All Rights Reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.