In [4]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import time

import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import pandas

from skimage.filters import threshold_otsu
from scipy.ndimage.morphology import distance_transform_edt
from skimage.feature import peak_local_max
from skimage.morphology import watershed
from skimage.measure import regionprops, label, find_contours
from skimage.transform import resize

sys.path.append(os.path.join(os.environ['REPO_DIR'], 'utilities'))
from utilities2015 import *
from data_manager import *
from metadata import *
from annotation_utilities import *
from registration_utilities import *
from cell_utilities import *

In [5]:
# Area bounds used to filter regionprops results in the per-section loops.
# Both bounds are exclusive: a blob is kept only if
# min_blob_area < area < max_blob_area.
min_blob_area, max_blob_area = 10, 10000

In [6]:
# Name of the cell-detection algorithm whose output is turned into blobs.
# The per-section loops branch on this value: 'myown' computes a label map
# in this notebook (Otsu threshold + distance transform + watershed), while
# 'cellprofiler' and 'farsight' load a label map previously saved to disk.
# The value is also embedded in the *_blobContoursGlobal_<alg>.hdf file name.
alg = 'cellprofiler'
# alg = 'farsight'
# alg = 'myown'

# One stack

In [7]:
# Identifier of the image stack processed by the "One stack" cells; used as
# the key into metadata_cache and as the per-stack output sub-directory name.
stack = 'MD595'

In [8]:
# Inclusive range of section indices to process for the selected stack
# (the loop below iterates range(first_bs_section, last_bs_section+1)).
first_bs_section, last_bs_section = metadata_cache['section_limits'][stack]

# Lookup from section index to image file name for the selected stack.
sections_to_filenames = metadata_cache['sections_to_filenames'][stack]

In [6]:
# Per-stack output directory under DETECTED_CELLS_ROOTDIR.
# NOTE(review): create_if_not_exists presumably creates the directory when it
# is missing and returns the path (the return value is used as a path below)
# -- confirm in utilities2015.
output_dir = create_if_not_exists(os.path.join(DETECTED_CELLS_ROOTDIR, stack))

In [None]:
# Extract per-section blob data (coords, masks, contours, shape properties)
# for every valid section of `stack` and save it under
# <output_dir>/<fn>/<fn>_blob*.{hdf,bp}.
#
# Relies on names defined by earlier cells / star-imports: stack, alg,
# min_blob_area, max_blob_area, sections_to_filenames, first_bs_section,
# last_bs_section, output_dir, metadata_cache, DataManager, execute_command,
# create_if_not_exists, find_contour_points, Pool, bp, np, imread, rgb2gray.

def find_contour_worker(msk):
    """Return the first contour found in a single blob mask.

    Masks that are only one pixel tall or wide get an extra row/column of
    ones appended before contour extraction. The result is
    ``find_contour_points(msk, sample_every=1)[1][0]``.
    NOTE(review): the [1][0] indexing presumably selects the first contour
    of label 1 -- confirm against find_contour_points.
    """
    if msk.shape[0] == 1:
        # if mask is a straight line, append another line to it.
        msk = np.vstack([msk, np.ones((msk.shape[1],))])
    elif msk.shape[1] == 1:
        msk = np.c_[msk, np.ones((msk.shape[0],))]
    return find_contour_points(msk, sample_every=1)[1][0]


# for sec in [192, 242]:
# for sec in range(117, last_bs_section+1):
for sec in range(first_bs_section, last_bs_section+1):

    fn = sections_to_filenames[sec]
    if fn in ['Placeholder', 'Rescan', 'Nonexisting']:
        continue

    fn_output_dir = create_if_not_exists(os.path.join(output_dir, fn))
    # Common "<fn_output_dir>/<fn>" prefix shared by every per-section output file.
    out_prefix = fn_output_dir + '/' + fn

    sys.stderr.write('Processing section: %03d\n' % sec)

    jpeg_filename = DataManager.get_image_filepath(stack=stack, section=sec, resol='lossless', version='compressed')
    # The destination must follow the stack being processed; a hard-coded stack
    # name here would copy this stack's images into another stack's directory.
    copyto_filename = '/home/yuncong/csd395/CSHL_cells_v2/detected_cells/%(stack)s/%(fn)s/%(fn)s_image.jpg' % {'fn': fn, 'stack': stack}
    execute_command("cp %s %s" % (jpeg_filename, copyto_filename))

    # Load mask: upsample the thumbnail mask to the full image shape and binarize.
    t = time.time()
    mask_tb = DataManager.load_thumbnail_mask_v2(stack=stack, section=sec)
    mask = resize(mask_tb, metadata_cache['image_shape'][stack][::-1]) > .5
    sys.stderr.write('Load mask: %.2f\n' % (time.time() - t))

    if alg == 'myown':

        img_filename = DataManager.get_image_filepath(stack=stack, section=sec, resol='lossless', version='cropped')

        # Restart the timer so the reported time covers only the image load,
        # not the mask load that precedes it.
        t = time.time()
        img = imread(img_filename)
        sys.stderr.write('Load image: %.2f\n' % (time.time() - t))

        t = time.time()
        im = rgb2gray(img)
        sys.stderr.write('Convert to gray: %.2f\n' % (time.time() - t))

        # Foreground = pixels darker than the Otsu threshold, restricted to the mask.
        t = time.time()
        thresh = threshold_otsu(im)
        binary = im < thresh
        binary[~mask] = 0
        sys.stderr.write('threshold: %.2f\n' % (time.time() - t))

        t = time.time()
        dt = distance_transform_edt(binary)
        sys.stderr.write('distance transform: %.2f\n' % (time.time() - t))

        # Peaks of the distance transform seed the watershed.
        t = time.time()
        local_maxi = peak_local_max(dt, labels=binary, footprint=np.ones((10, 10)), indices=False)
        sys.stderr.write('local max: %.2f\n' % (time.time() - t))

        t = time.time()
        markers = label(local_maxi)
        sys.stderr.write('label: %.2f\n' % (time.time() - t))

        t = time.time()
        labels = watershed(-dt, markers, mask=binary)
        sys.stderr.write('watershed: %.2f\n' % (time.time() - t))

    elif alg in ('cellprofiler', 'farsight'):
        # Label map previously saved to disk for this algorithm; zero it outside the mask.
        labels = bp.unpack_ndarray_file(output_dir + '/%(fn)s/%(fn)s_image_inverted_labelmap_%(alg)s.bp' %
                                        dict(fn=fn, alg=alg))
        labels[~mask] = 0

    else:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError('Algorithm not recognized: %s' % alg)

    t = time.time()
    props = regionprops(labels)
    sys.stderr.write('regionprops: %.2f\n' % (time.time() - t))

    # Keep only blobs whose area lies strictly between the two bounds.
    valid_blob_indices = [i for i, p in enumerate(props) if min_blob_area < p.area < max_blob_area]
    sys.stderr.write('%d blobs identified.\n' % len(valid_blob_indices))

    # Get blobs
    t = time.time()
    valid_blob_coords = [props[i].coords for i in valid_blob_indices] # r,c
    pandas.Series(data=valid_blob_coords).to_hdf(out_prefix + '_blobCoords.hdf', 'data', mode='w')
    sys.stderr.write('Save blob coords: %.2f\n' % (time.time() - t))

    # Generate masks: one tight boolean bounding-box mask per blob, plus the
    # mean (x, y) position of the mask pixels within that box.
    t = time.time()

    cell_masks = []
    cell_mask_centers = []
    for coords in valid_blob_coords:
        ymin, xmin = coords.min(axis=0)
        ymax, xmax = coords.max(axis=0)
        # Builtin bool replaces the np.bool alias (removed in NumPy 1.24).
        cell_mask = np.zeros((ymax+1-ymin, xmax+1-xmin), bool)
        cell_mask[coords[:,0]-ymin, coords[:,1]-xmin] = 1
        yc, xc = np.mean(np.where(cell_mask), axis=1)
        cell_masks.append(cell_mask)
        cell_mask_centers.append([xc, yc])

    pandas.Series(data=cell_masks).to_hdf(out_prefix + '_blobMasks.hdf', 'data', mode='w')
    bp.pack_ndarray_file(np.array(cell_mask_centers), out_prefix + '_blobMaskCenters.bp')

    sys.stderr.write('Save blob masks: %.2f\n' % (time.time() - t))

    # Other blob attributes
    t = time.time()

    pool = Pool(16)
    try:
        valid_blob_contours = pool.map(find_contour_worker, cell_masks)
    finally:
        # Always release the worker processes, even if a worker raised.
        pool.terminate()
        pool.join()

    pandas.Series(data=valid_blob_contours).to_hdf(out_prefix + '_blobContours.hdf', 'data', mode='w')

    sys.stderr.write('Save blob contours, save: %.2f\n' % (time.time() - t))

    t = time.time()

    valid_blob_orientations = np.array([props[i].orientation for i in valid_blob_indices])
    # reshape(-1, 2) keeps the array two-dimensional when there are no valid
    # blobs, so the column flip below does not raise IndexError.
    valid_blob_centroids = np.array([props[i].centroid for i in valid_blob_indices]).reshape(-1, 2)[:,::-1] # r,c -> x,y
    valid_blob_majorAxisLen = np.array([props[i].major_axis_length for i in valid_blob_indices])
    valid_blob_minorAxisLen = np.array([props[i].minor_axis_length for i in valid_blob_indices])

    bp.pack_ndarray_file(valid_blob_orientations, out_prefix + '_blobOrientations.bp')
    bp.pack_ndarray_file(valid_blob_centroids, out_prefix + '_blobCentroids.bp')
    bp.pack_ndarray_file(valid_blob_majorAxisLen, out_prefix + '_blobMajorAxisLen.bp')
    bp.pack_ndarray_file(valid_blob_minorAxisLen, out_prefix + '_blobMinorAxisLen.bp')

    # Shift each contour from mask-local to image coordinates: subtract the
    # mask center, then add the blob centroid (builtin int replaces np.int).
    blob_contours_global = [(contour - center + centroid).astype(int)
                            for contour, center, centroid
                            in zip(valid_blob_contours, cell_mask_centers, valid_blob_centroids)]
    pandas.Series(data=blob_contours_global).to_hdf(out_prefix + '_blobContoursGlobal_%(alg)s.hdf' % {'alg': alg},
                                                    'data', mode='w')

    sys.stderr.write('Compute blob properties, save: %.2f\n' % (time.time() - t))

# All stacks

In [None]:
# Extract per-section blob data (coords, masks, contours, shape properties)
# for every valid section of each listed stack and save it under
# <DETECTED_CELLS_ROOTDIR>/<stack>/<fn>/<fn>_blob*.{hdf,bp}.
#
# Relies on names defined by earlier cells / star-imports: alg,
# min_blob_area, max_blob_area, metadata_cache, DETECTED_CELLS_ROOTDIR,
# DataManager, execute_command, create_if_not_exists, is_invalid,
# find_contour_points, Pool, bp, np, imread, rgb2gray.
# Note that this cell rebinds the notebook-level names stack,
# sections_to_filenames, first_bs_section, last_bs_section and output_dir.

def find_contour_worker(msk):
    """Return the first contour found in a single blob mask.

    Masks that are only one pixel tall or wide get an extra row/column of
    ones appended before contour extraction. The result is
    ``find_contour_points(msk, sample_every=1)[1][0]``.
    NOTE(review): the [1][0] indexing presumably selects the first contour
    of label 1 -- confirm against find_contour_points.
    """
    if msk.shape[0] == 1:
        # if mask is a straight line, append another line to it.
        msk = np.vstack([msk, np.ones((msk.shape[1],))])
    elif msk.shape[1] == 1:
        msk = np.c_[msk, np.ones((msk.shape[0],))]
    return find_contour_points(msk, sample_every=1)[1][0]


# for stack in all_nissl_stacks:
for stack in ['MD602', 'MD603']:

    sections_to_filenames = metadata_cache['sections_to_filenames'][stack]
    first_bs_section, last_bs_section = metadata_cache['section_limits'][stack]

    output_dir = create_if_not_exists(os.path.join(DETECTED_CELLS_ROOTDIR, stack))

    for sec in range(first_bs_section, last_bs_section+1):

        fn = sections_to_filenames[sec]

        if is_invalid(fn=fn):
            continue

        fn_output_dir = create_if_not_exists(os.path.join(output_dir, fn))
        # Common "<fn_output_dir>/<fn>" prefix shared by every per-section output file.
        out_prefix = fn_output_dir + '/' + fn

        sys.stderr.write('Processing section: %03d\n' % sec)

        jpeg_filename = DataManager.get_image_filepath(stack=stack, section=sec, resol='lossless', version='compressed')
        copyto_filename = '/home/yuncong/csd395/CSHL_cells_v2/detected_cells/%(stack)s/%(fn)s/%(fn)s_image.jpg' % {'fn': fn, 'stack': stack}
        execute_command("cp %s %s" % (jpeg_filename, copyto_filename))

        # Load mask: upsample the thumbnail mask to the full image shape and binarize.
        t = time.time()
        mask_tb = DataManager.load_thumbnail_mask_v2(stack=stack, section=sec)
        mask = resize(mask_tb, metadata_cache['image_shape'][stack][::-1]) > .5
        sys.stderr.write('Load mask: %.2f\n' % (time.time() - t))

        if alg == 'myown':

            img_filename = DataManager.get_image_filepath(stack=stack, section=sec, resol='lossless', version='cropped')

            # Restart the timer so the reported time covers only the image
            # load, not the mask load that precedes it.
            t = time.time()
            img = imread(img_filename)
            sys.stderr.write('Load image: %.2f\n' % (time.time() - t))

            t = time.time()
            im = rgb2gray(img)
            sys.stderr.write('Convert to gray: %.2f\n' % (time.time() - t))

            # Foreground = pixels darker than the Otsu threshold, restricted to the mask.
            t = time.time()
            thresh = threshold_otsu(im)
            binary = im < thresh
            binary[~mask] = 0
            sys.stderr.write('threshold: %.2f\n' % (time.time() - t))

            t = time.time()
            dt = distance_transform_edt(binary)
            sys.stderr.write('distance transform: %.2f\n' % (time.time() - t))

            # Peaks of the distance transform seed the watershed.
            t = time.time()
            local_maxi = peak_local_max(dt, labels=binary, footprint=np.ones((10, 10)), indices=False)
            sys.stderr.write('local max: %.2f\n' % (time.time() - t))

            t = time.time()
            markers = label(local_maxi)
            sys.stderr.write('label: %.2f\n' % (time.time() - t))

            t = time.time()
            labels = watershed(-dt, markers, mask=binary)
            sys.stderr.write('watershed: %.2f\n' % (time.time() - t))

        elif alg in ('cellprofiler', 'farsight'):
            # Label map previously saved to disk for this algorithm; zero it outside the mask.
            labels = bp.unpack_ndarray_file(output_dir + '/%(fn)s/%(fn)s_image_inverted_labelmap_%(alg)s.bp' %
                                            dict(fn=fn, alg=alg))
            labels[~mask] = 0

        else:
            # Raising a plain string is itself a TypeError; raise a real exception.
            raise ValueError('Algorithm not recognized: %s' % alg)

        t = time.time()
        props = regionprops(labels)
        sys.stderr.write('regionprops: %.2f\n' % (time.time() - t))

        # Keep only blobs whose area lies strictly between the two bounds.
        valid_blob_indices = [i for i, p in enumerate(props) if min_blob_area < p.area < max_blob_area]
        sys.stderr.write('%d blobs identified.\n' % len(valid_blob_indices))

        # Get blobs
        t = time.time()
        valid_blob_coords = [props[i].coords for i in valid_blob_indices] # r,c
        pandas.Series(data=valid_blob_coords).to_hdf(out_prefix + '_blobCoords.hdf', 'data', mode='w')
        sys.stderr.write('Save blob coords: %.2f\n' % (time.time() - t))

        # Generate masks: one tight boolean bounding-box mask per blob, plus
        # the mean (x, y) position of the mask pixels within that box.
        t = time.time()

        cell_masks = []
        cell_mask_centers = []
        for coords in valid_blob_coords:
            ymin, xmin = coords.min(axis=0)
            ymax, xmax = coords.max(axis=0)
            # Builtin bool replaces the np.bool alias (removed in NumPy 1.24).
            cell_mask = np.zeros((ymax+1-ymin, xmax+1-xmin), bool)
            cell_mask[coords[:,0]-ymin, coords[:,1]-xmin] = 1
            yc, xc = np.mean(np.where(cell_mask), axis=1)
            cell_masks.append(cell_mask)
            cell_mask_centers.append([xc, yc])

        pandas.Series(data=cell_masks).to_hdf(out_prefix + '_blobMasks.hdf', 'data', mode='w')
        bp.pack_ndarray_file(np.array(cell_mask_centers), out_prefix + '_blobMaskCenters.bp')

        sys.stderr.write('Save blob masks: %.2f\n' % (time.time() - t))

        # Other blob attributes
        t = time.time()

        pool = Pool(16)
        try:
            valid_blob_contours = pool.map(find_contour_worker, cell_masks)
        finally:
            # Always release the worker processes, even if a worker raised.
            pool.terminate()
            pool.join()

        pandas.Series(data=valid_blob_contours).to_hdf(out_prefix + '_blobContours.hdf', 'data', mode='w')

        sys.stderr.write('Save blob contours, save: %.2f\n' % (time.time() - t))

        t = time.time()

        valid_blob_orientations = np.array([props[i].orientation for i in valid_blob_indices])
        # reshape(-1, 2) keeps the array two-dimensional when there are no
        # valid blobs, so the column flip below does not raise IndexError.
        valid_blob_centroids = np.array([props[i].centroid for i in valid_blob_indices]).reshape(-1, 2)[:,::-1] # r,c -> x,y
        valid_blob_majorAxisLen = np.array([props[i].major_axis_length for i in valid_blob_indices])
        valid_blob_minorAxisLen = np.array([props[i].minor_axis_length for i in valid_blob_indices])

        bp.pack_ndarray_file(valid_blob_orientations, out_prefix + '_blobOrientations.bp')
        bp.pack_ndarray_file(valid_blob_centroids, out_prefix + '_blobCentroids.bp')
        bp.pack_ndarray_file(valid_blob_majorAxisLen, out_prefix + '_blobMajorAxisLen.bp')
        bp.pack_ndarray_file(valid_blob_minorAxisLen, out_prefix + '_blobMinorAxisLen.bp')

        # Shift each contour from mask-local to image coordinates: subtract the
        # mask center, then add the blob centroid (builtin int replaces np.int).
        blob_contours_global = [(contour - center + centroid).astype(int)
                                for contour, center, centroid
                                in zip(valid_blob_contours, cell_mask_centers, valid_blob_centroids)]
        pandas.Series(data=blob_contours_global).to_hdf(out_prefix + '_blobContoursGlobal_%(alg)s.hdf' % {'alg': alg},
                                                        'data', mode='w')

        sys.stderr.write('Compute blob properties, save: %.2f\n' % (time.time() - t))


Processing section: 110


cp /oasis/projects/nsf/csd395/yuncong/CSHL_data_processed/MD602/MD602_lossless_alignedTo_MD602-N49-2015.12.01-18.41.46_MD602_2_0146_cropped_compressed/MD602-N19-2015.12.01-16.24.09_MD602_2_0056_lossless_alignedTo_MD602-N49-2015.12.01-18.41.46_MD602_2_0146_cropped_compressed.jpg /home/yuncong/csd395/CSHL_cells_v2/detected_cells/MD602/MD602-N19-2015.12.01-16.24.09_MD602_2_0056/MD602-N19-2015.12.01-16.24.09_MD602_2_0056_image.jpg


Child returned 0
Load mask: 15.54
regionprops: 1.94
44270 blobs identified.
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)
Save blob coords: 2.56
Save blob masks: 4.47
Save blob contours, save: 7.47
Compute blob properties, save: 9.39
Processing section: 111


cp /oasis/projects/nsf/csd395/yuncong/CSHL_data_processed/MD602/MD602_lossless_alignedTo_MD602-N49-2015.12.01-18.41.46_MD602_2_0146_cropped_compressed/MD602-IHC19-2015.12.01-22.47.08_MD602_2_0056_lossless_alignedTo_MD602-N49-2015.12.01-18.41.46_MD602_2_0146_cropped_compressed.jpg /home/yuncong/csd395/CSHL_cells_v2/detected_cells/MD602/MD602-IHC19-2015.12.01-22.47.08_MD602_2_0056/MD602-IHC19-2015.12.01-22.47.08_MD602_2_0056_image.jpg


Child returned 0
