In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import time
import cv2

# from joblib import Parallel, delayed

sys.path.append(os.path.join(os.environ['REPO_DIR'], 'utilities'))
from utilities2015 import *
from data_manager import *
from metadata import *

import matplotlib.pyplot as plt
%matplotlib inline

Setting environment for Gordon


No vtk


In [2]:
from skimage.filters import threshold_otsu
from scipy.ndimage.morphology import distance_transform_edt
from skimage.feature import peak_local_max
from skimage.morphology import watershed
from skimage.measure import regionprops, label, find_contours
from skimage.transform import resize

from annotation_utilities import *
from registration_utilities import *

import pandas

In [3]:
# Name of the image stack to process. It keys the per-stack entries of
# `metadata_cache` and is part of the output directory path used below.
stack = 'MD589'

In [4]:
# Blob size filter applied to `regionprops(...).area` in the detection loop.
# Both bounds are exclusive: a blob is kept only if
# min_blob_area < area < max_blob_area.
min_blob_area = 10
max_blob_area = 10000

In [5]:
# Lookup from section number to the filename stem (`fn`) used to name the
# per-section output directory and files.
sections_to_filenames = metadata_cache['sections_to_filenames'][stack]
# First/last section limits for this stack as stored in `metadata_cache`.
# NOTE(review): "bs" presumably stands for brainstem -- confirm against metadata.py.
first_bs_section, last_bs_section = metadata_cache['section_limits'][stack]

In [6]:
# Root directory for this stack's detection results; each section gets its own
# sub-directory below it.
# NOTE(review): `create_if_not_exists` presumably creates the directory when
# missing and returns the path (the return value is used as a path later) -- confirm.
output_dir = create_if_not_exists('/home/yuncong/csd395/CSHL_cells_v2/detected_cells/' + stack)

In [7]:
# Which segmentation the detection loop uses:
#   'cellprofiler' / 'farsight' -- load a precomputed label map from disk
#                                  (<fn>_image_inverted_labelmap_<alg>.bp);
#   'myown'                     -- segment in-notebook (Otsu threshold, distance
#                                  transform, watershed).
# The value is also embedded in the blobContoursGlobal / viz output file names.
alg = 'cellprofiler'
# alg = 'farsight'
# alg = 'myown'

In [9]:
def find_contour_worker(msk):
    """Return the contour of one blob mask via `find_contour_points`.

    A mask that is a single row (or a single column) gets one extra row
    (column) of ones appended at the bottom (right) before contour extraction,
    so the padding does not shift the mask's origin.
    NOTE(review): presumably `find_contour_points` cannot handle masks that are
    one pixel thick, and `[1][0]` selects the first contour of the foreground
    label -- confirm against annotation_utilities.
    """
    if msk.shape[0] == 1:
        # if mask is a straight line, append another line to it.
        msk = np.vstack([msk, np.ones((msk.shape[1],))])
    elif msk.shape[1] == 1:
        msk = np.c_[msk, np.ones((msk.shape[0],))]
    return find_contour_points(msk, sample_every=1)[1][0]


# Detect blobs on selected sections and save their coordinates, masks,
# contours and shape properties under <output_dir>/<fn>/.
# To process the whole stack, iterate over
# range(first_bs_section, last_bs_section, 50) instead.
for sec in [192, 242]:

    fn = sections_to_filenames[sec]
    fn_output_dir = create_if_not_exists(os.path.join(output_dir, fn))

    sys.stderr.write('Processing section: %03d\n' % sec)

    # Keep a copy of the compressed image next to the detection results.
    # The destination is derived from fn_output_dir so that it follows `stack`
    # instead of a hard-coded stack name.
    jpeg_filename = DataManager.get_image_filepath(stack=stack, section=sec, resol='lossless', version='compressed')
    copyto_filename = os.path.join(fn_output_dir, '%(fn)s_image.jpg' % {'fn': fn})
    execute_command("cp %s %s" % (jpeg_filename, copyto_filename))

    if alg == 'myown':

        img_filename = DataManager.get_image_filepath(stack=stack, section=sec, resol='lossless', version='cropped')

        # Start the timer before loading; previously `t` was read before assignment.
        t = time.time()
        img = imread(img_filename)
        sys.stderr.write('Load image: %.2f\n' % (time.time() - t) )

        t = time.time()
        im = rgb2gray(img)
        sys.stderr.write('Convert to gray: %.2f\n' % (time.time() - t) )

        # Upsample the thumbnail mask to full image resolution.
        t = time.time()
        mask_tb = DataManager.load_thumbnail_mask_v2(stack=stack, section=sec)
        mask = resize(mask_tb, img.shape[:2]) > .5
        sys.stderr.write('Load mask: %.2f\n' % (time.time() - t) )

        # Foreground = darker than the Otsu threshold and inside the mask.
        t = time.time()
        thresh = threshold_otsu(im)
        binary = im < thresh
        binary[~mask] = 0
        sys.stderr.write('threshold: %.2f\n' % (time.time() - t) )

        t = time.time()
        dt = distance_transform_edt(binary)
        sys.stderr.write('distance transform: %.2f\n' % (time.time() - t) )

        # Local maxima of the distance transform seed the watershed.
        t = time.time()
        local_maxi = peak_local_max(dt, labels=binary, footprint=np.ones((10, 10)), indices=False)
        sys.stderr.write('local max: %.2f\n' % (time.time() - t) )

        t = time.time()
        markers = label(local_maxi)
        sys.stderr.write('label: %.2f\n' % (time.time() - t) )

        t = time.time()
        labels = watershed(-dt, markers, mask=binary)
        sys.stderr.write('watershed: %.2f\n' % (time.time() - t) )

    elif alg in ('cellprofiler', 'farsight'):
        # Precomputed label map produced by the external tool.
        labels = bp.unpack_ndarray_file(output_dir + '/%(fn)s/%(fn)s_image_inverted_labelmap_%(alg)s.bp' % \
                                        dict(fn=fn, alg=alg))

        # The mask used to be defined only in the 'myown' branch, so this branch
        # relied on a leftover kernel variable. Load it here and resize it to
        # the label map's own shape so the boolean index is always valid.
        t = time.time()
        mask_tb = DataManager.load_thumbnail_mask_v2(stack=stack, section=sec)
        mask = resize(mask_tb, labels.shape[:2]) > .5
        sys.stderr.write('Load mask: %.2f\n' % (time.time() - t) )

        labels[~mask] = 0

    else:
        # Raising a plain string is itself a TypeError; raise a real exception.
        raise ValueError('Algorithm not recognized: %s' % alg)

    t = time.time()
    props = regionprops(labels)
    sys.stderr.write('regionprops: %.2f\n' % (time.time() - t) )

    # Keep blobs whose area lies strictly between the two bounds.
    valid_blob_indices = [i for i, p in enumerate(props) if min_blob_area < p.area < max_blob_area]
    n_valid_blobs = len(valid_blob_indices)
    sys.stderr.write('%d blobs identified.\n' % n_valid_blobs)

    # Get blobs
    t = time.time()
    valid_blob_coords = [props[i].coords for i in valid_blob_indices] # r,c
    pandas.Series(data=valid_blob_coords).to_hdf(fn_output_dir + '/%(fn)s_blobCoords.hdf' % {'fn': fn}, 'data', mode='w')
    sys.stderr.write('Save blob coords: %.2f\n' % (time.time() - t) )

    # Generate masks: one tight bounding-box boolean mask per blob, plus the
    # mask-local center (x, y) of its pixels.
    t = time.time()
    # The progress value is a blob index, so the bar spans the blob count
    # rather than the section limits.
    bar = show_progress_bar(0, n_valid_blobs)

    cell_masks = []
    cell_mask_centers = []
    for i, coords in enumerate(valid_blob_coords):
        bar.value = i
        ymin, xmin = coords.min(axis=0)
        ymax, xmax = coords.max(axis=0)
        # Builtin bool replaces the deprecated np.bool alias.
        cell_mask = np.zeros((ymax+1-ymin, xmax+1-xmin), bool)
        cell_mask[coords[:,0]-ymin, coords[:,1]-xmin] = 1
        yc, xc = np.mean(np.where(cell_mask), axis=1)
        cell_masks.append(cell_mask)
        cell_mask_centers.append([xc, yc])

    pandas.Series(data=cell_masks).to_hdf(fn_output_dir + '/%(fn)s_blobMasks.hdf' % {'fn': fn}, 'data', mode='w')
    bp.pack_ndarray_file(np.array(cell_mask_centers), fn_output_dir + '/%(fn)s_blobMaskCenters.bp' % {'fn':fn})

    sys.stderr.write('Save blob masks: %.2f\n' % (time.time() - t) )

    # Other blob attributes
    t = time.time()

    # Pass the worker directly (no lambda wrapper) and always release the
    # worker processes, even if contour extraction raises.
    pool = Pool(16)
    try:
        valid_blob_contours = pool.map(find_contour_worker, cell_masks)
    finally:
        pool.terminate()
        pool.join()

    pandas.Series(data=valid_blob_contours).to_hdf(fn_output_dir + '/%(fn)s_blobContours.hdf' % {'fn': fn}, 'data', mode='w')

    sys.stderr.write('Save blob contours, save: %.2f\n' % (time.time() - t) )

    t = time.time()

    valid_blob_orientations = np.array([props[i].orientation for i in valid_blob_indices])
    # reshape(-1, 2) keeps the column flip valid when no blob passed the filter.
    valid_blob_centroids = np.array([props[i].centroid for i in valid_blob_indices]).reshape(-1, 2)[:,::-1] # r,c -> x,y
    valid_blob_majorAxisLen = np.array([props[i].major_axis_length for i in valid_blob_indices])
    valid_blob_minorAxisLen = np.array([props[i].minor_axis_length for i in valid_blob_indices])

    bp.pack_ndarray_file(valid_blob_orientations, fn_output_dir + '/%(fn)s_blobOrientations.bp' % {'fn':fn})
    bp.pack_ndarray_file(valid_blob_centroids, fn_output_dir + '/%(fn)s_blobCentroids.bp' % {'fn':fn})
    bp.pack_ndarray_file(valid_blob_majorAxisLen, fn_output_dir + '/%(fn)s_blobMajorAxisLen.bp' % {'fn':fn})
    bp.pack_ndarray_file(valid_blob_minorAxisLen, fn_output_dir + '/%(fn)s_blobMinorAxisLen.bp' % {'fn':fn})

    # Move each contour from mask-local to image coordinates by swapping the
    # mask-local center for the blob's image-space centroid.
    blob_contours_global = [(valid_blob_contours[i] - cell_mask_centers[i] + valid_blob_centroids[i]).astype(int)
                            for i in range(n_valid_blobs)]
    pandas.Series(data=blob_contours_global).to_hdf(fn_output_dir + '/%(fn)s_blobContoursGlobal_%(alg)s.hdf' % {'fn': fn, 'alg':alg},
                                                    'data', mode='w')

    sys.stderr.write('Compute blob properties, save: %.2f\n' % (time.time() - t) )

Processing section: 192


cp /oasis/projects/nsf/csd395/yuncong/CSHL_data_processed/MD589/MD589_lossless_unsorted_alignedTo_MD589-IHC31-2015.07.30-23.26.22_MD589_1_0091_cropped_compressed/MD589-N34-2015.07.30-18.20.43_MD589_2_0101_lossless_alignedTo_MD589-IHC31-2015.07.30-23.26.22_MD589_1_0091_cropped_compressed.jpg /home/yuncong/csd395/CSHL_cells_v2/detected_cells/MD589/MD589-N34-2015.07.30-18.20.43_MD589_2_0101/MD589-N34-2015.07.30-18.20.43_MD589_2_0101_image.jpg


Child returned 0
  strip = decompress(strip)
Load image: 56.61
Convert to gray: 8.36
Load mask: 10.81
regionprops: 3.24
92351 blobs identified.
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->values] [items->None]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)
Save blob coords: 5.19
Save blob masks: 11.61
Save blob contours, save: 14.28
Compute blob properties, save: 15.01
Processing section: 242


cp /oasis/projects/nsf/csd395/yuncong/CSHL_data_processed/MD589/MD589_lossless_unsorted_alignedTo_MD589-IHC31-2015.07.30-23.26.22_MD589_1_0091_cropped_compressed/MD589-N47-2015.07.30-19.12.19_MD589_1_0139_lossless_alignedTo_MD589-IHC31-2015.07.30-23.26.22_MD589_1_0091_cropped_compressed.jpg /home/yuncong/csd395/CSHL_cells_v2/detected_cells/MD589/MD589-N47-2015.07.30-19.12.19_MD589_1_0139/MD589-N47-2015.07.30-19.12.19_MD589_1_0139_image.jpg


Child returned 0
Load image: 54.34
Convert to gray: 8.33
Load mask: 9.52
regionprops: 2.74
80647 blobs identified.
Save blob coords: 3.98
Save blob masks: 10.19
Save blob contours, save: 12.63
Compute blob properties, save: 13.85


# Visualize

In [10]:
# Visualize

# Render the saved global blob contours of each section as red pixels on a
# transparent RGBA canvas of the full image size, and save it as a PNG in the
# section's output directory.
# To visualize the whole stack, iterate over
# range(first_bs_section, last_bs_section, 50) instead.

# Image size is per stack, so look it up once rather than on every section.
w, h = metadata_cache['image_shape'][stack]

for sec in [192, 242]:

    t = time.time()

    fn = sections_to_filenames[sec]
    fn_output_dir = create_if_not_exists(os.path.join(output_dir, fn))

    blob_contours = pandas.read_hdf(fn_output_dir + '/%(fn)s_blobContoursGlobal_%(alg)s.hdf' % {'fn': fn, 'alg': alg}, 'data')

    n_blobs = len(blob_contours)

    blob_contour_viz = np.zeros((h, w, 4), np.uint8)

    # The progress value is a blob index, so the bar spans the blob count
    # rather than the section limits.
    bar = show_progress_bar(0, n_blobs)
    for i, cnt in enumerate(blob_contours):
        bar.value = i
        # Contour points are (x, y); clamp them to the canvas before indexing.
        rows = np.clip(cnt[:,1], 0, h-1)
        cols = np.clip(cnt[:,0], 0, w-1)
        blob_contour_viz[rows, cols] = (255,0,0,255)

    # Write next to the other per-section outputs; the path follows `stack`
    # instead of a hard-coded stack name.
    imsave(os.path.join(fn_output_dir, '%(fn)s_blobContours_viz_%(alg)s.png' % {'fn':fn, 'alg': alg}),
           blob_contour_viz)

    sys.stderr.write('Save image: %.2f\n' % (time.time() - t) )

Save image: 37.84
Save image: 35.43
