In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import time
import cv2

from joblib import Parallel, delayed

sys.path.append(os.path.join(os.environ['REPO_DIR'], 'utilities'))
from utilities2015 import *
from data_manager import *
from metadata import *

import matplotlib.pyplot as plt
%matplotlib inline

from skimage.transform import rotate
from multiprocess import Pool

import pandas

from cell_utilities import *

Setting environment for Gordon


No vtk


In [2]:
stack = 'MD589'

In [3]:
DETECTED_CELLS_DIR = '/home/yuncong/csd395/CSHL_cells_v2/detected_cells/'
PROCESSED_CELLS_DIR = '/home/yuncong/csd395/CSHL_cells_v2/processed_cells/'

In [4]:
def load_cell_list_data(what):
    if what == 'address':
        fn = 'cell_addresses.bp'
    elif what == 'mask_aligned_padded':
        fn = 'cells_aligned_padded.bp'
    elif what == 'mask_aligned_padded_horizontal':
        fn = 'cells_h_aligned_padded.bp'
    elif what == 'mask_aligned_padded_vertical':
        fn = 'cells_v_aligned_padded.bp'
    elif what == 'mask_aligned_padded_diagonal':
        fn = 'cells_d_aligned_padded.bp'
    elif what == 'largest30p_indices':
        fn = 'largest30p_indices.bp'
    elif what == 'embedding':
        fn = 'embeddings_normalized_largest30p_nystromSample100.bp'
        
    return bp.unpack_ndarray_file(os.path.join(PROCESSED_CELLS_DIR, fn))

In [5]:
def load_cell_data(what, stack, sections):
    
    sections_to_filenames = metadata_cache['sections_to_filenames'][stack]
    
    if what == 'orientation':
        fn_template = '%(fn)s_blobOrientations.bp'
    elif what == 'major':
        fn_template = '%(fn)s_blobMajorAxisLen.bp'
    elif what == 'minor':
        fn_template = '%(fn)s_blobMinorAxisLen.bp'
    elif what == 'mask_center':
        fn_template = '%(fn)s_blobMaskCenters.bp'
    elif what == 'mask':
        fn_template = '%(fn)s_blobMasks.hdf'
    elif what == 'centroid':
        fn_template = '%(fn)s_blobCentroids.bp'
    elif what == 'contours':
        fn_template = '%(fn)s_blobContours.hdf'
    elif what == 'image':
        fn_template = '%(fn)s_image.jpg'
    else:
        raise Exception('Not recognized.')

    data = {}
    for sec in sections:
        fn = sections_to_filenames[sec]
        fp = os.path.join(DETECTED_CELLS_DIR, stack, '%(fn)s', fn_template) % {'fn': fn}
        if fp.endswith('bp'):
            data[sec] = bp.unpack_ndarray_file(fp)
        elif fp.endswith('jpg'):
            data[sec] = imread(fp)
        elif fp.endswith('hdf'):
            data[sec] = pandas.read_hdf(fp)
        else:
            raise Exception('Not recognized.')
            
    return data

In [15]:
cell_orientations_per_section = load_cell_data('orientation', stack=stack, sections=[192,242])
cell_centroids_per_section = load_cell_data('centroid', stack=stack, sections=[192,242])

In [20]:
cell_numbers_per_section = {sec: x.shape[0] for sec, x in cell_centroids_per_section.iteritems()}

In [21]:
cell_addresses = load_cell_list_data('address')

In [22]:
cell_masks_aligned_padded = load_cell_list_data('mask_aligned_padded')
cell_masks_aligned_padded_horizontal = load_cell_list_data('mask_aligned_padded_horizontal')
cell_masks_aligned_padded_vertical = load_cell_list_data('mask_aligned_padded_vertical')
cell_masks_aligned_padded_diagonal = load_cell_list_data('mask_aligned_padded_diagonal')

In [23]:
cell_masks_per_section = load_cell_data('mask', stack, [192, 242])
cell_sizes_per_section = {sec: [np.count_nonzero(msk) for msk in masks] for sec, masks in cell_masks_per_section.iteritems()}

In [None]:
def group_according_to_address(x, addresses):
    
    x_per_section = defaultdict(dict)

    for (sec, secwise_idx), xi in zip(addresses, x):
        x_per_section[sec][secwise_idx] = xi

    x_per_section.default_factory = None
    
    return x_per_section

cell_masks_aligned_padded_per_section = group_according_to_address(cell_masks_aligned_padded, cell_addresses)
cell_masks_aligned_padded_horizontal_per_section = group_according_to_address(cell_masks_aligned_padded_horizontal, cell_addresses)
cell_masks_aligned_padded_vertical_per_section = group_according_to_address(cell_masks_aligned_padded_vertical, cell_addresses)
cell_masks_aligned_padded_diagonal_per_section = group_according_to_address(cell_masks_aligned_padded_diagonal, cell_addresses)

def flatten_mask_dict(cell_masks_aligned_padded_per_section):
    cell_masks_aligned_padded_flattened_per_section = {sec: {secwise_idx: mask.flatten() 
                                                         for secwise_idx, mask in masks.iteritems()}
                                                   for sec, masks in cell_masks_aligned_padded_per_section.iteritems()}
    return cell_masks_aligned_padded_flattened_per_section

cell_masks_aligned_padded_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_per_section)
cell_masks_aligned_padded_horizontal_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_horizontal_per_section)
cell_masks_aligned_padded_vertical_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_vertical_per_section)
cell_masks_aligned_padded_diagonal_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_diagonal_per_section)

In [24]:
large_cell_indices = load_cell_list_data('largest30p_indices')

In [26]:
cell_types_per_section = {sec: np.zeros((cell_numbers_per_section[sec],), dtype=np.uint8) for sec in [192, 242]}

for i in large_cell_indices:
    sec, idx = cell_addresses[i]
    cell_types_per_section[sec][idx] = 1

In [27]:
large_cell_indices_per_section = {sec: np.where(types)[0] for sec, types in cell_types.iteritems()}

In [28]:
large_cell_embedding = load_cell_list_data('embedding')

In [29]:
from scipy.spatial.kdtree import KDTree
from scipy.spatial.distance import cdist, pdist

In [30]:
sec = 192

In [32]:
# Find each cell's neighbors

t = time.time()

tree = KDTree(cell_centroids_per_section[sec])

pool = Pool(12)
neighbors = pool.map(lambda i: list(set(tree.query_ball_point(cell_centroids_per_section[sec][i], r=100)) - {i}),
                     large_cell_indices_per_section[sec])
pool.terminate()
pool.join()

sys.stderr.write('Neighbor search: %.2f seconds\n' % (time.time()-t)) # 10 seconds

neighbors = dict(zip(large_cell_indices_per_section[sec], neighbors))

Neighbor search: 10.09 seconds


In [34]:
neighbor_vectors = {i: cell_centroids_per_section[sec][i] - cell_centroids_per_section[sec][nns] 
                    for i, nns in neighbors.iteritems()}

In [35]:
# Binning each cell's neighbors

# radial_bins = np.logspace(0, 2, 10, base=10)
radial_bins = np.linspace(0, 100, 10)
n_radial = len(radial_bins)

angular_bins = np.linspace(-np.pi, np.pi, 8)
n_angular = len(angular_bins)


radial_indices_all = {}
angular_indices_all = {}
for i in neighbor_vectors.iterkeys():
    radial_indices, angular_indices = allocate_radial_angular_bins(neighbor_vectors[i], 
                                                                   cell_orientations_per_section[sec][i],
                                                        angular_bins=angular_bins, radial_bins=radial_bins)
    radial_indices_all[i] = radial_indices
    angular_indices_all[i] = angular_indices

In [37]:
print neighbors[16]
print neighbor_vectors[16]
print radial_indices_all[16]
print angular_indices_all[16]

In [65]:
# Load annotation contours

contour_df, _ = DataManager.load_annotation_v3(stack=stack, annotation_rootdir=annotation_midbrainIncluded_v2_rootdir)

downsample_factor = 1

anchor_filename = metadata_cache['anchor_fn'][stack]
sections_to_filenames = metadata_cache['sections_to_filenames'][stack]
filenames_to_sections = {f: s for s, f in sections_to_filenames.iteritems()
                        if f not in ['Placeholder', 'Nonexisting', 'Rescan']}

# Load transforms, defined on thumbnails
import cPickle as pickle
Ts = pickle.load(open(thumbnail_data_dir + '/%(stack)s/%(stack)s_elastix_output/%(stack)s_transformsTo_anchor.pkl' % dict(stack=stack), 'r'))

Ts_inv_downsampled = {}
for fn, T0 in Ts.iteritems():
    T = T0.copy()
    T[:2, 2] = T[:2, 2] * 32 / downsample_factor
    Tinv = np.linalg.inv(T)
    Ts_inv_downsampled[fn] = Tinv

# Load bounds
crop_xmin, crop_xmax, crop_ymin, crop_ymax = metadata_cache['cropbox'][stack]
print 'crop:', crop_xmin, crop_xmax, crop_ymin, crop_ymax

# tb_vol_xmin, tb_vol_xmax, tb_vol_ymin, tb_vol_ymax, tb_vol_zmin, tb_vol_zmax = \
# np.loadtxt(os.path.join(VOLUME_ROOTDIR, stack, '%(stack)s_down%(downsample)dVolume_bbox.txt') % \
#            dict(stack=stack, downsample=32), dtype=np.int)
# print 'tb_vol:', tb_vol_xmin, tb_vol_xmax, tb_vol_ymin, tb_vol_ymax, tb_vol_zmin, tb_vol_zmax

'No object named structures in the file'
crop: 569 1053 140 512


Annotation has no structures.


In [71]:
section_to_analyze = 192

image = load_cell_data('image', stack=stack, sections=[section_to_analyze])[section_to_analyze]

In [94]:
# Identify large and small cells inside each structure.

large_cell_indices_this_section = large_cell_indices_per_section[section_to_analyze]
large_cell_centroids_this_section = cell_centroids_per_section[section_to_analyze][large_cell_indices_this_section]

# for name_u, cnt in contours.iteritems():
name_u = '7N'
cnt = contours[name_u]

n = len(cnt)
fn = sections_to_filenames[section_to_analyze]
vertices_on_aligned = np.dot(Ts_inv_downsampled[fn], np.c_[cnt/downsample_factor, np.ones((n,))].T).T[:, :2]

xs = vertices_on_aligned[:,0] - crop_xmin * 32 / downsample_factor
ys = vertices_on_aligned[:,1] - crop_ymin * 32 / downsample_factor

cnt_cropped = np.c_[xs, ys].astype(np.int)

# Get large cells

# from shapely.geometry import Polygon
from matplotlib.path import Path

large_cell_is_inside = Path(cnt_cropped.astype(np.int)).contains_points(large_cell_centroids_this_section)
large_cell_indices_inside = large_cell_indices_this_section[large_cell_is_inside]

print '%d large cells are identified in %s.' % (len(large_cell_indices_inside), name_u)

# Small cells

small_cell_indices_this_section = np.array(list(set(range(cell_numbers_per_section[section_to_analyze])) - set(large_cell_indices_this_section.tolist())))
small_cell_centroids_this_section = cell_centroids_per_section[section_to_analyze][small_cell_indices_this_section]
small_cell_is_inside = Path(cnt_cropped.astype(np.int)).contains_points(small_cell_centroids_this_section)
small_cell_indices_inside = small_cell_indices_this_section[small_cell_is_inside]

print '%d small cells are identified in %s.' % (len(small_cell_indices_inside), name_u)

153 large cells are identified in 7N.
291 small cells are identified in 7N.


In [72]:
# Visualize cell contours

cell_contours = load_cell_data('contours', stack=stack, sections=[section_to_analyze])[section_to_analyze]
cell_mask_centers = load_cell_data('mask_center', stack=stack, sections=[section_to_analyze])[section_to_analyze]
cell_contours_global = {i: cell_centroids_per_section[section_to_analyze][i].astype(np.int) - cell_mask_centers[i] + cell_contours[i]
                        for i in range(cell_numbers_per_section[section_to_analyze])}

In [73]:
viz = image.copy()

for i in large_cell_indices_inside:
    cv2.polylines(viz, [cell_contours_global[i].astype(np.int)], isClosed=True, color=(255,0,0), thickness=1)
    
for i in small_cell_indices_inside:
    cv2.polylines(viz, [cell_contours_global[i].astype(np.int)], isClosed=True, color=(0,255,0), thickness=1)
        
cv2.polylines(viz, [cnt_cropped.astype(np.int)], isClosed=True, color=(0,0,255), thickness=1)

# Visualize radial-angular histogram of selected cells

# for source in large_cell_indices_inside:
for source in large_cell_indices_inside[50:51]:
    for i in neighbors[source]:
        
        if i in large_cell_indices_inside:
            cv2.circle(viz, tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 3, color=(255,0,0), thickness=-1)
            
            cv2.line(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                 tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 
                 color=(255,0,0), thickness=1)


        if i in small_cell_indices_inside:
            cv2.circle(viz, tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 3, color=(0,255,0), thickness=-1)            
            
            cv2.line(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                     tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 
                     color=(0,255,0), thickness=1)

        
    # draw radial bins
    for rb in radial_bins:
        cv2.circle(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                  int(rb), color=(0,0,255), thickness=1)

    # draw angular bins
    for ab in angular_bins:
        cv2.line(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                 tuple((cell_centroids_per_section[section_to_analyze][source] + (np.cos(ab)*100, np.sin(ab)*100)).astype(np.int)),
         color=(0,0,255), thickness=1)

# Find crop box

margin = 50
roi_xmin, roi_ymin = cnt_cropped.astype(np.int).min(axis=0) - margin
roi_xmax, roi_ymax = cnt_cropped.astype(np.int).max(axis=0) + margin
print roi_xmin, roi_xmax, roi_ymin, roi_ymax

display_image(viz[roi_ymin:roi_ymax+1, roi_xmin:roi_xmax+1])

7174 8481 10269 11122


In [79]:
# Construct graph

import networkx as nx
g = nx.Graph()

In [83]:
for source_sectionwise_idx in large_cell_indices_inside:
    
    neighbor_masks = np.array([cell_masks_aligned_padded_flattened_per_section[section_to_analyze][i_sectionwise_idx]
                      for i_sectionwise_idx in neighbors[source_sectionwise_idx]])
    
    jacs, _ = compute_jaccard_x_vs_list(cell_masks_aligned_padded_flattened_per_section[section_to_analyze][source_sectionwise_idx], 
                                    neighbor_masks,
                                    x_h=cell_masks_aligned_padded_horizontal_flattened_per_section[section_to_analyze][source_sectionwise_idx],
                                     x_v=cell_masks_aligned_padded_vertical_flattened_per_section[section_to_analyze][source_sectionwise_idx],
                                     x_d=cell_masks_aligned_padded_diagonal_flattened_per_section[section_to_analyze][source_sectionwise_idx])
    
    for i_sectionwise_idx, vec, jac in zip(neighbors[source_sectionwise_idx], neighbor_vectors[source_sectionwise_idx], jacs):
        length = np.sqrt(np.sum(vec**2))
        direction = np.arctan2(vec[1], vec[0])
        orientation_diff = np.abs(cell_orientations_per_section[section_to_analyze][source_sectionwise_idx] - cell_orientations_per_section[section_to_analyze][i_sectionwise_idx])
        size_diff = np.abs(cell_sizes_per_section[section_to_analyze][source_sectionwise_idx] - cell_sizes_per_section[section_to_analyze][i_sectionwise_idx])
    
        g.add_edge(source_sectionwise_idx, i_sectionwise_idx, weight=1, length=length, direction=direction, 
                   orientation_diff=orientation_diff,
                  size_diff=size_diff,
                  jaccard=jac)

In [84]:
for u, v in g.edges_iter():
#     print u, v
    print g.get_edge_data(u, v)

{'size_diff': 1440, 'weight': 1, 'jaccard': 0.070909090909090908, 'length': 98.194300882521532, 'orientation_diff': 1.335130090045004, 'direction': -2.6648013321959305}
{'size_diff': 1236, 'weight': 1, 'jaccard': 0.19039301310043669, 'direction': 2.1935084455187952, 'length': 82.903515504941367, 'orientation_diff': 0.42952213848519938}
{'size_diff': 1495, 'weight': 1, 'jaccard': 0.02564102564102564, 'length': 65.466222646420576, 'orientation_diff': 0.40253956848198136, 'direction': 0.20159967035626031}
{'size_diff': 1479, 'weight': 1, 'jaccard': 0.043956043956043959, 'length': 70.391968010306385, 'orientation_diff': 0.98750784129011404, 'direction': 2.099650644919933}
{'size_diff': 1332, 'weight': 1, 'jaccard': 0.15476190476190477, 'direction': 1.1917501035731137, 'length': 64.551790780908789, 'orientation_diff': 0.40511585593568411}
{'size_diff': 1509, 'weight': 1, 'jaccard': 0.01282051282051282, 'length': 62.849956082710783, 'orientation_diff': 0.053291053092801466, 'direction': 1.20

In [88]:
large_cell_subgraph = g.subgraph(large_cell_indices_inside)

In [92]:
large_cell_subgraph.number_of_edges()

499

In [90]:
for u, v in large_cell_subgraph.edges_iter():
#     print u, v
    print large_cell_subgraph.get_edge_data(u, v)

{'size_diff': 601, 'weight': 1, 'jaccard': 0.341324200913242, 'direction': 0.40625863617100244, 'length': 95.878407359319112, 'orientation_diff': 0.077299467625365659}
{'size_diff': 977, 'weight': 1, 'jaccard': 0.27132486388384752, 'direction': 0.25761495730220757, 'length': 71.594809484772057, 'orientation_diff': 0.46836272350922914}
{'size_diff': 917, 'weight': 1, 'jaccard': 0.23565323565323565, 'direction': 1.0803426394587143, 'length': 60.228218217263432, 'orientation_diff': 0.79353553581117908}
{'size_diff': 706, 'weight': 1, 'jaccard': 0.31118143459915609, 'direction': 1.2668972641829805, 'length': 67.297388983594388, 'orientation_diff': 0.065468085316862767}
{'size_diff': 138, 'weight': 1, 'jaccard': 0.45510835913312692, 'direction': 0.13793397971939311, 'length': 73.472863044492939, 'orientation_diff': 0.51315771954686595}
{'size_diff': 147, 'weight': 1, 'jaccard': 0.50986842105263153, 'direction': 0.62725051235177987, 'length': 66.625729135793179, 'orientation_diff': 0.3141149

In [96]:
cell_sizes_per_section[section_to_analyze]

[54,
 53,
 143,
 36,
 136,
 67,
 229,
 50,
 216,
 57,
 80,
 150,
 93,
 245,
 230,
 59,
 229,
 69,
 97,
 69,
 62,
 66,
 421,
 166,
 213,
 73,
 66,
 78,
 177,
 439,
 83,
 110,
 465,
 224,
 157,
 79,
 439,
 187,
 201,
 231,
 245,
 43,
 157,
 216,
 33,
 146,
 48,
 62,
 206,
 150,
 193,
 19,
 560,
 237,
 245,
 443,
 13,
 52,
 82,
 17,
 509,
 500,
 118,
 48,
 72,
 125,
 408,
 643,
 280,
 313,
 359,
 115,
 55,
 62,
 71,
 85,
 181,
 398,
 105,
 54,
 57,
 71,
 81,
 94,
 28,
 272,
 528,
 48,
 309,
 156,
 59,
 71,
 150,
 52,
 172,
 86,
 154,
 888,
 67,
 291,
 254,
 360,
 488,
 63,
 175,
 183,
 490,
 108,
 208,
 26,
 69,
 23,
 406,
 44,
 32,
 211,
 77,
 192,
 431,
 21,
 65,
 89,
 203,
 367,
 322,
 41,
 78,
 57,
 176,
 124,
 60,
 186,
 716,
 72,
 149,
 231,
 42,
 181,
 130,
 60,
 65,
 242,
 68,
 159,
 346,
 677,
 64,
 77,
 170,
 225,
 315,
 76,
 229,
 75,
 439,
 188,
 395,
 451,
 169,
 238,
 118,
 151,
 57,
 69,
 68,
 33,
 365,
 393,
 64,
 594,
 28,
 37,
 84,
 145,
 80,
 130,
 60,
 65,
 135,
 65,
 

In [85]:
# composite_bin_indices = [r * n_angular + a for r, a in zip(radial_indices, angular_indices)]

# n_composite_bins = n_radial * n_angular

# histogram = np.bincount(composite_bin_indices, minlength=n_composite_bins)
# histogram = histogram/float(histogram.sum())

# plt.bar(range(n_composite_bins), histogram);
# plt.ylabel('density');
# plt.xlabel('bins');
# plt.show();