In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import time
import cv2

from joblib import Parallel, delayed

sys.path.append(os.path.join(os.environ['REPO_DIR'], 'utilities'))
from utilities2015 import *
from data_manager import *
from metadata import *

import matplotlib.pyplot as plt
%matplotlib inline

from skimage.transform import rotate
from multiprocess import Pool

import pandas

from cell_utilities import *

Setting environment for Gordon


No vtk


In [2]:
stack = 'MD589'

In [3]:
DETECTED_CELLS_DIR = '/home/yuncong/csd395/CSHL_cells_v2/detected_cells/'
PROCESSED_CELLS_DIR = '/home/yuncong/csd395/CSHL_cells_v2/processed_cells/'

In [4]:
def load_cell_list_data(what):
    if what == 'address':
        fn = 'cell_addresses.bp'
    elif what == 'mask_aligned_padded':
        fn = 'cells_aligned_padded.bp'
    elif what == 'mask_aligned_padded_horizontal':
        fn = 'cells_h_aligned_padded.bp'
    elif what == 'mask_aligned_padded_vertical':
        fn = 'cells_v_aligned_padded.bp'
    elif what == 'mask_aligned_padded_diagonal':
        fn = 'cells_d_aligned_padded.bp'
    elif what == 'largest30p_indices':
        fn = 'largest30p_indices.bp'
    elif what == 'embedding':
        fn = 'embeddings_normalized_largest30p_nystromSample100.bp'
        
    return bp.unpack_ndarray_file(os.path.join(PROCESSED_CELLS_DIR, fn))

In [5]:
def load_cell_data(what, stack, sections):
    
    sections_to_filenames = metadata_cache['sections_to_filenames'][stack]
    
    if what == 'orientation':
        fn_template = '%(fn)s_blobOrientations.bp'
    elif what == 'major':
        fn_template = '%(fn)s_blobMajorAxisLen.bp'
    elif what == 'minor':
        fn_template = '%(fn)s_blobMinorAxisLen.bp'
    elif what == 'mask_center':
        fn_template = '%(fn)s_blobMaskCenters.bp'
    elif what == 'mask':
        fn_template = '%(fn)s_blobMasks.hdf'
    elif what == 'centroid':
        fn_template = '%(fn)s_blobCentroids.bp'
    elif what == 'contours':
        fn_template = '%(fn)s_blobContours.hdf'
    elif what == 'image':
        fn_template = '%(fn)s_image.jpg'
    else:
        raise Exception('Not recognized.')

    data = {}
    for sec in sections:
        fn = sections_to_filenames[sec]
        fp = os.path.join(DETECTED_CELLS_DIR, stack, '%(fn)s', fn_template) % {'fn': fn}
        if fp.endswith('bp'):
            data[sec] = bp.unpack_ndarray_file(fp)
        elif fp.endswith('jpg'):
            data[sec] = imread(fp)
        elif fp.endswith('hdf'):
            data[sec] = pandas.read_hdf(fp)
        else:
            raise Exception('Not recognized.')
            
    return data

In [6]:
cell_orientations_per_section = load_cell_data('orientation', stack=stack, sections=[192,242])
cell_centroids_per_section = load_cell_data('centroid', stack=stack, sections=[192,242])

In [7]:
cell_numbers_per_section = {sec: x.shape[0] for sec, x in cell_centroids_per_section.iteritems()}

In [8]:
cell_addresses = load_cell_list_data('address')

In [9]:
cell_masks_aligned_padded = load_cell_list_data('mask_aligned_padded')

In [10]:
cell_masks_aligned_padded_horizontal = load_cell_list_data('mask_aligned_padded_horizontal')
cell_masks_aligned_padded_vertical = load_cell_list_data('mask_aligned_padded_vertical')
cell_masks_aligned_padded_diagonal = load_cell_list_data('mask_aligned_padded_diagonal')

In [10]:
# cell_masks_per_section = load_cell_data('mask', stack, [192, 242])
# cell_sizes_per_section = {sec: [np.count_nonzero(msk) for msk in masks] for sec, masks in cell_masks_per_section.iteritems()}

In [11]:
def group_according_to_address(x, addresses):
    
    x_per_section = defaultdict(dict)

    for (sec, secwise_idx), xi in zip(addresses, x):
        x_per_section[sec][secwise_idx] = xi

    x_per_section.default_factory = None
    
    return x_per_section

cell_masks_aligned_padded_per_section = group_according_to_address(cell_masks_aligned_padded, cell_addresses)
cell_masks_aligned_padded_horizontal_per_section = group_according_to_address(cell_masks_aligned_padded_horizontal, cell_addresses)
cell_masks_aligned_padded_vertical_per_section = group_according_to_address(cell_masks_aligned_padded_vertical, cell_addresses)
cell_masks_aligned_padded_diagonal_per_section = group_according_to_address(cell_masks_aligned_padded_diagonal, cell_addresses)

def flatten_mask_dict(cell_masks_aligned_padded_per_section):
    cell_masks_aligned_padded_flattened_per_section = {sec: {secwise_idx: mask.flatten() 
                                                         for secwise_idx, mask in masks.iteritems()}
                                                   for sec, masks in cell_masks_aligned_padded_per_section.iteritems()}
    return cell_masks_aligned_padded_flattened_per_section

cell_masks_aligned_padded_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_per_section)
cell_masks_aligned_padded_horizontal_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_horizontal_per_section)
cell_masks_aligned_padded_vertical_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_vertical_per_section)
cell_masks_aligned_padded_diagonal_flattened_per_section = flatten_mask_dict(cell_masks_aligned_padded_diagonal_per_section)

In [24]:
cell_sizes_per_section = {sec: {idx: np.count_nonzero(msk) for idx, msk in masks.iteritems()} for sec, masks in cell_masks_aligned_padded_flattened_per_section.iteritems()}

In [26]:
# Free up memory

del cell_masks_aligned_padded_per_section, cell_masks_aligned_padded_horizontal_per_section, \
cell_masks_aligned_padded_vertical_per_section, cell_masks_aligned_padded_diagonal_per_section

NameError: name 'cell_masks_aligned_padded_per_section' is not defined

In [14]:
del cell_masks_aligned_padded, cell_masks_aligned_padded_horizontal, \
cell_masks_aligned_padded_vertical, cell_masks_aligned_padded_diagonal

In [27]:
large_cell_indices = load_cell_list_data('largest30p_indices')

In [28]:
cell_types_per_section = {sec: np.zeros((cell_numbers_per_section[sec],), dtype=np.uint8) for sec in [192, 242]}

for i in large_cell_indices:
    sec, idx = cell_addresses[i]
    cell_types_per_section[sec][idx] = 1

In [29]:
large_cell_indices_per_section = {sec: np.where(types)[0] for sec, types in cell_types_per_section.iteritems()}

In [30]:
large_cell_embedding = load_cell_list_data('embedding')

In [31]:
from scipy.spatial.kdtree import KDTree
from scipy.spatial.distance import cdist, pdist

In [None]:
# Load annotation contours

downsample_factor = 1

anchor_filename = metadata_cache['anchor_fn'][stack]
sections_to_filenames = metadata_cache['sections_to_filenames'][stack]
filenames_to_sections = {f: s for s, f in sections_to_filenames.iteritems()
                        if f not in ['Placeholder', 'Nonexisting', 'Rescan']}

# Load transforms, defined on thumbnails
import cPickle as pickle
Ts = pickle.load(open(thumbnail_data_dir + '/%(stack)s/%(stack)s_elastix_output/%(stack)s_transformsTo_anchor.pkl' % dict(stack=stack), 'r'))

Ts_inv_downsampled = {}
for fn, T0 in Ts.iteritems():
    T = T0.copy()
    T[:2, 2] = T[:2, 2] * 32 / downsample_factor
    Tinv = np.linalg.inv(T)
    Ts_inv_downsampled[fn] = Tinv

# Load bounds
crop_xmin, crop_xmax, crop_ymin, crop_ymax = metadata_cache['cropbox'][stack]
print 'crop:', crop_xmin, crop_xmax, crop_ymin, crop_ymax

# tb_vol_xmin, tb_vol_xmax, tb_vol_ymin, tb_vol_ymax, tb_vol_zmin, tb_vol_zmax = \
# np.loadtxt(os.path.join(VOLUME_ROOTDIR, stack, '%(stack)s_down%(downsample)dVolume_bbox.txt') % \
#            dict(stack=stack, downsample=32), dtype=np.int)
# print 'tb_vol:', tb_vol_xmin, tb_vol_xmax, tb_vol_ymin, tb_vol_ymax, tb_vol_zmin, tb_vol_zmax

In [82]:
section_to_analyze = 192

In [83]:
contour_df, _ = DataManager.load_annotation_v3(stack=stack, annotation_rootdir=annotation_midbrainIncluded_v2_rootdir)
contours = {cnt['name']: cnt['vertices'] for cnt_id, cnt in contour_df[contour_df['section'] == section_to_analyze].iterrows()}

'No object named structures in the file'


Annotation has no structures.


In [84]:
# Find each LARGE cell's neighbors

t = time.time()

tree = KDTree(cell_centroids_per_section[section_to_analyze])

pool = Pool(12)
neighbors = pool.map(lambda i: list(set(tree.query_ball_point(cell_centroids_per_section[section_to_analyze][i], r=100)) - {i}),
                     large_cell_indices_per_section[section_to_analyze])
pool.terminate()
pool.join()

sys.stderr.write('Neighbor search: %.2f seconds\n' % (time.time()-t)) # 10 seconds

neighbors = dict(zip(large_cell_indices_per_section[section_to_analyze], neighbors))

# Compute neighbot vectors

neighbor_vectors = {i: cell_centroids_per_section[section_to_analyze][i] - cell_centroids_per_section[section_to_analyze][nns] 
                    for i, nns in neighbors.iteritems()}


# Binning each cell's neighbors

# radial_bins = np.logspace(0, 2, 10, base=10)
radial_bins = np.linspace(0, 100, 10)
n_radial = len(radial_bins)

angular_bins = np.linspace(-np.pi, np.pi, 8)
n_angular = len(angular_bins)


radial_indices_all = {}
angular_indices_all = {}
for i in neighbor_vectors.iterkeys():
    radial_indices, angular_indices = allocate_radial_angular_bins(neighbor_vectors[i], 
                                                                   cell_orientations_per_section[section_to_analyze][i],
                                                        angular_bins=angular_bins, radial_bins=radial_bins)
    radial_indices_all[i] = radial_indices
    angular_indices_all[i] = angular_indices
    
# Examples
print neighbors[16]
print neighbor_vectors[16]
print radial_indices_all[16]
print angular_indices_all[16]

Neighbor search: 14.52 seconds


[1, 321, 201, 10, 365, 334, 176, 110, 297, 185, 217, 93, 394]
[[-28.59108511   4.049518  ]
 [-48.95959019 -66.04874034]
 [-29.96215429 -40.71882581]
 [ 74.32117904   2.04645197]
 [ 20.99089735 -76.70865367]
 [ 65.65841308 -68.31338846]
 [ 25.22117904 -36.4077147 ]
 [ 94.35668629 -23.33959876]
 [ -2.03791187 -61.25922985]
 [-78.00024953 -36.82676232]
 [  4.37117904 -44.95643265]
 [ 35.37117904 -19.24874034]
 [  1.12117904 -80.04104803]]
[3 8 5 7 8 9 4 9 6 8 5 4 8]
[7 1 1 4 2 3 3 4 2 1 2 3 2]


In [85]:
image = load_cell_data('image', stack=stack, sections=[section_to_analyze])[section_to_analyze]

In [87]:
contours.keys()

['7n', 'RMC', '7N', 'SC', 'IC', 'Pn']

In [88]:
# Identify large and small cells inside each structure.

large_cell_indices_this_section = large_cell_indices_per_section[section_to_analyze]
large_cell_centroids_this_section = cell_centroids_per_section[section_to_analyze][large_cell_indices_this_section]

# for name_u, cnt in contours.iteritems():
name_u = 'Pn'
cnt = contours[name_u]

n = len(cnt)
fn = sections_to_filenames[section_to_analyze]
vertices_on_aligned = np.dot(Ts_inv_downsampled[fn], np.c_[cnt/downsample_factor, np.ones((n,))].T).T[:, :2]

xs = vertices_on_aligned[:,0] - crop_xmin * 32 / downsample_factor
ys = vertices_on_aligned[:,1] - crop_ymin * 32 / downsample_factor

cnt_cropped = np.c_[xs, ys].astype(np.int)

# Get large cells

# from shapely.geometry import Polygon
from matplotlib.path import Path

large_cell_is_inside = Path(cnt_cropped.astype(np.int)).contains_points(large_cell_centroids_this_section)
large_cell_indices_inside = large_cell_indices_this_section[large_cell_is_inside]

print '%d large cells are identified in %s.' % (len(large_cell_indices_inside), name_u)

# Small cells

small_cell_indices_this_section = np.array(list(set(range(cell_numbers_per_section[section_to_analyze])) - set(large_cell_indices_this_section.tolist())))
small_cell_centroids_this_section = cell_centroids_per_section[section_to_analyze][small_cell_indices_this_section]
small_cell_is_inside = Path(cnt_cropped.astype(np.int)).contains_points(small_cell_centroids_this_section)
small_cell_indices_inside = small_cell_indices_this_section[small_cell_is_inside]

print '%d small cells are identified in %s.' % (len(small_cell_indices_inside), name_u)

447 large cells are identified in Pn.
464 small cells are identified in Pn.


In [89]:
# Visualize cell contours

cell_contours = load_cell_data('contours', stack=stack, sections=[section_to_analyze])[section_to_analyze]
cell_mask_centers = load_cell_data('mask_center', stack=stack, sections=[section_to_analyze])[section_to_analyze]
cell_contours_global = {i: cell_centroids_per_section[section_to_analyze][i].astype(np.int) - cell_mask_centers[i] + cell_contours[i]
                        for i in range(cell_numbers_per_section[section_to_analyze])}

In [125]:
# Compute cell size distribution

size_bins = np.r_[np.linspace(0, 3000, 10), np.inf]

large_cell_size_inside_histogram, _ = np.histogram([cell_sizes_per_section[section_to_analyze][i] for i in large_cell_indices_inside], bins=size_bins)
print large_cell_size_inside_histogram

all_cell_size_inside_histogram, _ = np.histogram([cell_sizes_per_section[section_to_analyze][i] for i in np.r_[large_cell_indices_inside, small_cell_indices_inside]], 
                                       bins=size_bins)
print all_cell_size_inside_histogram

In [90]:
viz = image.copy()

for i in large_cell_indices_inside:
    cv2.polylines(viz, [cell_contours_global[i].astype(np.int)], isClosed=True, color=(255,0,0), thickness=1)
    
for i in small_cell_indices_inside:
    cv2.polylines(viz, [cell_contours_global[i].astype(np.int)], isClosed=True, color=(0,255,0), thickness=1)
        
cv2.polylines(viz, [cnt_cropped.astype(np.int)], isClosed=True, color=(0,0,255), thickness=1)

# Visualize radial-angular histogram of selected cells

# for source in large_cell_indices_inside:
for source in large_cell_indices_inside[50:51]:
    for i in neighbors[source]:
        
        if i in large_cell_indices_inside:
            cv2.circle(viz, tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 3, color=(255,0,0), thickness=-1)
            
            cv2.line(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                 tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 
                 color=(255,0,0), thickness=1)


        if i in small_cell_indices_inside:
            cv2.circle(viz, tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 3, color=(0,255,0), thickness=-1)            
            
            cv2.line(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                     tuple(cell_centroids_per_section[section_to_analyze][i].astype(np.int)), 
                     color=(0,255,0), thickness=1)

        
    # draw radial bins
    for rb in radial_bins:
        cv2.circle(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                  int(rb), color=(0,0,255), thickness=1)

    # draw angular bins
    for ab in angular_bins:
        cv2.line(viz, tuple(cell_centroids_per_section[section_to_analyze][source].astype(np.int)), 
                 tuple((cell_centroids_per_section[section_to_analyze][source] + (np.cos(ab)*100, np.sin(ab)*100)).astype(np.int)),
         color=(0,0,255), thickness=1)

# Find crop box

margin = 50
roi_xmin, roi_ymin = cnt_cropped.astype(np.int).min(axis=0) - margin
roi_xmax, roi_ymax = cnt_cropped.astype(np.int).max(axis=0) + margin
print roi_xmin, roi_xmax, roi_ymin, roi_ymax

display_image(viz[roi_ymin:roi_ymax+1, roi_xmin:roi_xmax+1])

3062 4618 8845 10529


In [91]:
# Construct graph

import networkx as nx
g = nx.Graph()

In [92]:
for source_sectionwise_idx in large_cell_indices_inside:
    
    neighbor_masks = np.array([cell_masks_aligned_padded_flattened_per_section[section_to_analyze][i_sectionwise_idx]
                      for i_sectionwise_idx in neighbors[source_sectionwise_idx]])
    
    jacs, _ = compute_jaccard_x_vs_list(cell_masks_aligned_padded_flattened_per_section[section_to_analyze][source_sectionwise_idx], 
                                    neighbor_masks,
                                    x_h=cell_masks_aligned_padded_horizontal_flattened_per_section[section_to_analyze][source_sectionwise_idx],
                                     x_v=cell_masks_aligned_padded_vertical_flattened_per_section[section_to_analyze][source_sectionwise_idx],
                                     x_d=cell_masks_aligned_padded_diagonal_flattened_per_section[section_to_analyze][source_sectionwise_idx])
    
    for i_sectionwise_idx, vec, jac in zip(neighbors[source_sectionwise_idx], neighbor_vectors[source_sectionwise_idx], jacs):
        length = np.sqrt(np.sum(vec**2))
        direction = np.arctan2(vec[1], vec[0])
        orientation_diff = np.abs(cell_orientations_per_section[section_to_analyze][source_sectionwise_idx] - cell_orientations_per_section[section_to_analyze][i_sectionwise_idx])
        size_diff = np.abs(cell_sizes_per_section[section_to_analyze][source_sectionwise_idx] - cell_sizes_per_section[section_to_analyze][i_sectionwise_idx])
    
        g.add_edge(source_sectionwise_idx, i_sectionwise_idx, weight=1, length=length, direction=direction, 
                   orientation_diff=orientation_diff,
                  size_diff=size_diff,
                  jaccard=jac)

In [93]:
for u, v in g.edges_iter():
#     print u, v
    print g.get_edge_data(u, v)

{'size_diff': 490, 'weight': 1, 'jaccard': 0.016064257028112448, 'length': 36.677575949443224, 'orientation_diff': 0.77086476165036188, 'direction': -3.0431266121155862}
{'size_diff': 162, 'weight': 1, 'jaccard': 0.047058823529411764, 'length': 66.489865738972782, 'orientation_diff': 0.038697472506822761, 'direction': -1.6928514676872368}
{'size_diff': 1001, 'weight': 1, 'jaccard': 0.0079286422200198214, 'length': 51.435610163751136, 'orientation_diff': 1.2583464903269936, 'direction': 0.72424803080374123}
{'size_diff': 382, 'weight': 1, 'jaccard': 0.020512820512820513, 'length': 78.611154518967226, 'orientation_diff': 0.11522882547165914, 'direction': 1.8058026206881632}
{'size_diff': 415, 'weight': 1, 'jaccard': 0.018912529550827423, 'length': 54.020711652374594, 'orientation_diff': 2.6865802463581989, 'direction': 2.3689797009463303}
{'size_diff': 358, 'weight': 1, 'jaccard': 0.02185792349726776, 'length': 31.737883650887763, 'orientation_diff': 2.0129551684023288, 'direction': 1.61

In [94]:
large_cell_subgraph = g.subgraph(large_cell_indices_inside)

In [95]:
large_cell_subgraph.number_of_edges()

2356

In [96]:
for u, v in large_cell_subgraph.edges_iter():
#     print u, v
    print large_cell_subgraph.get_edge_data(u, v)

{'size_diff': 813, 'weight': 1, 'jaccard': 0.21846435100548445, 'direction': 2.7905584222375657, 'length': 62.082119576240615, 'orientation_diff': 0.45425454466155579}
{'size_diff': 790, 'weight': 1, 'jaccard': 0.23835616438356164, 'direction': 2.2245119767918378, 'length': 52.073561012494643, 'orientation_diff': 0.69835530971765858}
{'size_diff': 903, 'weight': 1, 'jaccard': 0.15843429636533085, 'direction': 1.2724349815903933, 'length': 95.296011252867473, 'orientation_diff': 0.59144113725577807}
{'size_diff': 689, 'weight': 1, 'jaccard': 0.31616982836495033, 'direction': 0.65475089060048908, 'length': 37.659432101747228, 'orientation_diff': 0.20897071365174397}
{'size_diff': 834, 'weight': 1, 'jaccard': 0.20256645279560037, 'direction': 0.91981019350490034, 'length': 97.992391295718562, 'orientation_diff': 0.65166949307253608}
{'size_diff': 806, 'weight': 1, 'jaccard': 0.23844731977818853, 'direction': 1.694634543499304, 'length': 84.5295067893028, 'orientation_diff': 0.187409046295

In [85]:
# composite_bin_indices = [r * n_angular + a for r, a in zip(radial_indices, angular_indices)]

# n_composite_bins = n_radial * n_angular

# histogram = np.bincount(composite_bin_indices, minlength=n_composite_bins)
# histogram = histogram/float(histogram.sum())

# plt.bar(range(n_composite_bins), histogram);
# plt.ylabel('density');
# plt.xlabel('bins');
# plt.show();