In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
import time

from utilities2015 import *

from scipy.spatial.distance import cdist
from scipy.cluster.hierarchy import average, fcluster, single, complete

from joblib import Parallel, delayed

from skimage.color import gray2rgb
from skimage.measure import find_contours
from skimage.util import img_as_float

import matplotlib.pyplot as plt
%matplotlib inline

from networkx import from_dict_of_lists, Graph, adjacency_matrix, connected_components
from networkx.algorithms import node_connected_component, dfs_successors, dfs_postorder_nodes

In [2]:
# Name of the image stack whose sections are processed in this notebook.
stack_name = 'MD593'
# section_id = 138
# dm = DataManager(stack=stack_name, section=section_id, segm_params_id='gridsize200')
# Map section id -> DataManager for sections 138, 139 and 140, all created with
# the 'tSLIC200' segmentation parameter id.
# NOTE(review): under Python 2 this list comprehension leaves `section_id`
# bound (to 140) in the notebook namespace.
dms = dict([(section_id, DataManager(stack=stack_name, section=section_id, segm_params_id='tSLIC200')) 
            for section_id in [138,139,140]])

In [3]:
def obtain_props_worker(spp):
    """Pull the four per-region measurements used downstream out of `spp`.

    `spp` is one element of the list returned by `regionprops`. The values are
    read eagerly here so this function can be dispatched with joblib's
    `delayed`, one region per task.

    Returns:
        tuple: (centroid, area, bbox, coords), laid out as
        (row, col), a, (min_row, min_col, max_row, max_col), (rows, cols).
    """
    centroid = spp.centroid
    area = spp.area
    bbox = spp.bbox
    coords = spp.coords
    return centroid, area, bbox, coords

In [4]:
from skimage.segmentation import mark_boundaries, relabel_sequential

# Side length, in pixels, of each square grid cell used as a "superpixel".
grid_size = 200

# For every section: build a regular-grid segmentation, mask out the background,
# compute per-superpixel properties and render a labelled visualization.
for si, dm in dms.iteritems():
    
    # Tile the full image with grid_size x grid_size cells, numbered in the
    # order np.mgrid yields their top-left corners.
    # NOTE(review): labels are stored as int16, so more than 32767 grid cells
    # would overflow -- confirm the image sizes keep the cell count below that.
    segmentation = np.zeros((dm.image_height, dm.image_width), np.int16)
    rss, css = np.mgrid[0:dm.image_height:grid_size, 0:dm.image_width:grid_size]
    for gi, (rs, cs) in enumerate(zip(rss.flat, css.flat)):
        segmentation[rs:rs+grid_size, cs:cs+grid_size] = gi
    
    # Pixels outside the mask become background (-1).
    segmentation[~dm.mask] = -1

    # segmentation starts from 0
    # Shift by +1 so background is 0, then squeeze out label ids that vanished
    # entirely under the mask.
    masked_segmentation_relabeled, _, _ = relabel_sequential(segmentation + 1)

    # make background label -1
    dm.segmentation = masked_segmentation_relabeled - 1
    
    dm.n_superpixels = dm.segmentation.max() + 1
    
    # regionprops is called on labels shifted by +1 so that the background
    # (-1 -> 0) is not treated as a region.
    sp_all_props = regionprops(dm.segmentation + 1, cache=True)
    sp_props = Parallel(n_jobs=16)(delayed(obtain_props_worker)(spp) for spp in sp_all_props)
    dm.sp_centroids, dm.sp_areas, dm.sp_bbox, dm.sp_coords = map(np.asarray, zip(*sp_props))
    
    dm._load_image(versions=['rgb-jpg'])
    # Draw red superpixel boundaries on the image cropped to [ymin, ymax] x [xmin, xmax].
    segViz = mark_boundaries(dm.image_rgb_jpg[dm.ymin:dm.ymax+1, dm.xmin:dm.xmax+1], 
                             dm.segmentation[dm.ymin:dm.ymax+1, dm.xmin:dm.xmax+1], 
                             color=(1,0,0))
    
    dm.segVizText = img_as_ubyte(segViz)
    
    for s in range(dm.n_superpixels):
        # Centroids are (row, col) in full-image coordinates: reverse to (x, y),
        # shift into the cropped frame and nudge by (-10, +10) before drawing.
        # The builtin `int` replaces the `np.int` alias (removed in NumPy 1.24),
        # and the origin is handed to OpenCV as plain Python ints.
        label_xy = dm.sp_centroids[s][::-1].astype(int) - (dm.xmin, dm.ymin) - (10,-10)
        cv2.putText(dm.segVizText, str(s), 
                    tuple(int(v) for v in label_xy), 
                    cv2.FONT_HERSHEY_DUPLEX, .5, ((255,0,255)), 1)

  "%s to %s" % (dtypeobj_in, dtypeobj))


In [5]:
# Show the boundary-marked, id-labelled superpixel image of section 140.
display(dms[140].segVizText)

In [6]:
# Build one normalized texton histogram per superpixel for every section.
for si, dm in dms.iteritems():
    # presumably populates dm.textonmap (and dm.n_texton) -- TODO confirm against DataManager
    dm.load_multiple_results(['texMap'])
    
    # Row i counts how often each texton id occurs among the pixels of superpixel i.
    hs = np.array([np.bincount(dm.textonmap[dm.sp_coords[i][:,0], dm.sp_coords[i][:,1]], 
                                     minlength=dm.n_texton)
                         for i in range(dm.n_superpixels)])
    # Normalize each row to sum to 1. The builtin `float` replaces the
    # `np.float` alias, which was removed in NumPy 1.24.
    dm.texton_hists = hs/hs.sum(axis=1)[:,None].astype(float)

In [7]:
def compute_cluster_score(self, cluster, seed=None, seed_weight=0, verbose=False, method='rc-mean', thresh=.2):
    """Score how distinct a cluster of superpixels is from its surroundings.

    Reads `self.texton_hists` (indexed by superpixel id), `self.neighbors`
    (indexed by superpixel id; the id -1 is skipped wherever it appears) and,
    for the 'rc-*' methods, `self.sp_centroids`.

    Args:
        cluster: iterable of superpixel ids forming the cluster.
        seed: optional superpixel id; when given, the reference histogram used
            by the 'rc-*' methods is
            `seed_weight * hist[seed] + (1 - seed_weight) * cluster average`,
            and the distance from the cluster average to the seed is reported.
        seed_weight: weight of the seed histogram in that blend.
        verbose: print (and, for 'percentage-soft', plot) diagnostics.
        method: one of 'min', 'mean', 'percentage', 'percentage-soft',
            'rc-min', 'rc-mean'.
        thresh: distance threshold used by the two 'percentage' methods.

    Returns:
        A 9-tuple
        `(score, mean_sig_sur, mean_sig_front, inter_sp_dist, seed_dist,
          s1_max, s1_min, s2_max, s2_min)`.
        `s1_*` are the max/min of the surround significances and `s2_*` those
        of the frontier significances; entries that do not apply to the chosen
        method are NaN. All nine entries are NaN when the cluster is empty or
        has no surrounding superpixel.

    Raises:
        ValueError: if `method` is not one of the recognized names.
    """
    from operator import itemgetter
    from scipy.spatial.distance import cdist, pdist

    # Validate up front; the original raised a bare string, which is itself a
    # TypeError rather than a meaningful error.
    if method not in ('min', 'mean', 'percentage', 'percentage-soft', 'rc-min', 'rc-mean'):
        raise ValueError('unrecognized method: %r' % (method,))

    # Every return path yields 9 values so callers can always unpack the same
    # shape (the original degenerate return had only 7).
    all_nan = (np.nan,) * 9

    cluster_list = list(cluster)
    if len(cluster_list) == 0:
        return all_nan
    cluster_set = set(cluster_list)

    # Superpixels adjacent to the cluster, excluding its own members and -1.
    surrounds = set([i for i in set().union(*[self.neighbors[c] for c in cluster_list])
                     if i not in cluster_set and i != -1])

    if len(surrounds) == 0: # single sp on background
        return all_nan

    surrounds_list = list(surrounds)

    hists = self.texton_hists
    cluster_avg = hists[cluster_list].mean(axis=0)

    # Distance from the cluster average to each surrounding superpixel.
    ds = np.atleast_1d(np.squeeze(chi2s([cluster_avg], hists[surrounds_list])))

    sigs_sur = []
    sigs_front = []
    s1_max = s1_min = s2_max = s2_min = np.nan

    if method == 'min':
        score = ds.min()
        if verbose:
            print('min %s' % (surrounds_list[ds.argmin()],))

    elif method == 'mean':
        score = ds.mean()

    elif method == 'percentage':
        # Hard count: fraction of surrounds farther than `thresh`.
        score = np.count_nonzero(ds > thresh) / float(len(ds))

    elif method == 'percentage-soft':
        # Soft count: a sigmoid centred at `thresh` replaces the hard step.
        sigma = .01
        score = np.sum(1./(1+np.exp((thresh - ds)/sigma)))/len(ds)
        if verbose:
            for t in sorted(zip(surrounds_list, ds), key=itemgetter(1)):
                print(t)
            plt.hist(ds, bins=np.linspace(0,1,50));
            plt.show();

    else:  # 'rc-min' or 'rc-mean'

        # Reference histogram members are compared against; it does not depend
        # on the member, so it is computed once instead of once per member.
        if seed is not None:
            interior_ref = seed_weight*hists[seed] + (1.-seed_weight)*cluster_avg
        else:
            interior_ref = cluster_avg

        # Frontier significance: how much better each member fits the cluster
        # than it would fit being paired with a surrounding superpixel.
        if len(cluster_list) > 1:
            for f in cluster_list:
                adjacent_surrounds = surrounds & set(self.neighbors[f])
                if len(adjacent_surrounds) > 0:
                    alternative_sps = list(adjacent_surrounds)
                else:
                    # Interior member: fall back to the surround whose centroid is nearest.
                    nearest = np.squeeze(cdist([self.sp_centroids[f]], self.sp_centroids[surrounds_list])).argmin()
                    alternative_sps = [surrounds_list[nearest]]

                alternative_dist = np.mean([chi2(hists[f], hists[[s,f]].mean(axis=0)) for s in alternative_sps])
                curr_dist = chi2(hists[f], interior_ref)
                sigs_front.append(alternative_dist - curr_dist)

            if verbose:
                print('frontiers advantages')
                print(list(zip(cluster_list, sigs_front)))

        # Surround significance: how much worse each surrounding superpixel
        # fits the cluster than it fits its own outside neighbours.
        for s in surrounds_list:
            # -1 is removed as well: it is not a real superpixel id, and using
            # it as an index would silently read the LAST histogram row.
            sur_neighbors = (set(self.neighbors[s]) - cluster_set) - {-1}
            alt_dists = [chi2(hists[s], hists[[s,n]].mean(axis=0)) for n in sur_neighbors]
            # NaN when the surround has no outside neighbour (same value
            # np.mean([]) would give, without the RuntimeWarning).
            alternative_dist = np.mean(alt_dists) if len(alt_dists) > 0 else np.nan

            joined_avg = hists[cluster_list+[s]].mean(axis=0)
            if seed is not None:
                curr_dist = chi2(hists[s], seed_weight*hists[seed]+(1.-seed_weight)*joined_avg)
            else:
                curr_dist = chi2(hists[s], joined_avg)

            sigs_sur.append(curr_dist - alternative_dist)

        if verbose:
            print('surround advantages')
            print(list(zip(surrounds_list, sigs_sur)))

        s1_max = np.max(sigs_sur)
        s1_min = np.min(sigs_sur)
        if len(sigs_front) > 0:
            s2_max = np.max(sigs_front)
            s2_min = np.min(sigs_front)
            if method == 'rc-min':
                score = min(np.min(sigs_sur), np.min(sigs_front))
            else:
                score = .5*np.mean(sigs_sur)+.5*np.mean(sigs_front)
        else:
            # Single-superpixel cluster: only the surround term exists.
            score = np.min(sigs_sur) if method == 'rc-min' else np.mean(sigs_sur)

    # Mean pairwise distance between cluster members; undefined for one member.
    if len(cluster_list) > 1:
        inter_sp_dist = np.squeeze(pdist(hists[cluster_list], chi2)).mean()
    else:
        inter_sp_dist = np.nan

    if seed is not None:
        seed_dist = chi2(cluster_avg, hists[seed])
    else:
        seed_dist = np.nan

    if method == 'rc-min' or method == 'rc-mean':
        mean_front = np.mean(sigs_front) if len(sigs_front) > 0 else np.nan
        return score,  np.mean(sigs_sur),  mean_front, inter_sp_dist, seed_dist, s1_max, s1_min, s2_max, s2_min
    else:
        return score,  np.nan, np.nan, inter_sp_dist, seed_dist, np.nan, np.nan, np.nan, np.nan

In [8]:
def grow_cluster(self, seed, seed_weight=.5,
                 verbose=False, all_history=True,
                 num_sp_percentage_limit=0.05,
                 min_size=1, min_distance=3, thresh=.4,
                 threshold_abs=-0.05, threshold_rel=.4,
                 peakedness_limit=0.001, method='rc-min'):
    """Greedily grow a cluster of superpixels outward from `seed`.

    Each iteration adds the best-scoring candidate (the seed first), absorbs
    small connected components of the remaining graph as "holes", scores the
    cluster with `self.compute_cluster_score`, and then re-scores the
    neighbouring superpixels as candidates for the next step. Afterwards
    `find_score_peaks` is run on two columns of the score history to pick
    noteworthy iterations.

    Args:
        seed: id of the superpixel to start from.
        seed_weight, thresh, method: forwarded to `compute_cluster_score`.
        verbose: print per-iteration diagnostics.
        all_history: choose between the full and the short return form.
        num_sp_percentage_limit: growth stops once the cluster holds more than
            this fraction of `self.n_superpixels`.
        min_size, min_distance, threshold_abs, threshold_rel, peakedness_limit:
            forwarded to `find_score_peaks`.

    Returns:
        If `all_history` is true:
            `(addorder_list, score_tuples, peaks_sorted, peakedness_sorted,
              toadd_list, peaks_sorted1, peaks_sorted2)`
            where `addorder_list[k]` is the add-ordered member list after
            iteration k, `score_tuples[k]` is
            `[just_added_score] + compute_cluster_score(...)` and
            `toadd_list[k]` is the best candidate after iteration k (None when
            no candidate was left).
        Otherwise:
            `([addorder_list[i] for i in peaks_sorted], score_tuples[peaks_sorted, 1])`.
        In the degenerate cases (a NaN cluster score, or no iteration recorded)
        `([seed], -np.inf)` is returned regardless of `all_history`.

    Raises:
        RuntimeError: in verbose mode, if the cluster set and the add-order
            list get out of sync.
    """
    from itertools import chain
    from networkx import from_dict_of_lists, connected_components

    # self.load_multiple_results(['neighbors', 'texHist', 'segmentation'])

    neighbor_long_graph = from_dict_of_lists(dict(enumerate(self.neighbors)))

    all_sps = set(range(self.n_superpixels))
    size_limit = int(self.n_superpixels * num_sp_percentage_limit)
    max_hole_size = 10  # components of the remainder smaller than this are absorbed

    curr_cluster = set([])

    candidate_scores = [0]
    candidate_sps = [seed]

    score_tuples = []
    added_sps = []
    n_sps = []

    addorder_list = []
    toadd_list = []

    iter_ind = 0

    while len(candidate_sps) > 0:

        if verbose:
            print('\niter %s' % (iter_ind,))

        best_ind = np.argmax(candidate_scores)

        just_added_score = candidate_scores[best_ind]
        sp = candidate_sps[best_ind]

        del candidate_scores[best_ind]
        del candidate_sps[best_ind]

        if sp in curr_cluster:
            continue

        curr_cluster.add(sp)
        added_sps.append(sp)

        # Absorb holes: small connected components of the graph restricted to
        # the superpixels that are not yet in the cluster.
        # NOTE(review): this also absorbs any small component that is not
        # adjacent to the cluster -- confirm that is intended.
        remainder = neighbor_long_graph.subgraph(list(all_sps - curr_cluster))
        extra_sps = list(chain(*[c for c in connected_components(remainder)
                                 if len(c) < max_hole_size]))
        curr_cluster |= set(extra_sps)
        added_sps += extra_sps

        tt = self.compute_cluster_score(curr_cluster, seed=seed, seed_weight=seed_weight, verbose=verbose, thresh=thresh, method=method)

        tot, s1, s2, inter_sp_dist, seed_dist, s1_max, s1_min, s2_max, s2_min = tt

        # Stop when the cluster drifts from the seed / becomes heterogeneous,
        # or when it exceeds the size budget.
        if (len(curr_cluster) > 5 and (seed_dist > .2 or inter_sp_dist > .3)) or (len(curr_cluster) > size_limit):
            print('terminate %s %s' % (seed_dist, inter_sp_dist))
            break

        if np.isnan(tot):
            return [seed], -np.inf
        score_tuples.append(np.r_[just_added_score, tt])

        n_sps.append(len(curr_cluster))

        if verbose:
            print('add %s' % (sp,))
            print('extra %s' % (extra_sps,))
            print('added_sps %s' % (added_sps,))
            print('curr_cluster %s' % (curr_cluster,))
            print('n_sps %s' % (n_sps,))
            print('tt %s' % (tot,))
            if len(curr_cluster) != len(added_sps):
                print('%s %s' % (len(curr_cluster), len(added_sps)))
                # The original used a bare `raise` with no active exception.
                raise RuntimeError('cluster set and add-order list are out of sync')

        addorder_list.append(added_sps[:])

        # New candidates: everything adjacent to what was just added (minus the
        # -1 marker), merged with the remaining old candidates.
        just_added = list(extra_sps) + [sp]
        frontier = set().union(*[self.neighbors[i] for i in just_added]) - {-1}
        candidate_sps = list((set(candidate_sps) | frontier) - curr_cluster)

        # Candidate score = negative weighted distance to (a) the candidate's
        # in-cluster neighbours, (b) the whole cluster, (c) the seed.
        members = list(curr_cluster)
        candidate_scores = []
        for c in candidate_sps:
            int_neighbors = list(curr_cluster & set(self.neighbors[c]))
            int_dist = chi2(self.texton_hists[c], self.texton_hists[int_neighbors + [c]].mean(axis=0))
            curr_dist = chi2(self.texton_hists[c], self.texton_hists[members + [c]].mean(axis=0))
            dist_to_seed = chi2(self.texton_hists[c], self.texton_hists[seed])
            sc = .1 * int_dist + .3* curr_dist + .6*dist_to_seed
            candidate_scores.append(-sc)

        # np.argmax raises on an empty sequence, which happened once the
        # cluster had swallowed every reachable superpixel.
        if len(candidate_sps) > 0:
            next_sp = candidate_sps[np.argmax(candidate_scores)]
        else:
            next_sp = None

        if verbose:
            print('candidate\n')

            for i,j in sorted(zip(candidate_scores, candidate_sps), reverse=True):
                print('%s %s' % (i, j))
            print('best %s' % (next_sp,))

        toadd_list.append(next_sp)

        iter_ind += 1

    if len(score_tuples) == 0:
        # Growth stopped before any iteration was recorded; there is no score
        # history to search for peaks (indexing columns of an empty array fails).
        return [seed], -np.inf

    score_tuples = np.array(score_tuples)

    # Column 2 is the mean surround significance, column 3 the mean frontier
    # significance (column 0 is just_added_score, column 1 the total score).
    peaks_sorted1, peakedness_sorted1 = find_score_peaks(score_tuples[:,2], min_size=min_size, min_distance=min_distance,
                                                        threshold_abs=threshold_abs, threshold_rel=threshold_rel, 
                                                        peakedness_lim=peakedness_limit,
                                                        verbose=verbose)

    peaks_sorted2, peakedness_sorted2 = find_score_peaks(score_tuples[:,3], min_size=min_size, min_distance=min_distance,
                                                        threshold_abs=threshold_abs, threshold_rel=threshold_rel, 
                                                        peakedness_lim=peakedness_limit,
                                                        verbose=verbose)

    # Cast to int so the result can still be used as an index when both peak
    # lists are empty (np.r_ of two empty lists is a float array).
    peaks_sorted = np.unique(np.r_[peaks_sorted1, peaks_sorted2]).astype(int)
    # NOTE(review): np.unique sorts and de-duplicates the peakedness VALUES, so
    # entries no longer line up one-to-one with peaks_sorted -- confirm intent.
    peakedness_sorted = np.unique(np.r_[peakedness_sorted1, peakedness_sorted2])

    if all_history:
        return addorder_list, score_tuples, peaks_sorted, peakedness_sorted, toadd_list, peaks_sorted1, peaks_sorted2
    else:
        return [addorder_list[i] for i in peaks_sorted], score_tuples[peaks_sorted, 1]

In [11]:
# NOTE(review): this cell was executed as In [11], i.e. after the cells shown
# below it, and its recorded output ends in a NameError about 'cdist' --
# presumably raised inside the `neighbors` module; confirm it imports cdist.
from neighbors import *

# Attach neighbor information, computed from the segmentation and the
# superpixel centroids, to every section's DataManager.
for si, dm in dms.iteritems():
    dm.neighbors, dm.edge_neighbors, dm.dedge_neighbors = neighbors_info(dm.segmentation, dm.sp_centroids)    

 computing neighbors ... done in 46.3716528416 seconds
compute edge info ...

NameError: global name 'cdist' is not defined

In [9]:
# Grow a cluster from superpixel 962 of section 140, keeping the full history
# (all_history defaults to True, hence the 7-way unpacking).
# NOTE(review): needs dm.neighbors, which the recorded AttributeError shows was missing.
clusters_allhistory, score_tuples, peaks_sorted, _, _, peaks1, peaks2 = grow_cluster(dms[140], 962)

AttributeError: 'DataManager' object has no attribute 'neighbors'

In [None]:
# Plot the score history of the grown cluster together with the two peak sets.
dms[140].plot_scores(peaks1, peaks2, clusters_allhistory, score_tuples, visualize_peaks=True)


KeyboardInterrupt
ERROR:tornado.general:Uncaught exception, closing connection.
Traceback (most recent call last):
  File "/oasis/projects/nsf/csd181/yuncong/virtualenv-1.9.1/yuncongve/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 407, in _run_callback
    callback(*args, **kwargs)
  File "/oasis/projects/nsf/csd181/yuncong/virtualenv-1.9.1/yuncongve/lib/python2.7/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/oasis/projects/nsf/csd181/yuncong/virtualenv-1.9.1/yuncongve/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 252, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/oasis/projects/nsf/csd181/yuncong/virtualenv-1.9.1/yuncongve/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 213, in dispatch_shell
    handler(stream, idents, msg)
  File "/oasis/projects/nsf/csd181/yuncong/virtualenv-1.9.1/yuncongve/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 362, in exec

In [None]:
# Build one normalized texton histogram per superpixel of the second
# segmentation (the attributes suffixed with "2") for every section.
# NOTE(review): sp_coords2 / n_superpixels2 are not assigned in the cells shown
# here -- presumably set elsewhere; confirm before running.
for si, dm in dms.iteritems():
    dm.load_multiple_results(['texMap'])
    
    # sp_coords2 is offset by (ymin, xmin) before indexing the texton map.
    hs = np.array([np.bincount(dm.textonmap[dm.ymin+dm.sp_coords2[i][:,0], dm.xmin+dm.sp_coords2[i][:,1]], 
                                     minlength=dm.n_texton)
                         for i in range(dm.n_superpixels2)])
    # Normalize each row to sum to 1. The builtin `float` replaces the
    # `np.float` alias, which was removed in NumPy 1.24.
    dm.sp_hists2 = hs/hs.sum(axis=1)[:,None].astype(float)

In [None]:
# Neighbor sets for the second segmentation; the two edge-related outputs of
# neighbors_info are discarded.
for si, dm in dms.iteritems():
    dm.neighbors2, _, _ = neighbors_info(dm.segmentation2, dm.sp_centroids2)

In [None]:
# For each superpixel s of the first segmentation, the distinct labels of the
# second segmentation found at s's pixel coordinates.
# NOTE(review): `dm` is whatever DataManager the last executed loop left bound;
# also confirm sp_coords and segmentation2 share one coordinate frame.
label_contains = {}
for s in range(dm.n_superpixels):
    label_contains[s] = np.unique(dm.segmentation2[dm.sp_coords[s][:,0], dm.sp_coords[s][:,1]])

In [None]:
# Invert label_contains: second-segmentation label -> first-segmentation superpixel.
# A label listed under several superpixels keeps the one visited last.
sp_label = {}
for s, cs in label_contains.iteritems():
    for c in cs:
        sp_label[c] = s

In [None]:
# Inspect which second-segmentation labels fall inside superpixel 37.
label_contains[37]

In [None]:
# First-segmentation superpixel of every neighbor of second-segmentation label 135.
[sp_label[n] for n in dm.neighbors2[135]]

In [None]:
# Labelled superpixel image of the currently bound `dm`.
display(dm.segVizText)

In [None]:
# NOTE(review): segVizText2 is not assigned in the cells shown here --
# presumably the same visualization for the second segmentation; confirm.
display(dm.segVizText2)

In [None]:
# Shallow copy: the dict itself is new, the per-superpixel values are shared
# with label_contains until an entry is reassigned.
label_contains_new = label_contains.copy()

In [None]:
# Inspect the labels currently assigned to superpixel 37 in the edited copy.
label_contains_new[37]

In [None]:
# Move labels 135 and 156 from superpixel 37 to superpixel 38 in the copy only;
# both entries are rebound to new sets, so label_contains is left unchanged.
label_contains_new[38] = set(label_contains_new[38]) | {135,156}
label_contains_new[37] = set(label_contains_new[37]) - {135,156}

In [None]:
# Distance between each label group's mean histogram and the mean histogram of
# its one-hop surround, computed on the second segmentation.
d_sur = np.zeros((dm.n_superpixels,))

for l, sps in label_contains.iteritems():
    
    cl = list(sps)
    
    # One-hop surround: neighbors of any member that are not members themselves.
    # NOTE(review): -1 is not removed here; if neighbors2 can contain -1, it
    # would index the last row of sp_hists2 -- confirm.
    surrounds = set.union(*[dm.neighbors2[i] for i in cl]) - set(cl)
    # Two-hop surround: neighbors of the surround, minus members and the surround.
    surrounds_twohop = (set.union(*[dm.neighbors2[i] for i in surrounds]) - set(cl)) - surrounds
    h_cl = dm.sp_hists2[cl].mean(axis=0)
    h_sur = dm.sp_hists2[list(surrounds)].mean(axis=0)
    # h_sur2 is computed but not used in this cell.
    h_sur2 = dm.sp_hists2[list(surrounds_twohop)].mean(axis=0)
        
    # Groups whose histogram bin 3 is >= 0.3 keep a distance of 0.
    if h_cl[3] < 0.3:
        d_sur[l] = chi2(h_cl, h_sur)

In [None]:
# Same computation as for d_sur, run on the edited grouping label_contains_new.
# NOTE(review): this cell duplicates the d_sur cell line for line; a shared
# function taking the grouping as a parameter would avoid the copy.
d_sur_new = np.zeros((dm.n_superpixels,))

for l, sps in label_contains_new.iteritems():
    
    cl = list(sps)
    
    # One-hop surround: neighbors of any member that are not members themselves.
    surrounds = set.union(*[dm.neighbors2[i] for i in cl]) - set(cl)
    # Two-hop surround: neighbors of the surround, minus members and the surround.
    surrounds_twohop = (set.union(*[dm.neighbors2[i] for i in surrounds]) - set(cl)) - surrounds
    h_cl = dm.sp_hists2[cl].mean(axis=0)
    h_sur = dm.sp_hists2[list(surrounds)].mean(axis=0)
    # h_sur2 is computed but not used in this cell.
    h_sur2 = dm.sp_hists2[list(surrounds_twohop)].mean(axis=0)
        
    # Groups whose histogram bin 3 is >= 0.3 keep a distance of 0.
    if h_cl[3] < 0.3:
        d_sur_new[l] = chi2(h_cl, h_sur)

In [None]:
# Per-superpixel change in surround distance caused by the regrouping.
d_sur_new - d_sur

In [None]:
# For section 139: distance from every single superpixel's histogram to the
# mean histogram of its one-hop (d_sur) and two-hop (d_sur2) surround.
# NOTE(review): this reads dm.sp_hists, while the earlier histogram cell stores
# dm.texton_hists -- confirm sp_hists is provided elsewhere.
dm = dms[139]
d_sur = np.zeros((dm.n_superpixels,))
d_sur2 = np.zeros((dm.n_superpixels,))
for s in range(dm.n_superpixels):
    cl = [s]
    # One-hop surround of s, then the ring of neighbors beyond it.
    surrounds = set.union(*[dm.neighbors[i] for i in cl]) - set(cl)
    surrounds_twohop = (set.union(*[dm.neighbors[i] for i in surrounds]) - set(cl)) - surrounds
    h_cl = dm.sp_hists[cl].mean(axis=0)
    h_sur = dm.sp_hists[list(surrounds)].mean(axis=0)
    h_sur2 = dm.sp_hists[list(surrounds_twohop)].mean(axis=0)
    
    # Superpixels whose histogram bin 3 is >= 0.3 keep a distance of 0.
    if dm.sp_hists[s,3] < 0.3:
        d_sur[s] = chi2(h_cl, h_sur)
        d_sur2[s] = chi2(h_cl, h_sur2)

In [None]:
# Paint every pixel with the surround distance of its superpixel.
# The segmentation built earlier in this notebook labels background as -1;
# indexing d_sur with -1 would silently pick the LAST superpixel's value, so
# background pixels are set to 0 instead.
d_sur_map = np.where(dm.segmentation >= 0, d_sur[dm.segmentation], 0.)

In [None]:
# Colour-code the distance map with the jet colormap (result has an RGBA last axis).
d_sur_overlay = plt.cm.jet(d_sur_map)

In [None]:
# Blend the cropped section image with the RGB channels of the overlay.
# NOTE(review): the image is cropped to [ymin, ymax] x [xmin, xmax] but the
# overlay is not -- confirm both have the same shape.
d_sur_viz = alpha_blending(dm.image_rgb_jpg[dm.ymin:dm.ymax+1, dm.xmin:dm.xmax+1],
                          d_sur_overlay[...,:3], .1, .9)

In [None]:
# Show the blended surround-distance visualization.
display(d_sur_viz)

In [None]:
# Plot the raw one-hop surround-distance map with a colour scale.
plt.matshow(d_sur_map);
plt.colorbar();

In [None]:
# Paint every pixel with the two-hop surround distance of its superpixel.
# The segmentation built earlier in this notebook labels background as -1;
# indexing d_sur2 with -1 would silently pick the LAST superpixel's value, so
# background pixels are set to 0 instead.
d_sur2_map = np.where(dm.segmentation >= 0, d_sur2[dm.segmentation], 0.)
plt.matshow(d_sur2_map, cmap=plt.cm.RdBu_r);
plt.colorbar();

In [None]:
# Labelled superpixel image of section 140, for reference.
display(dms[140].segVizText)

In [None]:
# Labelled superpixel image of the currently bound `dm`.
display(dm.segVizText)