In [7]:
%load_ext autoreload
%autoreload 2

import numpy as np

import sys
import os

sys.path.append(os.environ['REPO_DIR'] + '/utilities')
from utilities2015 import *

import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd

from joblib import Parallel, delayed
import time

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
# Class names; position 0 ('BackG') is the entry skipped when scores are loaded below.
labels = [
    'BackG', '5N', '7n', '7N', '12N', 'Pn', 'VLL', '6N',
    'Amb', 'R', 'Tz', 'RtTg', 'LRt', 'LC', 'AP', 'sp5',
]

# Map each class name to its position in `labels`.
label_dict = dict(zip(labels, range(len(labels))))

In [9]:
from scipy.interpolate import RectBivariateSpline
from skimage.transform import resize

In [10]:
# Root directory of the per-stack index tables
# ('<stack>_indices_allROIs_allSections.h5') read by the score-map loop below.
patches_rootdir = '/home/yuncong/CSHL_data_patches'

In [15]:
# input: root of the sparse per-class scores. For each section the loop below loads
# '<root>/<stack>/<sec>/<stack>_<sec>_roi1_<label>_sparseScores.npy'.
# Earlier versions, kept for reference:
# predictions_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_patch_predictions_svm_Sat16ClassFinetuned/'
# predictions_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_patch_predictions_svm_Sat16ClassFinetuned_v2/'
predictions_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_patch_predictions_svm_Sat16ClassFinetuned_v3/'
# NOTE(review): create_if_not_exists comes from utilities2015; presumably it creates the
# directory when missing. For an *input* root that would turn a mistyped path into an
# empty directory instead of an immediate error -- confirm this is intended.
create_if_not_exists(predictions_rootdir)

# output: root of the dense score maps. For each section and detected class the loop below
# writes '..._denseScoreMapLossless_<label>.hdf' and '..._interpBox.txt' under '<root>/<stack>/<sec>/'.
# Earlier versions, kept for reference:
# scoremaps_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_scoremaps_lossless_svm_Sat16ClassFinetuned/'
# scoremaps_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_scoremaps_lossless_svm_Sat16ClassFinetuned_v2/'
scoremaps_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_scoremaps_lossless_svm_Sat16ClassFinetuned_v3/'
# Last expression of the cell: its return value is what the notebook displays.
create_if_not_exists(scoremaps_rootdir)

'/oasis/projects/nsf/csd395/yuncong/CSHL_scoremaps_lossless_svm_Sat16ClassFinetuned_v3/'

In [16]:
# Stacks to process. Alternatives used in earlier runs:
# for stack in ['MD589']:
# for stack in ['MD585']:
# for stack in ['MD585', 'MD593', 'MD592', 'MD590']:
# for stack in ['MD585', 'MD593', 'MD592', 'MD590', 'MD591', 'MD595', 'MD598', 'MD602', 'MD594']:
for stack in ['MD594']:

    # Not used further in this cell.
    first_bs_sec, last_bs_sec = section_range_lookup[stack]

    # Per-stack table holding, for every section, the sample-grid indices of each ROI,
    # together with the grid parameters (patch size, stride, width, height).
    table_filepath = os.path.join(patches_rootdir, '%(stack)s_indices_allROIs_allSections.h5'%{'stack':stack})
    indices_allROIs_allSections = pd.read_hdf(table_filepath, 'indices_allROIs_allSections')
    grid_parameters = pd.read_hdf(table_filepath, 'grid_parameters')

    patch_size, stride, w, h = grid_parameters.tolist()
    # Explicit floor division: the coordinates are integers (range() below requires that),
    # and `//` keeps them so regardless of the division mode in effect.
    half_size = patch_size//2

    # Regular grid of sample centres as an (n_points, 2) array of (x, y).
    # The stored ROI indices point into this array, so the flattening order
    # produced by this exact meshgrid call must not change.
    ys, xs = np.meshgrid(np.arange(half_size, h-half_size, stride), np.arange(half_size, w-half_size, stride),
                     indexing='xy')

    sample_locations = np.c_[xs.flat, ys.flat]

    first_detect_sec, last_detect_sec = detect_bbox_range_lookup[stack]

    # Only every 10th section of the detection range is processed.
    for sec in range(first_detect_sec, last_detect_sec+1, 10):

        if sec not in indices_allROIs_allSections.columns:
            continue

        print(sec)

        indices_roi = indices_allROIs_allSections[sec]['roi1']

        # input
        # NOTE(review): this is an input directory; if create_if_not_exists creates it when
        # missing, absent predictions only surface at np.load below -- confirm intended.
        predictions_dir = os.path.join(predictions_rootdir, stack, '%04d'%sec)
        create_if_not_exists(predictions_dir)

        # output
        scoremaps_dir = os.path.join(scoremaps_rootdir, stack, '%04d'%sec)
        create_if_not_exists(scoremaps_dir)

        t = time.time()

        # (x, y) centres of the samples that fall inside the ROI.
        sample_locations_roi = sample_locations[indices_roi]

        ## interpolate

        # Bounding box of the ROI samples, in full-resolution pixel coordinates.
        interpolation_xmin, interpolation_ymin = sample_locations_roi.min(axis=0)
        interpolation_xmax, interpolation_ymax = sample_locations_roi.max(axis=0)
        interpolation_w = interpolation_xmax - interpolation_xmin + 1
        interpolation_h = interpolation_ymax - interpolation_ymin + 1

        ##### sample_locations_roi + scores to dense_score_map #####

        shrink_factor = 4 # do interpolation on a smaller grid, then resize to original dimension

        # Sorted distinct x / y coordinates, and for every ROI sample its position in those
        # sorted lists (vectorised equivalent of a value -> index dictionary lookup).
        sample_locations_unique_xs, sample_x_positions = np.unique(sample_locations_roi[:,0], return_inverse=True)
        sample_locations_unique_ys, sample_y_positions = np.unique(sample_locations_roi[:,1], return_inverse=True)

        n_sample_x = sample_locations_unique_xs.size
        n_sample_y = sample_locations_unique_ys.size

        # (n_points, 2) array of (x index, y index) into the n_sample_x by n_sample_y score matrix.
        sample_location_indices = np.c_[sample_x_positions, sample_y_positions]

        # Coordinates of every pixel of the shrunken interpolation area; both matrices have
        # rows running over y and columns over x.
        # NOTE(review): range() excludes the upper bound, so the last shrunken row/column of
        # the box is not evaluated before the map is resized -- confirm this is acceptable.
        sample_locations_interpolatedArea_ys_matrix, \
        sample_locations_interpolatedArea_xs_matrix = np.meshgrid(range(interpolation_ymin//shrink_factor,
                                                                        interpolation_ymax//shrink_factor),
                                                                  range(interpolation_xmin//shrink_factor,
                                                                        interpolation_xmax//shrink_factor),
                                                                  indexing='ij')

        dataset = '%(stack)s_%(sec)04d_roi1' % {'stack': stack, 'sec': sec}

        # Sparse scores for every class except the first entry of `labels`, keyed by class name.
        probs_allClasses = dict([(label, np.load(predictions_dir + '/%(dataset)s_%(label)s_sparseScores.npy' % \
                                                 {'dataset': dataset, 'label': label}))
                                 for label in labels[1:]])

        sys.stderr.write('preprocess: %.2f seconds\n' % (time.time() - t))
        
        def generate_score_map(label):
            """Interpolate one class's sparse scores into a dense map and save it.

            Relies on the per-section variables set by the enclosing loop
            (sample grid, interpolation box, ``probs_allClasses``, ``scoremaps_dir``,
            ``dataset``, ``sec``) rather than on arguments.

            Parameters
            ----------
            label : str
                Class name, a key of ``probs_allClasses``. 'BackG' is skipped.

            Returns
            -------
            None
                Always. When at least 1e5 pixels survive the 0.1 threshold, the map is
                saved as float16 via ``save_hdf`` and the interpolation box
                (xmin, xmax, ymin, ymax) is written next to it as a one-row text file;
                otherwise a message is written to stderr and nothing is saved.
            """

            if label == 'BackG':
                return None

            # Scatter the sparse scores onto the regular sample grid (axis 0 = x, axis 1 = y);
            # grid cells without a sample keep the value 0.
            score_matrix = np.zeros((n_sample_x, n_sample_y))
            score_matrix[sample_location_indices[:,0], sample_location_indices[:,1]] = probs_allClasses[label]

            # Fit the spline in shrunken coordinates. Explicit floor division keeps the knots
            # and the bounding box on the same integer grid as the evaluation mesh, whatever
            # division mode is in effect.
            spline = RectBivariateSpline(sample_locations_unique_xs//shrink_factor,
                                         sample_locations_unique_ys//shrink_factor,
                                         score_matrix,
                                         bbox=[interpolation_xmin//shrink_factor,
                                               interpolation_xmax//shrink_factor,
                                               interpolation_ymin//shrink_factor,
                                               interpolation_ymax//shrink_factor])

            # Point-wise evaluation; the result has the mesh's layout (rows = y, columns = x).
            dense_score_map = spline.ev(sample_locations_interpolatedArea_xs_matrix,
                                        sample_locations_interpolatedArea_ys_matrix)

            t1 = time.time()
            # Scale the shrunken map back up to the full-resolution interpolation box.
            dense_score_map = resize(dense_score_map, (interpolation_h, interpolation_w)) # similar speed as rescale
            sys.stderr.write('scale up: %.2f seconds\n' % (time.time() - t1))

            # Suppress weak responses.
            dense_score_map[dense_score_map < 1e-1] = 0

            # Too few surviving pixels: treat the class as absent on this section.
            if np.count_nonzero(dense_score_map) < 1e5:
                sys.stderr.write('No %s is detected on section %d\n' % (label, sec))
                return None

            save_hdf(dense_score_map.astype(np.float16),
                     os.path.join(scoremaps_dir, '%(dataset)s_denseScoreMapLossless_%(label)s.hdf' % \
                                                {'dataset': dataset, 'label': label}),
                    complevel=5)

            # Record where the saved map sits in full-resolution coordinates.
            np.savetxt(os.path.join(scoremaps_dir, '%(dataset)s_denseScoreMapLossless_%(label)s_interpBox.txt' % \
                                        {'dataset': dataset, 'label': label}),
                   np.array((interpolation_xmin, interpolation_xmax, interpolation_ymin, interpolation_ymax))[None],
                   fmt='%d')
        
        t = time.time()

        # if too many disk saves are simultaneous, they will be conflicting, so split into two sessions
        # The split point is used as a slice index, so it must be an integer: use floor division.
        split_at = len(labels)//2
        # First session starts at index 1, leaving out the first entry of `labels`.
        _ = Parallel(n_jobs=16)(delayed(generate_score_map)(l) for l in labels[1:split_at])
        _ = Parallel(n_jobs=16)(delayed(generate_score_map)(l) for l in labels[split_at:])

        sys.stderr.write('interpolate: %.2f seconds\n' % (time.time() - t)) # ~ 20 seconds / section

143


preprocess: 0.15 seconds
interpolate: 16.40 seconds
preprocess: 0.15 seconds


153


interpolate: 17.75 seconds
preprocess: 0.18 seconds


163


interpolate: 19.84 seconds
preprocess: 0.19 seconds


173


interpolate: 19.64 seconds
preprocess: 0.19 seconds


183


interpolate: 20.58 seconds
preprocess: 0.15 seconds


193


interpolate: 17.36 seconds
preprocess: 0.17 seconds


203


interpolate: 20.48 seconds
preprocess: 0.18 seconds


213


No 7n is detected on section 213
interpolate: 20.60 seconds
preprocess: 0.17 seconds


223


interpolate: 20.51 seconds
preprocess: 0.18 seconds


233


interpolate: 20.48 seconds
preprocess: 0.61 seconds


243


interpolate: 20.50 seconds
preprocess: 0.17 seconds


253


No 6N is detected on section 253
interpolate: 20.59 seconds
preprocess: 0.18 seconds
No Amb is detected on section 253


263


interpolate: 19.69 seconds
preprocess: 0.17 seconds


273


interpolate: 19.10 seconds
preprocess: 0.17 seconds


283


interpolate: 18.87 seconds
preprocess: 0.16 seconds


293


interpolate: 18.31 seconds
preprocess: 0.17 seconds


303


interpolate: 17.99 seconds
