In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

import sys
import os

sys.path.append(os.environ['REPO_DIR'] + '/utilities')
from utilities2015 import *
from metadata import *
from data_manager import *

import matplotlib.pyplot as plt
%matplotlib inline

from joblib import Parallel, delayed
import time

Setting environment for Gordon


In [3]:
from scipy.interpolate import RectBivariateSpline
from skimage.transform import resize

In [2]:
# Labels grouped as "paired" structures.
paired_structures = [
    '5N', '6N', '7N', '7n', 'Amb', 'LC', 'LRt', 'Pn',
    'Tz', 'VLL', 'RMC', 'SNC', 'SNR', '3N', '4N',
    'Sp5I', 'Sp5O', 'Sp5C', 'PBG', '10N', 'VCA', 'VCP', 'DC',
]

# Labels grouped as "singular" structures.
singular_structures = ['AP', '12N', 'RtTg', 'SC', 'IC']

# Full label list (paired labels first, then singular ones); this is the
# list the score-map generation iterates over.
structures = list(paired_structures)
structures.extend(singular_structures)

In [4]:
# input: root directory holding the sparse per-patch prediction scores
predictions_dir = '/home/yuncong/csd395/CSHL_patch_Sat16ClassFinetuned_v2_predictions'

# output: root directory under which the dense score maps are written.
# This assignment has to stay active: the name is used on the next line and
# again when each section's output directory is built, so on a fresh kernel the
# notebook raises NameError without it (unless a star import happens to define it).
scoremaps_rootdir = '/oasis/projects/nsf/csd395/yuncong/CSHL_lossless_scoremaps_Sat16ClassFinetuned_v2/'
create_if_not_exists(scoremaps_rootdir)

'/oasis/projects/nsf/csd395/yuncong/CSHL_lossless_scoremaps_Sat16ClassFinetuned_v2/'

In [5]:
# For every usable section of each stack: read the sparse patch scores of all
# structures, interpolate them to a dense score map, and save one map (plus its
# interpolation box) per structure under scoremaps_rootdir.
for stack in ['MD590']:

    filenames_to_sections, sections_to_filenames = DataManager.load_sorted_filenames(stack)
    first_sec, last_sec = DataManager.load_cropbox(stack)[4:]
    anchor_fn = DataManager.load_anchor_filename(stack)

    for sec in range(first_sec, last_sec+1):

        fn = sections_to_filenames[sec]
        if fn in ['Nonexisting', 'Rescan', 'Placeholder']:
            # these entries are skipped entirely; nothing is written for them
            continue

        # Basename shared by every input and output file of this section.
        section_basename = '%(fn)s_lossless_alignedTo_%(anchor_fn)s_cropped' % \
                           dict(fn=fn, anchor_fn=anchor_fn)

        # output
        scoremaps_dir = os.path.join(scoremaps_rootdir, stack, section_basename)
        create_if_not_exists(scoremaps_dir)

        ## load the patch sample locations

        t = time.time()

        # NOTE(review): patch_features_rootdir is not defined in this notebook;
        # presumably it comes from one of the star imports - confirm.
        locations_fn = patch_features_rootdir + '/%(stack)s/%(basename)s/%(basename)s_patch_locations.txt' % \
                       dict(stack=stack, basename=section_basename)

        with open(locations_fn, 'r') as f:
            # The first column of every line is dropped; the remaining two are
            # parsed as integer x and y. A list comprehension (not map) keeps
            # this a proper 2-D integer array on Python 3 as well.
            sample_locations_roi = np.array([[int(v) for v in line.split()[1:]] for line in f])

        ## interpolate

        # Bounding box of the sample locations, in full-resolution pixels.
        interpolation_xmin, interpolation_ymin = sample_locations_roi.min(axis=0)
        interpolation_xmax, interpolation_ymax = sample_locations_roi.max(axis=0)
        interpolation_w = interpolation_xmax - interpolation_xmin + 1
        interpolation_h = interpolation_ymax - interpolation_ymin + 1

        ##### sample_locations_roi + scores to dense_score_map #####

        shrink_factor = 4 # do interpolation on a smaller grid, then resize to original dimension

        sample_locations_unique_xs = np.unique(sample_locations_roi[:,0])
        sample_locations_unique_ys = np.unique(sample_locations_roi[:,1])

        n_sample_x = sample_locations_unique_xs.size
        n_sample_y = sample_locations_unique_ys.size

        # Map every sample location to its (column, row) index in the grid
        # spanned by the unique x and unique y coordinates.
        index_x = dict([(j,i) for i,j in enumerate(sample_locations_unique_xs)])
        index_y = dict([(j,i) for i,j in enumerate(sample_locations_unique_ys)])
        sample_location_indices = np.asarray([(index_x[x], index_y[y]) for x, y in sample_locations_roi])

        # Coordinates (in shrunken pixels) at which the spline is evaluated.
        # Floor division is written explicitly (//) so the bounds stay integers
        # on Python 3 too; on Python 2 it is identical to the former `/`.
        sample_locations_interpolatedArea_ys_matrix, \
        sample_locations_interpolatedArea_xs_matrix = np.meshgrid(range(interpolation_ymin//shrink_factor,
                                                                        interpolation_ymax//shrink_factor),
                                                                  range(interpolation_xmin//shrink_factor,
                                                                        interpolation_xmax//shrink_factor),
                                                                  indexing='ij')

        # NOTE(review): this is an *input* directory, yet create_if_not_exists is
        # applied to it; kept because its return value is what is used as the
        # path - confirm whether creating a missing input directory is intended.
        sparse_score_dir = create_if_not_exists(os.path.join(predictions_dir, stack, section_basename))

        # Sparse scores of every structure, keyed by structure label.
        probs_allClasses = {label: bp.unpack_ndarray_file(sparse_score_dir + '/' + section_basename +
                                                          '_%(label)s_sparseScores.hdf' % {'label': label})
                            for label in structures}

        sys.stderr.write('preprocess: %.2f seconds\n' % (time.time() - t))

        def generate_score_map(label):
            """Interpolate the sparse scores of `label` into a dense map and save it.

            Reads the per-section variables of the enclosing loop (sample grid,
            interpolation box, probs_allClasses, scoremaps_dir, ...).

            Writes two files into scoremaps_dir: the dense score map (float16,
            via save_hdf) and a one-line text file holding
            xmin, xmax, ymin, ymax of the interpolation box.

            Always returns None. Nothing is written when label is 'BackG' or
            when fewer than 1e5 pixels survive the 0.1 threshold.
            """

            if label == 'BackG':
                return None

            # Scatter the sparse scores onto the (x, y) sample grid; grid cells
            # without a sample stay 0.
            score_matrix = np.zeros((n_sample_x, n_sample_y))
            score_matrix[sample_location_indices[:,0], sample_location_indices[:,1]] = probs_allClasses[label]

            # Spline over the shrunken grid (explicit floor division, see above).
            spline = RectBivariateSpline(sample_locations_unique_xs//shrink_factor,
                                         sample_locations_unique_ys//shrink_factor,
                                         score_matrix,
                                         bbox=[interpolation_xmin//shrink_factor,
                                               interpolation_xmax//shrink_factor,
                                               interpolation_ymin//shrink_factor,
                                               interpolation_ymax//shrink_factor])

            dense_score_map = spline.ev(sample_locations_interpolatedArea_xs_matrix,
                                        sample_locations_interpolatedArea_ys_matrix)

            # Bring the shrunken map back to the full interpolation-box size.
            t1 = time.time()
            dense_score_map = resize(dense_score_map, (interpolation_h, interpolation_w)) # similar speed as rescale
            sys.stderr.write('scale up: %.2f seconds\n' % (time.time() - t1))

            # Zero out low scores.
            dense_score_map[dense_score_map < 1e-1] = 0

            # Too few non-zero pixels: report and save nothing for this label.
            if np.count_nonzero(dense_score_map) < 1e5:
                sys.stderr.write('No %s is detected on section %d\n' % (label, sec))
                return None

            scoremap_bp_filepath = os.path.join(scoremaps_dir,
                                                section_basename + '_%(label)s_denseScoreMap.hdf' % {'label': label})

            save_hdf(dense_score_map.astype(np.float16), scoremap_bp_filepath, complevel=5)

            scoremap_interpBox_filepath = os.path.join(scoremaps_dir,
                                                       section_basename + '_%(label)s_denseScoreMap_interpBox.txt' % {'label': label})

            np.savetxt(scoremap_interpBox_filepath,
                       np.array((interpolation_xmin, interpolation_xmax, interpolation_ymin, interpolation_ymax))[None],
                       fmt='%d')

        t = time.time()

        # Original author's note: if too many disk saves are simultaneous they
        # will conflict; running the structures in smaller batches is the
        # fallback if that happens.
        _ = Parallel(n_jobs=15)(delayed(generate_score_map)(l) for l in structures)

        sys.stderr.write('interpolate: %.2f seconds\n' % (time.time() - t)) # ~ 30 seconds / section on one node

preprocess: 1.75 seconds
scale up: 7.81 seconds
scale up: 7.90 seconds
scale up: 8.11 seconds
scale up: 8.16 seconds
scale up: 8.20 seconds
scale up: 8.25 seconds
scale up: 8.26 seconds
scale up: 8.25 seconds
scale up: 8.27 seconds
scale up: 8.27 seconds
scale up: 8.28 seconds
scale up: 8.30 seconds
scale up: 8.30 seconds
scale up: 8.37 seconds
scale up: 8.37 seconds
scale up: 7.46 seconds
scale up: 7.46 seconds
scale up: 7.47 seconds
scale up: 7.59 seconds
scale up: 7.61 seconds
scale up: 7.70 seconds
scale up: 7.63 seconds
scale up: 7.69 seconds
scale up: 7.72 seconds
scale up: 7.77 seconds
scale up: 7.68 seconds
scale up: 7.66 seconds
scale up: 7.69 seconds
interpolate: 29.82 seconds
preprocess: 1.58 seconds
scale up: 7.83 seconds
scale up: 7.91 seconds
scale up: 7.94 seconds
scale up: 8.00 seconds
scale up: 8.01 seconds
scale up: 8.05 seconds
scale up: 8.04 seconds
scale up: 8.05 seconds
scale up: 8.08 seconds
scale up: 8.07 seconds
scale up: 8.08 seconds
scale up: 8.11 seconds
sca

KeyboardInterrupt: 