In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

import sys
import os

sys.path.append(os.environ['REPO_DIR'] + '/utilities')
from utilities2015 import *
from metadata import *
from data_manager import *

import matplotlib.pyplot as plt
%matplotlib inline

import mxnet as mx

from joblib import Parallel, delayed
import time

Setting environment for Precision WorkStation


In [2]:
# patches_rootdir = '/home/yuncong/CSHL_data_patches/'
# Directory that holds the trained MXNet models. Later cells read the mean image
# and the model checkpoint from the sub-directory model_dir/<model_name>/.
model_dir = '/home/yuncong/mxnet_models/'

In [3]:
# Name of the network to use. It serves as the sub-directory of model_dir, as the
# checkpoint prefix inside that sub-directory, and as part of the name of the
# folder the extracted features are written to.
model_name = 'Sat16ClassFinetuned'

In [4]:
# Mean image that is subtracted from every patch before it is passed to the network.
# Earlier variant, reading it from an MXNet NDArray file instead:
# mean_img = mx.nd.load(os.path.join(model_dir, 'mean_224.nd'))['mean_img'].asnumpy()
mean_img_path = os.path.join(model_dir, model_name, 'saturation_mean_224.npy')
mean_img = np.load(mean_img_path)

In [5]:
# Load the saved checkpoint, then build a second model that ends at the internal
# 'flatten_output' symbol, so that predict() returns that layer's output.
model_iteration = 10

# Checkpoint files live in model_dir/<model_name>/ and are prefixed with <model_name>.
checkpoint_prefix = os.path.join(model_dir, model_name, model_name)
model0 = mx.model.FeedForward.load(checkpoint_prefix, model_iteration, ctx=mx.gpu())

flatten_output = model0.symbol.get_internals()['flatten_output']

# Reuse the loaded weights; allow_extra_params=True because the truncated symbol
# does not use the parameters of the layers after 'flatten_output'.
model = mx.model.FeedForward(
    ctx=mx.gpu(),
    symbol=flatten_output,
    num_epoch=model_iteration,
    arg_params=model0.arg_params,
    aux_params=model0.aux_params,
    allow_extra_params=True,
)

In [6]:
# model_name = 'Inception'
# model_iteration = 9
# model0 = mx.model.FeedForward.load(os.path.join(model_dir, 'inception-21k', model_name), model_iteration, ctx=mx.gpu())
# flatten_output = model0.symbol.get_internals()['flatten_output']

In [7]:
# a = mx.viz.plot_network(flatten_output, shape={"data":(1, 1, 224, 224)}, node_attrs={"shape":'rect',"fixedsize":'false'})
# a.render(model_name)

In [12]:
from learning_utilities import *

In [13]:
# Root folder that receives the extracted patch features of this model.
# NOTE(review): create_if_not_exists presumably creates the folder when it is
# missing and returns the path - confirm against its definition.
patch_features_rootdir = create_if_not_exists(
    '/media/yuncong/BstemAtlasData/CSHL_patch_features_%(model_name)s_v2' % dict(model_name=model_name)
)

In [14]:
# Sampling grid: square patches of patch_size pixels, one every `stride` pixels.
patch_size = 224
# Floor division keeps half_size an integer even under true division
# (Python 3 or `from __future__ import division`); it is used as a slice offset.
half_size = patch_size // 2
stride = 56

In [None]:
# Extract network features for every grid patch that lies inside the thumbnail
# mask of each section, and store the features together with the patch
# locations in one folder per section.
#
# Uses names defined in earlier cells (model, mean_img, patch_size, half_size,
# stride, patch_features_rootdir) and helpers that are not defined in this
# notebook (DataManager, grid_parameters_to_sample_locations, locate_patches,
# show_progress_bar, create_if_not_exists, imread, save_hdf).
#
# Historical notes:
# Had to modify [model]-symbol.json according to this https://github.com/dmlc/mxnet/issues/2718
#
# Out of memory after about 20 sections - had to modify storage code according to https://github.com/dmlc/mxnet/issues/3055
#    - don't think increasing kPoolThreshold to over 4GB will be beneficial,
# since the computation time is similar to before limiting the pool, computation is most likely compute-bound.
# Issue solved by developers.

batch_size = 256 # increasing to 500 does not save any time
# batch_size = 16 # increasing to 500 does not save any time

# for stack in all_stacks:

for stack in ['MD603']:

    print(stack)

    filename_to_section, section_to_filename = DataManager.load_sorted_filenames(stack)
    anchor_fn = DataManager.load_anchor_filename(stack)

    image_width, image_height = DataManager.get_image_dimension(stack)
    grid_spec = (patch_size, stride, image_width, image_height)

    sample_locations = grid_parameters_to_sample_locations(grid_spec=grid_spec)

    first_detect_sec, last_detect_sec = DataManager.load_cropbox(stack)[4:]

    bar = show_progress_bar(first_detect_sec, last_detect_sec)

#     for sec in range(213, 214):
    for sec in range(first_detect_sec, last_detect_sec+1):
#     for sec in range(first_detect_sec, first_detect_sec+1):

        fn = section_to_filename[sec]
        if fn in ['Placeholder', 'Rescan', 'Nonexisting']:
            continue

        print(fn)

        # All outputs of a section share this base name; compute the paths once.
        section_basename = '%(fn)s_lossless_alignedTo_%(anchor_fn)s_cropped' % \
                           dict(fn=fn, anchor_fn=anchor_fn)
        output_dir = create_if_not_exists(os.path.join(patch_features_rootdir, stack, section_basename))
        output_indices_fn = os.path.join(output_dir, section_basename + '_patch_locations.txt')
        output_features_fn = os.path.join(output_dir, section_basename + '_features.hdf')

        # The patch-locations file doubles as the "section already processed" marker.
        if os.path.exists(output_indices_fn):
            continue

        bar.value = sec

#         mask_tb = DataManager.load_thumbnail_mask(stack=stack, section=sec, cerebellum_removed=True)
        mask_tb = DataManager.load_thumbnail_mask_v2(stack=stack, section=sec)

        t = time.time()
        indices_roi = locate_patches(grid_spec=grid_spec, mask_tb=mask_tb, bbox=(0,0, image_width, image_height))
        sys.stderr.write('locate patches: %.2f seconds\n' % (time.time() - t))

        n = len(indices_roi)
        print('%d roi samples' % n)

        if n == 0:
            # With no patches the stacked array below would be 1-D and the
            # n x 1 x 224 x 224 indexing would raise, so skip the section.
            sys.stderr.write('no patches inside the mask, skipping %s\n' % fn)
            continue

        sample_locations_roi = sample_locations[indices_roi]

        ##################################

        t = time.time()

        sat = imread(DataManager.get_image_filepath(stack=stack, section=sec, version='saturation'))

        sys.stderr.write('load saturation image: %.2f seconds\n' % (time.time() - t)) # ~ 2s


        t = time.time()

#         patches = np.array([sat[y-half_size:y+half_size, x-half_size:x+half_size].copy()
#                             for x, y in sample_locations_roi]) # n x 224 x 224

        patches = np.array([sat[y-half_size:y+half_size, x-half_size:x+half_size]
                            for x, y in sample_locations_roi]) # n x 224 x 224

        patches_mean_subtracted = patches - mean_img

        patches_mean_subtracted_input = patches_mean_subtracted[:, None, :, :] # n x 1 x 224 x 224
        #         patches = np.rollaxis(patches2, 3, 1)

        sys.stderr.write('extract, reshape, normalize: %.2f seconds\n' % (time.time() - t)) # ~ 4s / 20k patches

        data_iter = mx.io.NDArrayIter(
            patches_mean_subtracted_input,
            np.zeros((n, ), int), # labels are not important since it is just feed-forward
            batch_size = batch_size,
            shuffle=False
        )

        t = time.time()

        features = model.predict(data_iter)

        sys.stderr.write('predict: %.2f seconds\n' % (time.time() - t))
        # The first time CUDA needs to load model, which is very slow ~350s, but later runs are faster ~90s

        t = time.time()

        # Write the features first and the locations file last: the locations
        # file is the completion marker checked above, so an interruption
        # between the two writes must not leave a section marked as done
        # without its features.
        save_hdf(features.astype(np.float16), output_features_fn) # bloscpack produces files of similar size
        np.savetxt(output_indices_fn, np.c_[indices_roi, sample_locations_roi], fmt='%d %d %d')

        sys.stderr.write('save: %.2f seconds\n' % (time.time() - t)) # ~.5s

MD603
MD603-N11-2016.03.02-12.51.47_MD603_1_0031

locate patches: 0.03 seconds
load saturation image: 2.91 seconds
extract, reshape, normalize: 17.10 seconds
predict: 42.14 seconds



23512 patches in ROI
23512 roi samples
MD603-IHC11-2015.12.03-14.10.59_MD603_3_0031

save: 1.72 seconds
locate patches: 0.03 seconds
load saturation image: 2.17 seconds


In [None]:
# Create symbolic links

In [None]:
# patch_features_sorted_rootdir = '/home/yuncong/CSHL_patch_features_Sat16ClassFinetuned_v2_sorted'

In [None]:
# cmd = ('mkdir %(patch_features_sorted_rootdir)s;'
#        'cd %(patch_features_sorted_rootdir)s &&'
#         'rm -rf %(stack)s &&'
#       'mkdir %(stack)s') % \
#         dict(stack=stack, patch_features_sorted_rootdir=patch_features_sorted_rootdir)
# execute_command(cmd)

In [None]:
# for stack in ['MD585', 'MD589', 'MD594']:
    
#     execute_command('cd %(patch_features_sorted_rootdir)s && rm -rf %(stack)s && mkdir %(stack)s;' % \
#                dict(stack=stack, patch_features_sorted_rootdir=patch_features_sorted_rootdir))

#     filename_to_section, section_to_filename = DataManager.load_sorted_filenames(stack)

#     for sec, fn in section_to_filename.iteritems():

#         if fn in ['Placeholder', 'Nonexisting', 'Rescan']:
#             continue
    
#         cmd = ('cd %(patch_features_sorted_rootdir)s/%(stack)s && mkdir %(stack)s_%(sec)04d_lossless_aligned_cropped; '
#     'ln -s %(patch_features_unsorted_rootdir_relpath)s/%(stack)s/%(fn)s_lossless_alignedTo_%(anchor_fn)s_cropped/%(fn)s_lossless_alignedTo_%(anchor_fn)s_cropped_features.hdf '
#     '%(stack)s_%(sec)04d_lossless_aligned_cropped/%(stack)s_%(sec)04d_lossless_aligned_cropped_features.hdf && '
#     'ln -s %(patch_features_unsorted_rootdir_relpath)s/%(stack)s/%(fn)s_lossless_alignedTo_%(anchor_fn)s_cropped/%(fn)s_lossless_alignedTo_%(anchor_fn)s_cropped_patch_locations.txt '
#     '%(stack)s_%(sec)04d_lossless_aligned_cropped/%(stack)s_%(sec)04d_lossless_aligned_cropped_patch_locations.txt') % \
#                 dict(stack=stack, sec=sec, anchor_fn=anchor_fn, fn=fn,
#                     patch_features_unsorted_rootdir_relpath='../../../CSHL_patch_features_Sat16ClassFinetuned_v2',
#                     patch_features_sorted_rootdir=patch_features_sorted_rootdir)
#         execute_command(cmd)

In [None]:
# Sync with Gordon

In [None]:
# Copy this stack's folder to the remote host with rsync.
# `stack` is the value left behind by the feature-extraction loop.
# '&&' rather than ';': if the 'cd' fails, rsync must not run from (and upload)
# whatever the current working directory happens to be.
cmd = 'cd /home/yuncong/CSHL_patch_predictions/%(stack)s && rsync -r . yuncong@oasis-dm.sdsc.edu:/home/yuncong/csd395/CSHL_patch_features/%(stack)s' % \
{'stack': stack}

exit_status = os.system(cmd)
if exit_status != 0:
    # Report the failure instead of silently dropping the exit status.
    sys.stderr.write('sync failed with status %d: %s\n' % (exit_status, cmd))

# Keep the exit status as the cell's displayed result.
exit_status

In [None]:
# Copy this stack's feature folder to the remote host with rsync.
# `stack` is the value left behind by the feature-extraction loop.
# NOTE(review): the local folder here has no '_v2' suffix and is not under
# /media/yuncong/BstemAtlasData, unlike patch_features_rootdir - confirm that
# this is the intended source.
# '&&' rather than ';': if the 'cd' fails, rsync must not run from (and upload)
# whatever the current working directory happens to be.
cmd = ('cd /home/yuncong/CSHL_patch_features_Sat16ClassFinetuned/%(stack)s/ && '
       'rsync -r . yuncong@oasis-dm.sdsc.edu:/home/yuncong/csd395/CSHL_patch_features_Sat16ClassFinetuned/%(stack)s') % \
{'stack': stack}

exit_status = os.system(cmd)
if exit_status != 0:
    # Report the failure instead of silently dropping the exit status.
    sys.stderr.write('sync failed with status %d: %s\n' % (exit_status, cmd))

# Keep the exit status as the cell's displayed result.
exit_status

In [None]:
# for stack in ['MD593', 'MD602', 'MD592', 'MD585', 'MD590', 'MD591', 'MD595', 'MD598']:
# # for stack in ['MD589']:
    
#     if stack in ['MD589', 'MD594']:
#         stack_has_annotation = True
#     else:
#         stack_has_annotation = False

#     dm = DataManager(stack=stack, data_dir='/media/yuncong/BstemAtlasData/CSHL_data_processed')

#     table_filepath = os.path.join(patches_rootdir, '%(stack)s_indices_allROIs_allSections.h5'%{'stack':stack})
#     indices_allROIs_allSections = pd.read_hdf(table_filepath, 'indices_allROIs_allSections')
#     grid_parameters = pd.read_hdf(table_filepath, 'grid_parameters')
    
#     patch_size, stride, w, h = grid_parameters.tolist()
#     half_size = patch_size/2
#     ys, xs = np.meshgrid(np.arange(half_size, h-half_size, stride), np.arange(half_size, w-half_size, stride),
#                      indexing='xy')
#     sample_locations = np.c_[xs.flat, ys.flat]
    
#     if stack_has_annotation:
#         table_filepath = os.path.join(patches_rootdir, '%(stack)s_indices_allLandmarks_allSections.h5'%{'stack':stack})
#         indices_allLandmarks_allSections = pd.read_hdf(table_filepath, 'indices_allLandmarks_allSections')

#     first_detect_sec, last_detect_sec = detect_bbox_range_lookup[stack]
    
#     bar = show_progress_bar(first_detect_sec, last_detect_sec)
    
#     for sec in range(first_detect_sec, last_detect_sec+1):
# #     for sec in range(first_detect_sec, first_detect_sec+10):
# #     for sec in range(first_detect_sec, first_detect_sec+1):
        
#         if sec not in indices_allROIs_allSections.columns:
#             continue
            
#         bar.value = sec
                
#         indices_roi = indices_allROIs_allSections[sec]['roi1']
        
#         n = len(indices_roi)
#         print n, 'roi samples'
        
#         ######################
#         t = time.time()
        
#         true_labels = -1 * np.ones((99999,), np.int)
#         if stack_has_annotation:
#             if sec in indices_allLandmarks_allSections:
#                 for l in indices_allLandmarks_allSections[sec].dropna().keys() & labels_surroundIncluded:
#                     true_labels[indices_allLandmarks_allSections[sec][l]] = labels_surroundIncluded_index[l]
#         patch_labels = true_labels[indices_roi]
        
#         create_if_not_exists(test_features_rootdir + '/%(stack)s/%(sec)04d' % {'stack': stack, 'sec': sec})
#         np.save(test_features_rootdir + '/%(stack)s/%(sec)04d/%(stack)s_%(sec)04d_roi1_labels.npy' % \
#                 {'stack': stack, 'sec': sec}, 
#                 patch_labels)
        
#         sys.stderr.write('get true labels: %.2f seconds\n' % (time.time() - t)) # ~ 0s
                
#         ######################
        
#         sample_locations_roi = sample_locations[indices_roi]

#         t = time.time()
        
#         sat = imread(sat_rootdir + '/%(stack)s_saturation/%(stack)s_%(sec)04d_sat.jpg' % {'stack': stack, 'sec': sec})
            
#         sys.stderr.write('load saturation image: %.2f seconds\n' % (time.time() - t)) # ~ 2s
    
#         t = time.time()
    
#         patches = np.array([sat[y-half_size:y+half_size, x-half_size:x+half_size]
#                             for x, y in sample_locations_roi]) # n x 224 x 224
#         patches = patches - mean_img
#         patches = patches[:, None, :, :] # n x 1 x 224 x 224
# #         patches = np.rollaxis(patches2, 3, 1)
    
#         sys.stderr.write('extract, reshape, normalize: %.2f seconds\n' % (time.time() - t)) # ~ 6s
        
#         batch_size = 256 # increasing to 500 does not save any time

#         data_iter = mx.io.NDArrayIter(
#             patches, 
#             np.zeros((n, ), np.int), # labels are not important since it is just feed-forward
#             batch_size = batch_size,
#             shuffle=False
#         )

#         t = time.time()

#         features = model.predict(data_iter)
        
#         sys.stderr.write('predict: %.2f seconds\n' % (time.time() - t)) # ~40s
        
#         t = time.time()
        
#         save_hdf(features, test_features_rootdir + '/%(stack)s/%(sec)04d/%(stack)s_%(sec)04d_roi1_features.hdf' % \
#                  {'stack': stack, 'sec': sec})
        
#         sys.stderr.write('save: %.2f seconds\n' % (time.time() - t)) # ~.5s
        
#         del sat, patches, sample_locations_roi, features
                
#     del sample_locations
