In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys 
import os

import numpy as np
import tensorflow as tf

sys.path.append(os.environ['REPO_DIR'] + '/utilities')
from utilities2015 import *
from metadata import *
from data_manager import *
from learning_utilities import *
from distributed_utilities import *

In [None]:
# Identifier of the brain stack processed by this notebook; it is reused below
# to build the annotation file path and to index the per-stack metadata cache.
stack = 'MD585'

In [None]:
# Build the path <ANNOTATION_ROOTDIR>/<stack>/<stack>_annotation_grid_indices.h5,
# pass it to the project's download_from_s3 helper, then read the
# 'grid_indices' entry from the HDF file.
annotation_grid_indices_filename = stack + '_annotation_grid_indices.h5'
annotation_grid_indices_fp = os.path.join(ANNOTATION_ROOTDIR, stack,
                                          annotation_grid_indices_filename)
download_from_s3(annotation_grid_indices_fp)
grid_indices_per_label = read_hdf(annotation_grid_indices_fp, 'grid_indices')

In [None]:
# Select one row of the classifier settings table and pull out the fields that
# drive label selection further down.
classifier_id = 30
classifier_properties = classifier_settings.loc[classifier_id]

margin, model, sample_weighting, neg_composition = (
    classifier_properties[field]
    for field in ('margin', 'model', 'sample_weighting', 'neg_composition'))

In [None]:
# Determine which labels to load.
#
# Positive labels are the structure names themselves. Which negative labels
# accompany them depends on `neg_composition` (read from the classifier
# settings above):
#   'neg_has_everything_else'       -> '<structure>_negative'
#   'neg_has_only_surround_noclass' -> surround-noclass labels + 'noclass'
#   'neg_has_all_surround'          -> surround-by-other-structure labels,
#                                      surround-noclass labels + 'noclass'

structures_to_sample = all_known_structures
negative_labels_to_sample = [s + '_negative' for s in structures_to_sample]

margins_to_sample = [margin] # (200: 100 um, 500: 250 um)

# Surround of structure `s` that overlaps another known structure `surr_l`.
surround_positive_labels_to_sample = [convert_to_surround_name(s, margin=m, suffix=surr_l)
                             for m in margins_to_sample
                             for s in structures_to_sample
                             for surr_l in structures_to_sample
                             if surr_l != s]
# Surround of structure `s` that belongs to no annotated class.
surround_noclass_labels_to_sample = [convert_to_surround_name(s, margin=m, suffix='noclass')
                             for m in margins_to_sample
                             for s in structures_to_sample]

if neg_composition == 'neg_has_everything_else':
    labels_to_sample = structures_to_sample + negative_labels_to_sample
elif neg_composition == 'neg_has_only_surround_noclass':
    labels_to_sample = structures_to_sample + surround_noclass_labels_to_sample + ['noclass']
elif neg_composition == 'neg_has_all_surround':
    labels_to_sample = structures_to_sample + surround_positive_labels_to_sample + surround_noclass_labels_to_sample + ['noclass']
else:
    # Fail here rather than with a NameError on `labels_to_sample` (or, worse,
    # with a stale value from a previous run of this cell) further down.
    raise ValueError('neg_composition %s is not recognized.' % neg_composition)

In [None]:
# Load training dataset.
#
# 'train_set_id' may name several datasets separated by '/'; each piece is
# converted to an int before the addresses for the selected labels are loaded.

train_set_spec = str(classifier_properties['train_set_id'])
training_set_ids = map(int, train_set_spec.split('/'))
training_addresses = load_dataset_addresses(training_set_ids,
                                            labels_to_sample=labels_to_sample)

In [None]:
# Labels that actually have addresses in the loaded training set.
labels_found = training_addresses.keys()

# Keep the original (un-suffixed) name of every label whose original name is
# itself one of the loaded labels, then drop the catch-all 'noclass'.
structures_found = set()
for label in labels_found:
    if convert_to_original_name(label) in labels_found:
        structures_found.add(convert_to_original_name(label))
structures_found = structures_found - {'noclass'}

In [None]:
# Load pre-extracted patch arrays from local .bp files: positives for label
# '7N' and negatives for label '7N_surround_500_noclass'.
# NOTE(review): these are hard-coded /tmp paths, so the cell only works on a
# machine where the files already exist -- presumably produced by the
# (commented-out) extraction cells that follow; confirm.
# NOTE(review): `bp` is not imported explicitly in this notebook; presumably it
# arrives through one of the star imports -- confirm.
training_patches_pos = bp.unpack_ndarray_file('/tmp/patch_dataset_20/7N.bp')
training_patches_neg = bp.unpack_ndarray_file('/tmp/patch_dataset_20/7N_surround_500_noclass.bp')

In [None]:
# training_patches_pos = extract_patches_given_locations_multiple_sections(training_addresses['7N'], 
#                                                                          location_or_grid_index='grid_index',
#                                                                         version='cropped_gray')

In [None]:
# training_patches_neg = extract_patches_given_locations_multiple_sections(training_addresses['7N_surround_500_noclass'][:10], 
#                                                                          location_or_grid_index='grid_index',
#                                                                         version='cropped_gray')

In [None]:
# Stack positives first, then negatives. The label cell further down marks the
# first training_patches_pos.shape[0] rows as class 0 and the rest as class 1,
# so this ordering is significant.
# NOTE(review): a later cell rebinds `training_patches` to patches cut from a
# single section, which discards this array -- confirm which one is intended.
training_patches = np.concatenate([training_patches_pos, training_patches_neg])

In [None]:
# for structure in structures_found:

#     print structure
    
#     #############################
#     ## Define Positive Samples ##
#     #############################
    
#     features_pos = training_features[structure]
#     n_pos = len(features_pos)
    
#     #############################
#     ## Define Negative Samples ##
#     #############################
    
# #     if setting in [1, 3, 4, 5, 6, 7]:
#     if neg_composition == 'neg_has_only_surround_noclass':
#         neg_classes = [convert_to_surround_name(structure, margin=margin, suffix='noclass')]
# #     elif setting in [2, 10, 23, 24, 25, 26]:
#     elif neg_composition == 'neg_has_all_surround':
#         neg_classes = [convert_to_surround_name(structure, margin=margin, suffix='noclass')]
#         for surr_s in structures_found:
#             c = convert_to_surround_name(structure, margin=margin, suffix=surr_s)
#             if c in labels_found:
#                 neg_classes.append(c)
# #     elif setting in [8,9,11]:
#     elif neg_composition == 'neg_has_everything_else':
#         neg_classes = [structure + '_negative']
#     else:
#         raise Exception('neg_composition %s is not recognized.' % neg_composition)

#     features_neg = np.concatenate([training_features[neg_class] for neg_class in neg_classes])
        
#     n_neg = len(features_neg)
        
#     ###########################################################################################

#     train_data = np.r_[features_pos, features_neg]
#     train_labels = np.r_[np.ones((features_pos.shape[0], )), 
#                          -np.ones((features_neg.shape[0], ))]

In [None]:
# Patch geometry. patch_size and stride are handed to the grid helpers as part
# of grid_spec; half_size is the offset from a patch centre to its edges.
patch_size = 224
# Floor division keeps half_size an integer on Python 3 as well as Python 2;
# it is used directly as a slice offset when cutting patches out of the image,
# and float slice bounds raise a TypeError.
half_size = patch_size // 2
stride = 56

In [None]:
# Cut every masked grid patch out of one section of `stack`.
#
# Steps: build the sampling grid for the stack's image size, keep only grid
# points selected by locate_patches_v2 for the section's thumbnail mask, load
# the full-resolution grayscale image, and slice a patch_size x patch_size
# window around each kept point. Timings are written to stderr.
section_to_filename = metadata_cache['sections_to_filenames'][stack]

image_width, image_height = metadata_cache['image_shape'][stack]
grid_spec = (patch_size, stride, image_width, image_height)

sample_locations = grid_parameters_to_sample_locations(grid_spec=grid_spec)

sec = 180

# Use grids only on mask.
t = time.time()
mask_tb = DataManager.load_thumbnail_mask_v2(stack=stack, section=sec)
indices_roi = locate_patches_v2(grid_spec=grid_spec, mask_tb=mask_tb)
sys.stderr.write('locate patches: %.2f seconds\n' % (time.time() - t))

#         Use grids on the entire frame.
#         indices_roi = range(len(sample_locations))

n = len(indices_roi)
sys.stderr.write('%d samples.\n' % n)

sample_locations_roi = sample_locations[indices_roi]

t = time.time()
img_fp = DataManager.get_image_filepath(stack=stack, section=sec, version='cropped_gray', resol='lossless')
download_from_s3(img_fp)
im = img_as_ubyte(imread(img_fp))
sys.stderr.write('load image: %.2f seconds\n' % (time.time() - t)) # ~ 35s

#         t = time.time()
#         sat = convert_to_saturation(im)
#         del im
#         sys.stderr.write('Convert to saturation: %.2f seconds\n' % (time.time() - t)) # ~ 35s
#         sat = imread(DataManager.get_image_filepath(stack=stack, section=sec, version='saturation'))


t = time.time()

# Each location is an (x, y) centre; rows are indexed by y, columns by x.
# NOTE(review): assumes every kept location lies at least half_size pixels
# inside the image, otherwise a slice start goes negative and the patch is
# silently wrong-sized -- confirm that the grid helpers guarantee this.
patches = np.array([im[y-half_size:y+half_size, x-half_size:x+half_size]
                    for x, y in sample_locations_roi]) # n x 224 x 224
# Report the timer started above, matching the other timed stages.
sys.stderr.write('extract patches: %.2f seconds\n' % (time.time() - t))

In [None]:
# Cut patches around the first 300 masked grid locations of the loaded section.
# NOTE(review): this rebinds `training_patches`, replacing the positive/negative
# concatenation built earlier. The label cell that follows still derives the
# class boundary from training_patches_pos.shape[0], so after this cell the
# labels no longer describe these patches -- confirm which array is intended
# before training the classifier.
training_patches = np.array([im[y-half_size:y+half_size, x-half_size:x+half_size]
                    for x, y in sample_locations_roi[:300]]) # n x 224 x 224

In [None]:
# One-hot labels with two columns: column 0 is set for the first n_pos rows
# (as many rows as there are positive patches), column 1 for all rows after.
n_pos = training_patches_pos.shape[0]
n_total = training_patches.shape[0]

training_labels = np.zeros((n_total, 2))
training_labels[:n_pos, 0] = 1
training_labels[n_pos:, 1] = 1

In [None]:
# https://github.com/tensorflow/tensorflow/blob/a5d8217c4ed90041bea2616c14a8ddcf11ec8c03/tensorflow/examples/tutorials/mnist/input_data.py#L92
class DataSet(object):
    """In-memory image/label container that serves sequential mini-batches.

    Images are flattened to one row per example and rescaled from [0, 255] to
    [0.0, 1.0] as float32. Batches are served in storage order; each time an
    epoch is exhausted the examples are reshuffled (images and labels with the
    same permutation) and serving restarts from the beginning.
    """

    def __init__(self, images, labels):
        """Store `images` (flattened and rescaled) together with `labels`.

        Args:
            images: array of shape [num_examples, rows, columns] (a trailing
                depth axis of size 1 is also accepted by the reshape).
            labels: array whose first dimension equals num_examples.

        Raises:
            AssertionError: if images and labels disagree on num_examples.
        """
        assert images.shape[0] == labels.shape[0], (
          'images.shape: %s labels.shape: %s' % (images.shape,
                                                 labels.shape))
        self._num_examples = images.shape[0]

        # Convert shape from [num examples, rows, columns, depth]
        # to [num examples, rows*columns] (assuming depth == 1)
        images = images.reshape(images.shape[0],
                                images.shape[1] * images.shape[2])
        # Convert from [0, 255] -> [0.0, 1.0].
        images = images.astype(np.float32)
        images = images / 255.
        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        """Flattened float32 images, one row per example."""
        return self._images

    @property
    def labels(self):
        """Labels, row-aligned with `images`."""
        return self._labels

    @property
    def num_examples(self):
        """Number of examples held."""
        return self._num_examples

    @property
    def epochs_completed(self):
        """Number of times the data has been exhausted and reshuffled."""
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples as an (images, labels) pair.

        When fewer than `batch_size` examples remain in the current epoch, the
        remainder is skipped, the data is reshuffled and the batch is taken
        from the start of the new epoch.

        Args:
            batch_size: number of examples to return; must not exceed
                num_examples.
            fake_data: unused; kept so existing callers keep working.

        Raises:
            AssertionError: if batch_size exceeds num_examples.
        """
        # Validate up front: an oversized request can never be served, and
        # checking only after the epoch roll-over (as before) bumped the epoch
        # counter and reshuffled the data before failing.
        assert batch_size <= self._num_examples, (
            'batch_size %d exceeds num_examples %d' % (batch_size,
                                                       self._num_examples))

        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]

In [None]:
# Wrap the first 128 patches and their labels for mini-batch serving. 128 is
# also the batch_size set in the next cell, so each next_batch(batch_size)
# call covers the whole wrapped set.
data_set = DataSet(training_patches[:128], training_labels[:128])

In [None]:
# Shared hyperparameters for the autoencoder and the CNN classifier below.
n_classes = 2  # width of the classifier's output layer (matches the 2-column labels)
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

# Used by both the RMSProp (autoencoder) and Adam (classifier) optimizers.
# NOTE(review): 0.1 is unusually large for either optimizer -- confirm.
learning_rate = 0.1
# Both training loops run while step * batch_size < training_iters.
training_iters = 10000000
batch_size = 128
display_step = 10  # report progress every display_step steps
num_epochs = 100  # passed to tf.train.slice_input_producer in the input cell

# # Network Parameters
# n_input = 224*224 # flattened 224x224 patch
dropout = 0.75 # Dropout, probability to keep units

In [None]:
# https://github.com/tensorflow/tensorflow/blob/r1.1/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py#L45

In [None]:
with tf.name_scope('input'):
    # Input data: the whole data set is preloaded into two non-trainable
    # variables through placeholders (pattern from the TensorFlow
    # fully_connected_preloaded_var example linked above). collections=[] is
    # passed, and the training cells run input_images.initializer /
    # input_labels.initializer explicitly with the data fed in.
    images_initializer = tf.placeholder(dtype=data_set.images.dtype,
                                        shape=data_set.images.shape)
    labels_initializer = tf.placeholder(dtype=data_set.labels.dtype,
                                        shape=data_set.labels.shape)
    input_images = tf.Variable(images_initializer, trainable=False, collections=[])
    input_labels = tf.Variable(labels_initializer, trainable=False, collections=[])

    # Slice single examples out of the preloaded variables and regroup them
    # into batches of batch_size.
    # NOTE(review): the training loops below feed `images` / `labels` directly
    # via feed_dict, which overrides these batch tensors -- so the producer
    # and batching ops appear to be bypassed in practice; confirm.
    image, label = tf.train.slice_input_producer(
      [input_images, input_labels], num_epochs=num_epochs)
    label = tf.cast(label, tf.int32)
    images, labels = tf.train.batch(
      [image, label], batch_size=batch_size)

In [None]:
# Network Parameters
n_hidden_1 = 64 # 1st layer num features
n_hidden_2 = 16 # 2nd layer num features (size of the encoded representation)
n_input = 224 * 224 # flattened 224x224 patch

# Autoencoder parameters, all initialised from a standard normal distribution.
# The encoder maps n_input -> n_hidden_1 -> n_hidden_2; the decoder mirrors it.
# NOTE(review): a later cell rebinds `weights` and `biases` to the CNN
# parameters, after which encoder()/decoder() can no longer find their keys.
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'decoder_b2': tf.Variable(tf.random_normal([n_input])),
}

# Building the encoder
def encoder(x):
    """Encode a batch of flattened inputs with two sigmoid dense layers.

    Reads the 'encoder_h1'/'encoder_h2' entries of the module-level `weights`
    and the 'encoder_b1'/'encoder_b2' entries of `biases`; returns the output
    of the second layer.
    """
    # Encoder hidden layer #1 with sigmoid activation
    pre_activation_1 = tf.add(tf.matmul(x, weights['encoder_h1']),
                              biases['encoder_b1'])
    hidden_1 = tf.nn.sigmoid(pre_activation_1)
    # Encoder hidden layer #2 with sigmoid activation
    pre_activation_2 = tf.add(tf.matmul(hidden_1, weights['encoder_h2']),
                              biases['encoder_b2'])
    return tf.nn.sigmoid(pre_activation_2)


# Building the decoder
def decoder(x):
    """Decode a batch of codes with two sigmoid dense layers.

    Reads the 'decoder_h1'/'decoder_h2' entries of the module-level `weights`
    and the 'decoder_b1'/'decoder_b2' entries of `biases`; returns the output
    of the second layer.
    """
    # Decoder hidden layer #1 with sigmoid activation
    pre_activation_1 = tf.add(tf.matmul(x, weights['decoder_h1']),
                              biases['decoder_b1'])
    hidden_1 = tf.nn.sigmoid(pre_activation_1)
    # Decoder hidden layer #2 with sigmoid activation
    pre_activation_2 = tf.add(tf.matmul(hidden_1, weights['decoder_h2']),
                              biases['decoder_b2'])
    return tf.nn.sigmoid(pre_activation_2)

# Construct model: the batched `images` tensor is encoded and then decoded.
encoder_op = encoder(images)
decoder_op = decoder(encoder_op)

# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = images

# Define loss and optimizer, minimize the squared error
# (mean over all elements of the squared reconstruction difference).
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

# Initializing the variables
# NOTE(review): `cost`, `optimizer` and `init` are rebound by the classifier
# cell further down, so run the training cell below before that one.
init = tf.global_variables_initializer()

# Launch the graph: train the autoencoder and periodically show reconstructions.
with tf.Session() as sess:
    sess.run(init)
    # The preloaded input variables are initialised explicitly, with the data
    # fed through their placeholder initial values.
    sess.run(input_images.initializer,
             feed_dict={images_initializer: data_set.images})
    sess.run(input_labels.initializer,
             feed_dict={labels_initializer: data_set.labels})
    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = data_set.next_batch(batch_size)
        # Run optimization op (backprop); labels are unused because the
        # reconstruction target is the input itself.
        _, c = sess.run([optimizer, cost], feed_dict={images: batch_x})
        if step % display_step == 0:
            # `step` is already 1-based, so report it as is (previously
            # step + 1, which was off by one). Single-argument print(...)
            # behaves the same on Python 2 and Python 3.
            print("Epoch: %04d, cost=%.9f" % (step, c))

            # Reconstruct the first batch_size training patches (there is no
            # separate test set here) and show the first ten.
            encode_decode = sess.run(
                y_pred, feed_dict={images: data_set.images[:batch_size]})

            display_images_in_grids([np.reshape(encode_decode[i], (224, 224)) for i in range(10)], nc=10,
                                    cmap=plt.cm.gray, vmin=0, vmax=1)
        step += 1
    print("Optimization Finished!")
    
    

In [None]:
# Show the first ten reconstructions from the last displayed training step,
# each reshaped back to 224x224, in gray with a fixed [0, 1] intensity range.
# Requires `encode_decode` left behind by the autoencoder training cell.
display_images_in_grids([np.reshape(encode_decode[i], (224, 224)) for i in range(10)], nc=10, cmap=plt.cm.gray,
                       vmin=0, vmax=1)

In [None]:
# Show the first ten input patches for comparison with the reconstructions.
# NOTE(review): unlike the reconstruction plot, no vmin/vmax is passed here,
# so the intensity scaling of the two figures may differ -- confirm.
display_images_in_grids([np.reshape(data_set.images[i], (224, 224)) for i in range(10)], nc=10, cmap=plt.cm.gray)

In [None]:
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """2-D convolution followed by bias addition and ReLU.

    `strides` is applied to both spatial axes; padding is 'SAME'.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)


def maxpool2d(x, k=2):
    """Max-pool with a k x k window and stride k on both spatial axes ('SAME' padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window),
                          padding='SAME')


# Create model
def simplified_vgg(x, weights, biases, dropout):
    """Build a three-stage conv/pool network with one hidden dense layer.

    Args:
        x: input batch; reshaped to [-1, 224, 224, 1] before the first conv.
        weights: dict with keys 'wc1', 'wc2', 'wc3', 'wd1', 'out'.
        biases: dict with keys 'bc1', 'bc2', 'bc3', 'bd1', 'out'.
        dropout: keep probability handed to tf.nn.dropout.

    Returns:
        The un-normalised class scores (logits).
    """
    # Reshape input picture
    feature_map = tf.reshape(x, shape=[-1, 224, 224, 1])

    # Three convolution stages, each followed by max pooling (down-sampling)
    # with pool sizes 2, 4 and 4.
    conv_stages = (('wc1', 'bc1', 2), ('wc2', 'bc2', 4), ('wc3', 'bc3', 4))
    for kernel_key, bias_key, pool_size in conv_stages:
        feature_map = conv2d(feature_map, weights[kernel_key], biases[bias_key])
        feature_map = maxpool2d(feature_map, k=pool_size)

    # Fully connected layer: flatten the last conv output to the input width
    # of 'wd1'.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(feature_map, [-1, flat_width])
    hidden = tf.nn.relu(tf.add(tf.matmul(flattened, weights['wd1']), biases['bd1']))
    # Apply Dropout
    hidden = tf.nn.dropout(hidden, dropout)

    # Output, class prediction
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])

In [None]:
# Store layers weight & bias
# NOTE(review): this rebinds `weights`, `biases`, `cost`, `optimizer` and
# `init`, which the autoencoder cells above also use; after running this cell
# the autoencoder's encoder()/decoder() and training cell no longer see their
# own parameters.
weights = {
    # 5x5 conv, 1 input, 32 outputs
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 inputs, 64 outputs
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # 5x5 conv, 64 inputs, 64 outputs
    'wc3': tf.Variable(tf.random_normal([5, 5, 64, 64])),
    # fully connected, 7*7*64 inputs, 1024 outputs
    # (224 reduced by the pool sizes 2, 4 and 4 used in simplified_vgg -> 7)
    'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
    # 1024 inputs, n_classes outputs (class prediction)
    'out': tf.Variable(tf.random_normal([1024, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bc3': tf.Variable(tf.random_normal([64])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct model; keep_prob is the placeholder fed with the dropout keep
# probability at run time.
pred = simplified_vgg(images, weights, biases, keep_prob)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model: a prediction is correct when the arg-max of the logits
# matches the arg-max of the one-hot label.
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

In [None]:
# Launch the graph: train the CNN classifier and report loss/accuracy.
with tf.Session() as sess:
    sess.run(init)
    # Initialise the preloaded input variables with the data set contents.
    sess.run(input_images.initializer,
             feed_dict={images_initializer: data_set.images})
    sess.run(input_labels.initializer,
             feed_dict={labels_initializer: data_set.labels})

    step = 1
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = data_set.next_batch(batch_size)

        # Run optimization op (backprop) with dropout enabled.
        train_feed = {images: batch_x, labels: batch_y, keep_prob: dropout}
        sess.run(optimizer, feed_dict=train_feed)

        if step % display_step == 0:
            # Calculate batch loss and accuracy with dropout disabled.
            eval_feed = {images: batch_x, labels: batch_y, keep_prob: 1.}
            loss, acc = sess.run([cost, accuracy], feed_dict=eval_feed)
            progress = ("Iter " + str(step*batch_size) + ", Minibatch Loss= " +
                        "{:.6f}".format(loss) + ", Training Accuracy= " +
                        "{:.5f}".format(acc))
            print(progress)
        step += 1
    print("Optimization Finished!")

    # Accuracy on the first 128 examples of data_set (the same data used for
    # training; there is no held-out set in this notebook).
    final_feed = {images: data_set.images[:128],
                  labels: data_set.labels[:128],
                  keep_prob: 1.}
    final_accuracy = sess.run(accuracy, feed_dict=final_feed)
    print(" ".join(["Testing Accuracy:", str(final_accuracy)]))