In [1]:
import os
import errno
import argparse

import numpy as np
import skimage.external.tifffile as tiff
from tensorflow.python.keras.optimizers import SGD
from tensorflow.python.keras import backend as K

from deepcell import get_image_sizes
from deepcell import make_training_data
from deepcell import bn_feature_net_31x31
from deepcell import dilated_bn_feature_net_31x31
from deepcell import train_model_watershed
from deepcell import train_model_watershed_sample
from deepcell import bn_dense_feature_net
from deepcell import rate_scheduler
from deepcell import train_model_disc, train_model_conv, train_model_sample
from deepcell import run_models_on_directory
from deepcell import export_model
from deepcell import get_data

# data options
#DATA_OUTPUT_MODE = 'conv'
DATA_OUTPUT_MODE = 'sample'
BORDER_MODE = 'valid' if DATA_OUTPUT_MODE == 'sample' else 'same'
RESIZE = True                                                              #was True
RESHAPE_SIZE = 512
WINDOW_SIZE = (15,15)
N_EPOCHS = 40
BINS = 4
MAX_TRAIN = 1e7
CHANNEL_NAMES = ['dsDNA', 'Ca', 'H3K27me3', 'H3K9ac', 'Ta', 'P.','edge_pred', 'interior_pred']

# Check for channels_first or channels_last
IS_CHANNELS_FIRST = K.image_data_format() == 'channels_first'
ROW_AXIS = 2 if IS_CHANNELS_FIRST else 1
COL_AXIS = 3 if IS_CHANNELS_FIRST else 2
CHANNEL_AXIS = 1 if IS_CHANNELS_FIRST else -1


# filepath constants
DATA_DIR = '/data/data'
MODEL_DIR = '/data/models'
NPZ_DIR = '/data/npz_data'
RESULTS_DIR = '/data/results'
EXPORT_DIR = '/data/exports'
PREFIX = 'tissues/mibi/samir'
FG_BG_DATA_FILE = 'mibi_watershedFB_{}_{}'.format(K.image_data_format(), DATA_OUTPUT_MODE)
WATERSHED_DATA_FILE = 'mibi_watershed_{}_{}'.format(K.image_data_format(), DATA_OUTPUT_MODE)
CONV_DATA_FILE = 'mibi_watershedconv_{}_{}'.format(K.image_data_format(), 'conv')
RUN_DIR = 'set1'


for d in (NPZ_DIR, MODEL_DIR, RESULTS_DIR):
    try:
        os.makedirs(os.path.join(d, PREFIX))
    except OSError as exc: # Guard against race condition
        if exc.errno != errno.EEXIST:
            raise


Using TensorFlow backend.


In [2]:
def generate_training_data():
#    file_name_save = os.path.join(NPZ_DIR, PREFIX, DATA_FILE)
    num_of_features = 1 # Specify the number of feature masks that are present
    training_direcs = ['set1', 'set2']
    channel_names = CHANNEL_NAMES
    raw_image_direc = 'raw'
    annotation_direc = 'annotated'
    
    # Create training data for watershed energy transform
    make_training_data(
        direc_name=os.path.join(DATA_DIR, PREFIX),
        dimensionality=2,
        max_training_examples=MAX_TRAIN, # Define maximum number of training examples
        window_size_x=WINDOW_SIZE[0],
        window_size_y=WINDOW_SIZE[1],
        border_mode=BORDER_MODE,
        file_name_save=os.path.join(NPZ_DIR, PREFIX, WATERSHED_DATA_FILE),
        training_direcs=training_direcs,
        distance_transform=True,
        distance_bins=BINS,
        channel_names=channel_names,
        num_of_features=BINS,
        raw_image_direc=raw_image_direc,
        annotation_direc=annotation_direc,
        reshape_size=RESHAPE_SIZE if RESIZE else None,
        edge_feature=[1, 0, 0], # Specify which feature is the edge feature,
        dilation_radius=1,
        output_mode=DATA_OUTPUT_MODE,
        display=False,
        verbose=True)

In [3]:
def train_model_on_training_data():
    direc_save = os.path.join(MODEL_DIR, PREFIX)
    direc_data = os.path.join(NPZ_DIR, PREFIX)
    training_data = np.load(os.path.join(direc_data,FG_BG_DATA_FILE + '.npz'))

    #class_weights = training_data['class_weights']
    X, y = training_data['X'], training_data['y']
    print('X.shape: {}\ny.shape: {}'.format(X.shape, y.shape))

    batch_size = 32 if DATA_OUTPUT_MODE == 'sample' else 1
    optimizer = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    lr_sched = rate_scheduler(lr=0.01, decay=0.99)
    n_epoch=N_EPOCHS

    distance_bins = 4

    model_args = {
        'norm_method': 'max',
        'reg': 1e-5,
        'n_features': distance_bins,
        'n_channels' : len(CHANNEL_NAMES)
    }

    data_format = K.image_data_format()
    row_axis = 2 if data_format == 'channels_first' else 1
    col_axis = 3 if data_format == 'channels_first' else 2
    channel_axis = 1 if data_format == 'channels_first' else 3

    size = (RESHAPE_SIZE, RESHAPE_SIZE) if RESIZE else X.shape[row_axis:col_axis + 1]  #added

    if data_format == 'channels_first':
        model_args['input_shape'] = (X.shape[channel_axis], size[0], size[1])
    else:
        model_args['input_shape'] = (size[0], size[1], X.shape[channel_axis])

    #instantiate and train foreground/background separation model
    fgbg_model = bn_feature_net_31x31(n_features=3, n_channels=len(CHANNEL_NAMES))
    
    # instantiate and train watershed model
    watershed_model = bn_feature_net_31x31(n_features=BINS, n_channels=len(CHANNEL_NAMES))

    train_model_watershed_sample(
        model=watershed_model,
        dataset=WATERSHED_DATA_FILE,
        optimizer=optimizer,
        batch_size=batch_size,
        n_epoch=n_epoch,
        distance_bins=BINS,
        direc_save=os.path.join(MODEL_DIR, PREFIX),
        direc_data=os.path.join(NPZ_DIR, PREFIX),
        expt='watershed',
        lr_sched=lr_sched,
        class_weight=training_data['class_weights'],
        rotation_range=180,
        flip=True,
        shear=False)

In [4]:
generate_training_data()

Reshaped feature data from (2, 2048, 2048, 3) to (32, 512, 512, 3)
Reshaped training data from (2, 2048, 2048, 8) to (32, 512, 512, 8)
max_training_examples is: 10000000.0
[0, 0, 0, 1]
class list for set  1 is:  [     0. 188387.  29842.  14095.]
class list for set  2 is:  [     0. 195474.  25078.  11772.]
class list for set  3 is:  [     0. 199468.  22420.  10436.]
class list for set  4 is:  [     0. 184873.  32360.  15091.]
class list for set  5 is:  [     0. 191862.  27707.  12755.]
class list for set  6 is:  [     0. 194965.  25663.  11696.]
class list for set  7 is:  [     0. 197336.  24014.  10974.]
class list for set  8 is:  [     0. 195436.  25346.  11542.]
class list for set  9 is:  [     0. 194404.  25878.  12042.]
class list for set  10 is:  [     0. 189583.  29305.  13436.]
class list for set  11 is:  [     0. 193532.  26897.  11895.]
class list for set  12 is:  [     0. 200977.  21453.   9894.]
class list for set  13 is:  [     0. 200920.  21557.   9847.]
class list for set

In [5]:
train_model_on_training_data()

X.shape: (32, 512, 512, 5)
y.shape: (2583728,)
Using feature net 31x31 with batch normalization
Using feature net 31x31 with batch normalization
batch in get_data is:  [ 5 23 23 ...  1 28  1]
get_data batch_train is:  [20  8  9 ...  7  5 19]
get_data batch_test is:  [28 20 23 ... 27 27 25]
X_train shape: (32, 512, 512, 8)
y_train shape: (1504881,)
X_test shape: (32, 512, 512, 8)
y_test shape: (167209,)
Output Shape: (None, 4)
Number of Classes: 4
Using real-time data augmentation.


ValueError: Training batches and labels should have the samelength. Found X.shape: (32, 512, 512, 8) y.shape: (1504881, 4)