In [None]:
import os
import sys

import numpy as np
import pandas as pd
import glob
import scipy.io
import scipy.misc
import matplotlib.pyplot as plt 
from matplotlib.pyplot import imshow
from PIL import Image
from bs4 import BeautifulSoup
from collections import Counter
from shutil import copyfile
import random
import pickle

In [5]:
# Make sample dataset:
def make_sample_unet(data_loc, out_loc, downsample_factor):
    """Copy a random subset of each class's .tif images into a sample dataset.

    Every sub-directory of ``data_loc`` is treated as one class. From each
    class roughly ``1/downsample_factor`` of its ``.tif`` files are picked at
    random (numpy's global RNG) and copied to ``out_loc/<class>/``.

    Args:
        data_loc: directory whose immediate children are the class folders.
        out_loc: destination root; class folders are created on demand.
        downsample_factor: the class size is divided by this number to get
            the number of files to copy.
    """
    for data_class_loc in glob.glob(data_loc+'/*'):
        imgs = [loc for loc in glob.glob(data_class_loc+'/*')
                if loc.rsplit('.', 1)[-1] in ['tif']]
        data_class = os.path.basename(data_class_loc)

        # Never ask for more files than exist (downsample_factor < 1).
        sample_num = min(len(imgs), int(len(imgs)/downsample_factor))
        if sample_num < 1:
            # Nothing to copy for this class; do not create an empty folder.
            continue

        # replace=False: np.random.choice samples WITH replacement by default,
        # which silently produced duplicate picks and therefore fewer copied
        # files than requested.
        samp = np.random.choice(imgs, sample_num, replace=False)
        print(samp)

        new_loc = os.path.join(out_loc, data_class)
        os.makedirs(new_loc, exist_ok=True)
        for file in samp:
            copyfile(file, os.path.join(new_loc, os.path.basename(file)))

# data_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train'
# out_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/train'
# make_sample_unet(data_loc, out_loc, 5)

# data_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/valid'
# out_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/valid'
# make_sample_unet(data_loc, out_loc, 5)

[ '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/82_Region_4_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/62_Region_3_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/104_Region_6_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/77_Region_1_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/119_Region_15_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/138_Region_9_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/97_Region_5_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/80_Region_9_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/35_Region_1_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/98_Region_7_crop.tif'
 '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2/train/0/20_Region_67_crop.tif'
 '/home/rbbidar

In [2]:
import os
import numpy as np
import glob
from scipy.ndimage import rotate
from PIL import Image

import keras
from keras.models import Model
from keras import backend as K
from keras.engine.topology import Layer
from keras import metrics
from keras import layers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Reshape, Input, concatenate, Conv2DTranspose
from keras.layers.core import Activation, Dense, Lambda
from keras.constraints import maxnorm
from keras.optimizers import SGD, Adam
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization


############ DATA GENERATORS
def data_gen_aug_combined(file_loc, mask_loc, batch_size, square_rot_p=.3, seed=101):
    """Yield endless batches of augmented (image, mask) pairs.

    Every ``.tif`` directly under ``file_loc`` is paired with the file of the
    same name under ``mask_loc``. Each sample is randomly flipped, rotated and
    cropped to 320 x 368; the same transform is applied to image and mask.
    The file order is reshuffled at the start of every pass.

    Args:
        file_loc: directory with the input ``.tif`` images (reshaped to 3 channels).
        mask_loc: directory with the matching ``.tif`` masks (reshaped to 4 channels).
        batch_size: number of samples per yielded batch.
        square_rot_p: probability of rotating by a multiple of 90 degrees;
            otherwise the angle is drawn uniformly from [0, 360).
        seed: seed for numpy's global RNG, which drives all randomness here.

    Yields:
        Tuple ``(x, y)``: ``x`` has shape (batch_size, 320, 368, 3) with pixel
        values divided by 255, ``y`` has shape (batch_size, 320, 368, 4).

    Raises:
        ValueError: if fewer than ``batch_size`` ``.tif`` files are found.
    """
    crop_height, crop_width = 320, 368

    # square_rot_p stays a float: the previous int() cast truncated e.g. 0.3
    # to 0, so the 90-degree branch could never be taken.
    np.random.seed(seed)

    # sorted() keeps the seeded shuffle reproducible regardless of the order
    # in which the filesystem lists the directory.
    all_files = sorted(loc for loc in glob.glob(os.path.join(file_loc, '*'))
                       if loc.rsplit('.', 1)[-1] in ['tif'])
    # The mask shares the image's file name, just in the mask directory.
    all_masks = [os.path.join(mask_loc, os.path.basename(loc)) for loc in all_files]

    # Use every full batch. The earlier "- 1" dropped one batch per pass and,
    # with fewer than two batches available, left the loop below spinning
    # forever without yielding anything.
    num_batches = len(all_files) // batch_size
    if num_batches < 1:
        raise ValueError('Need at least batch_size=%d .tif files in %s, found %d'
                         % (batch_size, file_loc, len(all_files)))

    while 1:
        pairs = list(zip(all_files, all_masks))
        np.random.shuffle(pairs)

        for batch in range(num_batches):
            x = []
            y = []
            for image_path, mask_path in pairs[batch_size*batch:batch_size*(batch+1)]:
                # load the image (context manager closes the file handle)
                with Image.open(image_path) as img:
                    width, height = img.size
                    image = np.reshape(np.array(img.getdata()), (height, width, 3))

                # load the mask
                with Image.open(mask_path) as img:
                    width, height = img.size
                    mask = np.reshape(np.array(img.getdata()), (height, width, 4))

                # All the randomness:
                height, width = image.shape[0], image.shape[1]
                # randint's upper bound is exclusive; "+ 1" allows the last
                # valid offset and images that are exactly crop-sized.
                crop_row = np.random.randint(0, height - crop_height + 1)
                crop_col = np.random.randint(0, width - crop_width + 1)
                flip_vert = np.random.randint(0, 2)
                flip_hor = np.random.randint(0, 2)

                # APPLY AUGMENTATION:
                # flips
                if flip_vert:
                    image = np.flipud(image)
                    mask = np.flipud(mask)

                if flip_hor:
                    image = np.fliplr(image)
                    mask = np.fliplr(mask)

                # rotation
                if np.random.uniform(0, 1) < square_rot_p:
                    # Drawn from numpy's RNG so it is covered by `seed` and
                    # does not depend on `random` being imported elsewhere.
                    angle = int(np.random.choice([0, 90, 180, 270]))
                else:
                    angle = float(np.random.uniform(0, 360))
                # NOTE(review): rotate() interpolates by default, so mask
                # values are blended at region borders - confirm this is
                # acceptable for the label channels.
                image = rotate(image, angle, reshape=False)
                mask = rotate(mask, angle, reshape=False)

                # crop to 320 x 368 so it will fit into network, and for data augmentation
                image = image[crop_row:crop_row+crop_height, crop_col:crop_col+crop_width]
                mask = mask[crop_row:crop_row+crop_height, crop_col:crop_col+crop_width]

                image = image/255.0 # make pixels in [0,1]
                x.append(image)
                y.append(mask)

            yield (np.array(x), np.array(y))


def data_gen_combined(file_loc, mask_loc, batch_size, seed=101):
    """Yield endless batches of randomly cropped (image, mask) pairs.

    Every ``.tif`` directly under ``file_loc`` is paired with the file of the
    same name under ``mask_loc``. No flips or rotations are applied; each
    sample is only cropped to 320 x 368 at a random offset. The file order is
    reshuffled at the start of every pass.

    Args:
        file_loc: directory with the input ``.tif`` images (reshaped to 3 channels).
        mask_loc: directory with the matching ``.tif`` masks (reshaped to 4 channels).
        batch_size: number of samples per yielded batch.
        seed: seed for numpy's global RNG, which drives shuffling and cropping.

    Yields:
        Tuple ``(x, y)``: ``x`` has shape (batch_size, 320, 368, 3) with pixel
        values divided by 255, ``y`` has shape (batch_size, 320, 368, 4).

    Raises:
        ValueError: if fewer than ``batch_size`` ``.tif`` files are found.
    """
    crop_height, crop_width = 320, 368
    np.random.seed(seed)

    # sorted() keeps the seeded shuffle reproducible regardless of the order
    # in which the filesystem lists the directory.
    all_files = sorted(loc for loc in glob.glob(os.path.join(file_loc, '*'))
                       if loc.rsplit('.', 1)[-1] in ['tif'])
    # The mask shares the image's file name, just in the mask directory.
    all_masks = [os.path.join(mask_loc, os.path.basename(loc)) for loc in all_files]

    # Use every full batch. The earlier "- 1" dropped one batch per pass and,
    # with fewer than two batches available, left the loop below spinning
    # forever without yielding anything.
    num_batches = len(all_files) // batch_size
    if num_batches < 1:
        raise ValueError('Need at least batch_size=%d .tif files in %s, found %d'
                         % (batch_size, file_loc, len(all_files)))

    while 1:
        pairs = list(zip(all_files, all_masks))
        np.random.shuffle(pairs)

        for batch in range(num_batches):
            x = []
            y = []
            for image_path, mask_path in pairs[batch_size*batch:batch_size*(batch+1)]:
                # load the image (context manager closes the file handle)
                with Image.open(image_path) as img:
                    width, height = img.size
                    image = np.reshape(np.array(img.getdata()), (height, width, 3))

                # load the mask
                with Image.open(mask_path) as img:
                    width, height = img.size
                    mask = np.reshape(np.array(img.getdata()), (height, width, 4))

                # TODO (from the original notes): pad the inputs to a common
                # size instead of cropping.

                # make it the same size as the training examples
                height, width = image.shape[0], image.shape[1]
                # randint's upper bound is exclusive; "+ 1" allows the last
                # valid offset and images that are exactly crop-sized.
                crop_row = np.random.randint(0, height - crop_height + 1)
                crop_col = np.random.randint(0, width - crop_width + 1)

                # crop to 320 x 368 so it will fit into network
                image = image[crop_row:crop_row+crop_height, crop_col:crop_col+crop_width]
                mask = mask[crop_row:crop_row+crop_height, crop_col:crop_col+crop_width]

                image = image/255.0 # make pixels in [0,1]
                x.append(image)
                y.append(mask)

            yield (np.array(x), np.array(y))



# Distance loss function
def distance_loss(y_true, y_pred):
    """Weighted sum of a per-pixel distance loss and a class loss.

    Channel 0 of both tensors is compared with binary cross-entropy, the
    remaining channels with categorical cross-entropy.

    Args:
        y_true: target tensor, indexed as [:, :, :, channel].
        y_pred: prediction tensor with the same layout.

    Returns:
        ``weight * binary_ce + (1 - weight) * categorical_ce``.
    """
    weight = .05 # how much does the distance matter compared to the cross entropy (fast ai used .001 for 4 more uncertain ones)

    # target/output are passed by keyword: the two positional calls used
    # opposite orders (pred, true) vs (true, pred), so one of them was
    # reversed, and the positional order of these backend functions has
    # differed between Keras releases.
    distance_term = K.binary_crossentropy(target=y_true[:, :, :, 0],
                                          output=y_pred[:, :, :, 0])
    class_term = K.categorical_crossentropy(target=y_true[:, :, :, 1:],
                                            output=y_pred[:, :, :, 1:])

    return(distance_term*weight+(1-weight)*class_term)


def unet_mid2(learning_rate=.0001):
    """Build and compile a three-level U-Net with two output heads.

    The encoder uses 32/64/128 filters with 2x2 max-pooling after each level,
    the bottleneck uses 256 filters, and the decoder mirrors the encoder with
    transposed convolutions and skip connections. Every level applies
    ``conv_block`` twice. The output concatenates a 1-channel sigmoid head
    with a 3-channel softmax head.

    Args:
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras model (loss and metric: ``distance_loss``).
    """
    block_kwargs = dict(dropout=.1, padding='same', strides=(1, 1), activation='relu')

    def double_block(tensor, filters):
        # Two stacked conv_block calls at the same filter count.
        for _ in range(2):
            tensor = conv_block(tensor, filters, 3, 3, **block_kwargs)
        return tensor

    def upsample_and_merge(tensor, skip, filters):
        # 2x transposed-conv upsampling followed by the skip concatenation.
        upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
        return concatenate([upsampled, skip], axis=3)

    img_input = Input(shape=(None, None, 3))

    # Encoder: remember each level's output for the matching decoder level.
    skips = []
    features = img_input
    for filters in (32, 64, 128):
        features = double_block(features, filters)
        skips.append(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Bottleneck
    features = double_block(features, 256)

    # Decoder: deepest skip connection first.
    for filters in (128, 64, 32):
        features = double_block(upsample_and_merge(features, skips.pop(), filters), filters)

    # Output heads: channel 0 = sigmoid, channels 1-3 = softmax.
    dist_head = Conv2D(1, (1, 1), activation='sigmoid')(features)
    class_head = Conv2D(3, (1, 1), activation='softmax')(features)
    output = concatenate([dist_head, class_head])

    model = Model(img_input, output)
    model.compile(optimizer=Adam(lr=learning_rate), loss=distance_loss, metrics=[distance_loss])
    return model



def conv_block(x,
              filters,
              num_row,
              num_col,
              dropout, 
              padding='same',
              strides=(1, 1),
              activation='relu'):
    """Apply Conv2D -> BatchNormalization -> activation -> Dropout, twice.

    Args:
        x: input tensor.
        filters: number of convolution filters in both convolutions.
        num_row: kernel height.
        num_col: kernel width.
        dropout: dropout rate applied after each activation.
        padding: padding mode forwarded to Conv2D.
        strides: strides forwarded to Conv2D.
        activation: activation applied after each batch normalization.

    Returns:
        The output tensor of the second Dropout layer.
    """
    for _ in range(2):
        # The convolution is left linear: previously `activation` was applied
        # inside Conv2D and a hard-coded 'relu' was applied again after the
        # batch norm, which doubled the non-linearity and ignored the
        # `activation` argument for the post-normalization step.
        x = Conv2D(filters, (num_row, num_col), strides=strides, padding=padding)(x)
        x = BatchNormalization()(x)
        x = Activation(activation)(x)
        x = Dropout(dropout)(x)
    return x



In [11]:
import sys
import os
import glob
import random
import numpy as np 
import pandas as pd
import keras
import pickle
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Dropout, Flatten, Reshape, Input
from keras.layers.core import Activation, Dense, Lambda
from keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf

# --- Run configuration ---
data_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample'
mask_loc = '/home/rbbidart/project/rbbidart/cancer_hist/im_dist_labels'
out_loc = '/home/rbbidart/cancer_hist_out/unet_dist/sample'
epochs = 50
batch_size = 2
model_str = 'unet_mid2'


def distance_loss(y_true, y_pred):
    """Weighted sum of binary CE on channel 0 and categorical CE on channels 1+.

    Redefined in this cell so that unet_mid2, which looks the name up when it
    is called, compiles the model with this cell's ``weight``.
    """
    weight = .05 # how much does the distance matter compared to the cross entropy (fast ai used .001 for 4 more uncertain ones)
    # target/output are passed by keyword: the two positional calls used
    # opposite orders, so one of them was reversed.
    distance_term = K.binary_crossentropy(target=y_true[:, :, :, 0],
                                          output=y_pred[:, :, :, 0])
    class_term = K.categorical_crossentropy(target=y_true[:, :, :, 1:],
                                            output=y_pred[:, :, :, 1:])
    # The former tf.Print shape probes were dropped: their return values were
    # discarded, so they were never part of the graph and printed nothing.
    return(distance_term*weight+(1-weight)*class_term)


parameters = {
    'learning_rate': .0001
}

# --- Locations ---
train_loc = os.path.join(data_loc, 'train', '0')
train_mask_loc = os.path.join(mask_loc, 'train', '0')
print(train_loc)

valid_loc = os.path.join(data_loc, 'valid', '0')
valid_mask_loc = os.path.join(mask_loc, 'valid', '0')
print(valid_loc)

# Count only .tif files, which are the only files the generators read.
num_train = len(glob.glob(os.path.join(train_loc, '*.tif')))
num_valid = len(glob.glob(os.path.join(valid_loc, '*.tif')))
print('num_train', num_train)
print('num_valid', num_valid)

# --- Params for all models ---
# Integer division: step counts must be ints (np.floor returned floats).
steps_per_epoch = num_train // batch_size   # batches drawn from the train generator per epoch
validation_steps = num_valid // batch_size  # batches drawn from the valid generator per epoch
print('validation_steps', validation_steps)

# The train generator applies identical augmentation to image and mask;
# the validation generator only crops.
valid_generator = data_gen_combined(valid_loc, valid_mask_loc, batch_size, seed=101)
train_generator = data_gen_aug_combined(train_loc, train_mask_loc, batch_size, square_rot_p=.3,  seed=101)

model = unet_mid2(**parameters)
model.summary()  # summary() prints by itself; wrapping it in print() only added "None"

name = model_str+'_'+'custom_aug'
out_file = os.path.join(out_loc, name)
os.makedirs(out_loc, exist_ok=True)  # checkpoints and history are written here

# NOTE(review): both callbacks monitor the *training* metric; consider
# 'val_distance_loss' if model selection should use the validation data.
checkpointer = ModelCheckpoint(filepath=os.path.join(out_loc, name+'_.{epoch:02d}-{distance_loss:.2f}.hdf5'),
                               verbose=1, monitor='distance_loss', save_best_only=True)
callbacks = [EarlyStopping(monitor='distance_loss', patience=15, verbose=0),
             checkpointer]

hist = model.fit_generator(train_generator,
                           validation_data=valid_generator,
                           steps_per_epoch=steps_per_epoch,
                           epochs=epochs,
                           validation_steps=validation_steps,
                           callbacks=callbacks)

# Persist the training curves; the context manager closes the file handle.
with open(out_file, 'wb') as history_file:
    pickle.dump(hist.history, history_file)

/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/train/0
/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/valid/0
num_train 15
num_valid 4
validation_steps 2.0
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_5 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_121 (Conv2D)              (None, None, None, 32 896         input_5[0][0]                    
____________________________________________________________________________________________________
batch_normalization_113 (BatchNo (None, None, None, 32 128         conv2d_121[0][0]                 
____________________________________________________________________________________________________
ac

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
import sys
import os
import glob
import random
import numpy as np 
import pandas as pd
import keras
import pickle
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Dropout, Flatten, Reshape, Input
from keras.layers.core import Activation, Dense, Lambda
from keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf

# --- Run configuration ---
data_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample'
mask_loc = '/home/rbbidart/project/rbbidart/cancer_hist/im_dist_labels'
out_loc = '/home/rbbidart/cancer_hist_out/unet_dist/sample'
epochs = 50
batch_size = 2
model_str = 'unet_mid2'


def distance_loss(y_true, y_pred):
    """Weighted sum of binary CE on channel 0 and categorical CE on channels 1+.

    Redefined in this cell so that unet_mid2, which looks the name up when it
    is called, compiles the model with this cell's ``weight``. With
    ``weight = 1`` the categorical term is multiplied by zero, so only the
    channel-0 loss is trained.
    """
    weight = 1 # how much does the distance matter compared to the cross entropy (fast ai used .001 for 4 more uncertain ones)
    # target/output are passed by keyword: the two positional calls used
    # opposite orders, so one of them was reversed.
    distance_term = K.binary_crossentropy(target=y_true[:, :, :, 0],
                                          output=y_pred[:, :, :, 0])
    class_term = K.categorical_crossentropy(target=y_true[:, :, :, 1:],
                                            output=y_pred[:, :, :, 1:])
    # The former tf.Print shape probes were dropped: their return values were
    # discarded, so they were never part of the graph and printed nothing.
    return(distance_term*weight+(1-weight)*class_term)


parameters = {
    'learning_rate': .0001
}

# --- Locations ---
train_loc = os.path.join(data_loc, 'train', '0')
train_mask_loc = os.path.join(mask_loc, 'train', '0')
print(train_loc)

valid_loc = os.path.join(data_loc, 'valid', '0')
valid_mask_loc = os.path.join(mask_loc, 'valid', '0')
print(valid_loc)

# Count only .tif files, which are the only files the generators read.
num_train = len(glob.glob(os.path.join(train_loc, '*.tif')))
num_valid = len(glob.glob(os.path.join(valid_loc, '*.tif')))
print('num_train', num_train)
print('num_valid', num_valid)

# --- Params for all models ---
# Integer division: step counts must be ints (np.floor returned floats).
steps_per_epoch = num_train // batch_size   # batches drawn from the train generator per epoch
validation_steps = num_valid // batch_size  # batches drawn from the valid generator per epoch
print('validation_steps', validation_steps)

# Both generators only crop here: this run trains without flips/rotations.
valid_generator = data_gen_combined(valid_loc, valid_mask_loc, batch_size, seed=101)
train_generator = data_gen_combined(train_loc, train_mask_loc, batch_size, seed=101)

model = unet_mid2(**parameters)
model.summary()  # summary() prints by itself; wrapping it in print() only added "None"

# NOTE(review): the name still says 'custom_aug' although no augmentation is
# used, and it is shared with the other training cells, so their checkpoints
# and history files overwrite each other - confirm whether that is intended.
name = model_str+'_'+'custom_aug'
out_file = os.path.join(out_loc, name)
os.makedirs(out_loc, exist_ok=True)  # checkpoints and history are written here

# NOTE(review): both callbacks monitor the *training* metric; consider
# 'val_distance_loss' if model selection should use the validation data.
checkpointer = ModelCheckpoint(filepath=os.path.join(out_loc, name+'_.{epoch:02d}-{distance_loss:.2f}.hdf5'),
                               verbose=1, monitor='distance_loss', save_best_only=True)
callbacks = [EarlyStopping(monitor='distance_loss', patience=15, verbose=0),
             checkpointer]

hist = model.fit_generator(train_generator,
                           validation_data=valid_generator,
                           steps_per_epoch=steps_per_epoch,
                           epochs=epochs,
                           validation_steps=validation_steps,
                           callbacks=callbacks)

# Persist the training curves; the context manager closes the file handle.
with open(out_file, 'wb') as history_file:
    pickle.dump(hist.history, history_file)

/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/train/0
/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/valid/0
num_train 15
num_valid 4
validation_steps 2.0
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_7 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_181 (Conv2D)              (None, None, None, 32 896         input_7[0][0]                    
____________________________________________________________________________________________________
batch_normalization_169 (BatchNo (None, None, None, 32 128         conv2d_181[0][0]                 
____________________________________________________________________________________________________
ac

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50


Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 49/50
Epoch 50/50


In [3]:
import sys
import os
import glob
import random
import numpy as np 
import pandas as pd
import keras
import pickle
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Dropout, Flatten, Reshape, Input
from keras.layers.core import Activation, Dense, Lambda
from keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf

# --- Run configuration ---
data_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample'
mask_loc = '/home/rbbidart/project/rbbidart/cancer_hist/im_dist_labels'
out_loc = '/home/rbbidart/cancer_hist_out/unet_dist/sample'
epochs = 50
batch_size = 2
model_str = 'unet_mid2'


def distance_loss(y_true, y_pred):
    """Weighted sum of binary CE on channel 0 and categorical CE on channels 1+.

    Redefined in this cell so that unet_mid2, which looks the name up when it
    is called, compiles the model with this cell's ``weight``.
    """
    weight = .1 # how much does the distance matter compared to the cross entropy (fast ai used .001 for 4 more uncertain ones)
    # target/output are passed by keyword: the two positional calls used
    # opposite orders, so one of them was reversed.
    distance_term = K.binary_crossentropy(target=y_true[:, :, :, 0],
                                          output=y_pred[:, :, :, 0])
    class_term = K.categorical_crossentropy(target=y_true[:, :, :, 1:],
                                            output=y_pred[:, :, :, 1:])
    # The former tf.Print shape probes were dropped: their return values were
    # discarded, so they were never part of the graph and printed nothing.
    return(distance_term*weight+(1-weight)*class_term)


parameters = {
    'learning_rate': .0001
}

# --- Locations ---
train_loc = os.path.join(data_loc, 'train', '0')
train_mask_loc = os.path.join(mask_loc, 'train', '0')
print(train_loc)

valid_loc = os.path.join(data_loc, 'valid', '0')
valid_mask_loc = os.path.join(mask_loc, 'valid', '0')
print(valid_loc)

# Count only .tif files, which are the only files the generators read.
num_train = len(glob.glob(os.path.join(train_loc, '*.tif')))
num_valid = len(glob.glob(os.path.join(valid_loc, '*.tif')))
print('num_train', num_train)
print('num_valid', num_valid)

# --- Params for all models ---
# Integer division: step counts must be ints (np.floor returned floats).
steps_per_epoch = num_train // batch_size   # batches drawn from the train generator per epoch
validation_steps = num_valid // batch_size  # batches drawn from the valid generator per epoch
print('validation_steps', validation_steps)

# Both generators only crop here: this run trains without flips/rotations.
valid_generator = data_gen_combined(valid_loc, valid_mask_loc, batch_size, seed=101)
train_generator = data_gen_combined(train_loc, train_mask_loc, batch_size, seed=101)

model = unet_mid2(**parameters)
model.summary()  # summary() prints by itself; wrapping it in print() only added "None"

# NOTE(review): the name still says 'custom_aug' although no augmentation is
# used, and it is shared with the other training cells, so their checkpoints
# and history files overwrite each other - confirm whether that is intended.
name = model_str+'_'+'custom_aug'
out_file = os.path.join(out_loc, name)
os.makedirs(out_loc, exist_ok=True)  # checkpoints and history are written here

# NOTE(review): both callbacks monitor the *training* metric; consider
# 'val_distance_loss' if model selection should use the validation data.
checkpointer = ModelCheckpoint(filepath=os.path.join(out_loc, name+'_.{epoch:02d}-{distance_loss:.2f}.hdf5'),
                               verbose=1, monitor='distance_loss', save_best_only=True)
callbacks = [EarlyStopping(monitor='distance_loss', patience=15, verbose=0),
             checkpointer]

hist = model.fit_generator(train_generator,
                           validation_data=valid_generator,
                           steps_per_epoch=steps_per_epoch,
                           epochs=epochs,
                           validation_steps=validation_steps,
                           callbacks=callbacks)

# Persist the training curves; the context manager closes the file handle.
with open(out_file, 'wb') as history_file:
    pickle.dump(hist.history, history_file)

/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/train/0
/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/valid/0
num_train 15
num_valid 4
validation_steps 2.0
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, None, None, 32 896         input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, None, None, 32 128         conv2d_1[0][0]                   
____________________________________________________________________________________________________
ac

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50


In [1]:

import sys
import os
import glob
import random
import numpy as np 
import pandas as pd
import keras
import pickle
from keras import backend as K
from keras.engine.topology import Layer
from keras.layers import Dropout, Flatten, Reshape, Input
from keras.layers.core import Activation, Dense, Lambda
from keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf

def unet_standard(learning_rate=.0001):
    """Build and compile a four-level U-Net with a 4-channel softmax output.

    The encoder uses 32/64/128/256 filters (two 3x3 ReLU convolutions per
    level, then 2x2 max-pooling), the bottleneck uses 512 filters, and the
    decoder mirrors the encoder with transposed convolutions and skip
    connections.

    NOTE(review): the layer classes, Model, Adam, dice_coef_loss and dice_coef
    are not imported or defined in this cell; they must already exist in the
    kernel - confirm where dice_coef_loss / dice_coef come from.

    Args:
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras model (loss ``dice_coef_loss``, metric ``dice_coef``).
    """
    def conv_pair(tensor, filters):
        # Two stacked 3x3 ReLU convolutions at the same filter count.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    def upsample_and_merge(tensor, skip, filters):
        # 2x transposed-conv upsampling followed by the skip concatenation.
        upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
        return concatenate([upsampled, skip], axis=3)

    img_input = Input(shape=(None, None, 3))

    # Encoder: remember each level's output for the matching decoder level.
    skips = []
    features = img_input
    for filters in (32, 64, 128, 256):
        features = conv_pair(features, filters)
        skips.append(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Bottleneck
    features = conv_pair(features, 512)

    # Decoder: deepest skip connection first.
    for filters in (256, 128, 64, 32):
        features = conv_pair(upsample_and_merge(features, skips.pop(), filters), filters)

    conv10 = Conv2D(4, (1, 1), activation='softmax')(features)

    model = Model(img_input, conv10)
    model.compile(optimizer=Adam(lr=learning_rate), loss=dice_coef_loss, metrics=[dice_coef])
    return model

# NOTE: this cell needs data_gen_combined (and the Keras layer imports used by
# unet_standard) from the definitions cell; run that cell first in a fresh
# kernel, otherwise it stops with a NameError.

# --- Run configuration ---
data_loc = '/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample'
mask_loc = '/home/rbbidart/project/rbbidart/cancer_hist/im_dist_labels'
out_loc = '/home/rbbidart/cancer_hist_out/unet_dist/sample'
epochs = 50
batch_size = 2
# Was 'unet_mid2' (copied from the other training cells), which would have
# labelled this model's checkpoints as a different architecture.
model_str = 'unet_standard'


def distance_loss(y_true, y_pred):
    """Weighted sum of binary CE on channel 0 and categorical CE on channels 1+.

    Not used by unet_standard, which compiles with dice_coef_loss; kept so the
    name stays defined for other cells.
    """
    weight = .1 # how much does the distance matter compared to the cross entropy (fast ai used .001 for 4 more uncertain ones)
    # target/output are passed by keyword: the two positional calls used
    # opposite orders, so one of them was reversed.
    distance_term = K.binary_crossentropy(target=y_true[:, :, :, 0],
                                          output=y_pred[:, :, :, 0])
    class_term = K.categorical_crossentropy(target=y_true[:, :, :, 1:],
                                            output=y_pred[:, :, :, 1:])
    # The former tf.Print shape probes were dropped: their results were bound
    # to unused names, so they were never part of the graph and printed nothing.
    return(distance_term*weight+(1-weight)*class_term)


parameters = {
    'learning_rate': .0001
}

# --- Locations ---
train_loc = os.path.join(data_loc, 'train', '0')
train_mask_loc = os.path.join(mask_loc, 'train', '0')
print(train_loc)

valid_loc = os.path.join(data_loc, 'valid', '0')
valid_mask_loc = os.path.join(mask_loc, 'valid', '0')
print(valid_loc)

# Count only .tif files, which are the only files the generators read.
num_train = len(glob.glob(os.path.join(train_loc, '*.tif')))
num_valid = len(glob.glob(os.path.join(valid_loc, '*.tif')))
print('num_train', num_train)
print('num_valid', num_valid)

# --- Params for all models ---
# Integer division: step counts must be ints (np.floor returned floats).
steps_per_epoch = num_train // batch_size   # batches drawn from the train generator per epoch
validation_steps = num_valid // batch_size  # batches drawn from the valid generator per epoch
print('validation_steps', validation_steps)

# Both generators only crop here: this run trains without flips/rotations.
valid_generator = data_gen_combined(valid_loc, valid_mask_loc, batch_size, seed=101)
train_generator = data_gen_combined(train_loc, train_mask_loc, batch_size, seed=101)

model = unet_standard(**parameters)
model.summary()  # summary() prints by itself; wrapping it in print() only added "None"

# NOTE(review): the suffix still says 'custom_aug' although no augmentation is
# used in this run.
name = model_str+'_'+'custom_aug'
out_file = os.path.join(out_loc, name)
os.makedirs(out_loc, exist_ok=True)  # checkpoints and history are written here

# unet_standard reports 'loss' and 'dice_coef', not 'distance_loss', so the
# callbacks monitor (and the checkpoint name formats) 'loss'; the previous
# '{distance_loss:.2f}' placeholder has no matching entry in the epoch logs.
checkpointer = ModelCheckpoint(filepath=os.path.join(out_loc, name+'_.{epoch:02d}-{loss:.2f}.hdf5'),
                               verbose=1, monitor='loss', save_best_only=True)
callbacks = [EarlyStopping(monitor='loss', patience=15, verbose=0),
             checkpointer]

hist = model.fit_generator(train_generator,
                           validation_data=valid_generator,
                           steps_per_epoch=steps_per_epoch,
                           epochs=epochs,
                           validation_steps=validation_steps,
                           callbacks=callbacks)

# Persist the training curves; the context manager closes the file handle.
with open(out_file, 'wb') as history_file:
    pickle.dump(hist.history, history_file)

Using TensorFlow backend.


/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/train/0
/home/rbbidart/project/rbbidart/cancer_hist/full_slides2_sample/valid/0
num_train 15
num_valid 4
validation_steps 2.0


NameError: name 'data_gen_combined' is not defined