## Split the data (currently all of it is in the train folder)

## Data Loading and Formatting

In [None]:
from glob import glob
import os

# Directory holding the training images and their masks
path_to_train = 'data/train'

# Satellite images end in '_sat.jpg', their masks in '_msk.png'
glob_train_imgs = os.path.join(path_to_train, '*_sat.jpg')
glob_train_masks = os.path.join(path_to_train, '*_msk.png')

# glob() returns paths in arbitrary, filesystem-dependent order; sort them so
# the listing is reproducible across runs and machines.
train_img_paths = sorted(glob(glob_train_imgs))
train_mask_paths = sorted(glob(glob_train_masks))

In [None]:
from skimage.io import imread
from skimage.transform import resize
from skimage.color import rgb2gray
import cv2

# This will be useful so we can construct the corresponding mask
def get_img_id(img_path):
    """Return the image id: the file name without its extension and 4-character tag.

    The last ``len('_sat')`` characters of the extension-less file name are
    dropped unconditionally; the name is not checked for actually ending in
    ``'_sat'``.
    """
    stem, _ext = os.path.splitext(os.path.basename(img_path))
    tag_length = len('_sat')
    return stem[:-tag_length]

# Create image generator and perform preprocessing
def image_gen(img_paths, img_size=(512, 512), train=True):
    """Yield preprocessed images (and their masks when ``train`` is true).

    Args:
        img_paths: Iterable of image file paths. The image id is derived from
            each path with ``get_img_id``.
        img_size: ``(height, width)`` that every yielded array is brought to.
            Arrays already at this size are passed through without resampling.
        train: When true, the mask ``<id>_msk.png`` is read from
            ``path_to_train`` and ``(img, mask)`` pairs are yielded; otherwise
            only ``img`` is yielded.

    Yields:
        ``img`` divided by 255 (assumes 8-bit input images), or ``(img, mask)``
        where ``mask`` is a float array containing only 0.0 and 1.0.
    """
    target_shape = tuple(img_size)

    # Iterate over all the image paths
    for img_path in img_paths:

        # Needed to construct the corresponding mask path
        img_id = get_img_id(img_path)

        # Load the image. This step was missing, so `img` was read before it
        # was ever assigned and the generator failed on its first item.
        img = imread(img_path)
        if img.shape[:2] != target_shape:
            # preserve_range keeps the original value range so the division
            # below still maps the data to 0-1
            img = resize(img, target_shape, mode='constant', preserve_range=True)

        # Normalize it to 0-1 range
        img = img / 255.

        # Get mask information for train data
        if train:
            mask_path = os.path.join(path_to_train, img_id + '_msk.png')
            mask = rgb2gray(imread(mask_path))
            if mask.shape[:2] != target_shape:
                # Keep the mask the same size as the image it belongs to
                mask = resize(mask, target_shape, mode='constant', preserve_range=True)
            # Turn the mask back into a 0-1 mask
            mask = (mask >= 0.5).astype(float)

            # Yield the image mask pair
            yield img, mask

        else:
            yield img

## Metric Definitions

In [None]:
import keras.backend.tensorflow_backend as K
from keras.losses import binary_crossentropy

# Small constant that keeps the Dice ratio defined when both masks are empty
smooth = 1e-9

# This is the competition metric implemented using Keras
def dice_coef(y_true, y_pred):
    """Return the Dice coefficient between ``y_true`` and thresholded ``y_pred``.

    ``y_pred`` is cast to float32 and binarized at 0.5 before the comparison,
    so this is a hard metric (not differentiable) and is meant for monitoring,
    not as a loss.

    Args:
        y_true: Ground-truth mask tensor.
        y_pred: Predicted mask tensor.

    Returns:
        Scalar tensor ``(2 * |A ∩ B| + smooth) / (|A| + |B| + smooth)``.
    """
    y_true_f = K.flatten(y_true)
    y_pred = K.cast(y_pred, 'float32')
    y_pred_f = K.cast(K.greater(K.flatten(y_pred), 0.5), 'float32')
    intersection = K.sum(y_true_f * y_pred_f)
    # `smooth` must sit outside the factor of 2: with it inside, two empty
    # masks scored 2 * smooth / smooth = 2.0 instead of the correct 1.0.
    score = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
    return score

# We'll construct a Keras Loss that incorporates the DICE score
def dice_loss(y_true, y_pred):
    """Return ``1 - soft Dice`` computed over the flattened tensors.

    The predictions are used as-is (no thresholding), and a constant 1 is
    added to both numerator and denominator of the Dice ratio.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    total = K.sum(truth) + K.sum(pred)
    soft_dice = (2. * overlap + 1.) / (total + 1.)
    return 1. - soft_dice


def bce_dice_loss(y_true, y_pred):
    """Return half the binary cross-entropy plus the Dice loss."""
    bce_term = 0.5 * binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return bce_term + dice_term

In [None]:
import tensorflow as tf
# Let TensorFlow grow its GPU memory use on demand
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
# Register the configured session with Keras; without this the session
# created above is never used by Keras.
K.set_session(session)
from keras.optimizers import Adam

## Load Model and Test

In [None]:
# Predict
import multiprocessing
from keras.models import load_model

# Every custom loss/metric the model may have been compiled with has to be
# supplied here; load_model cannot resolve a custom function it is not given.
# Entries the saved model does not reference are simply ignored.
custom_objects = {
    "dice_coef": dice_coef,
    "dice_loss": dice_loss,
    "bce_dice_loss": bce_dice_loss,
}

# NOTE(review): `filepath`, `test_batch_generator`, `calc_steps` and
# `BATCHSIZE` are not defined in the visible part of this file — confirm they
# are defined before this cell runs.
model = load_model(filepath, custom_objects=custom_objects)

num_cores = multiprocessing.cpu_count()

path_to_test = 'data/val'
glob_test_imgs = os.path.join(path_to_test, '*_sat.jpg')

# Sort so that the order of the predictions is reproducible across runs
test_img_paths = sorted(glob(glob_test_imgs))

testgen = test_batch_generator(test_img_paths)

# Steps is the number of batches to compute results for
test_masks = model.predict_generator(testgen, steps=calc_steps(len(test_img_paths), BATCHSIZE), verbose=1)
# test_masks = model.predict_generator(testgen, steps=calc_steps(len(test_img_paths), BATCHSIZE), workers=num_cores, use_multiprocessing=True, verbose=1)


## References

Source for the pretrained UNet model:
    https://www.kaggle.com/kmader/data-preprocessing-and-unet-segmentation-gpu