In [1]:
# Root of the Kaggle workspace; every other location is derived from it.
path = '/notebook/kaggle/'
PROJ_HOME_DIR = ''.join([path, 'data/ultrasound/'])
# Append + 'sample/' here to point at a sample directory instead (left disabled).
DATA_HOME_DIR = PROJ_HOME_DIR

# Sub-directories of the data directory; each keeps its trailing slash so that
# file names can be appended by plain string concatenation.
train_path, valid_path, test_path, results_path = tuple(
    DATA_HOME_DIR + subdir
    for subdir in ('train/', 'valid/', 'test/', 'results/')
)

In [2]:
# Install scikit-image (imported below as `skimage`) and kaggle-cli
# (presumably the package that provides the `kg` command used in later cells).
# NOTE(review): no versions are pinned, so a re-run may install different releases.
!pip install scikit-image
!pip install kaggle-cli

Collecting kaggle-cli
  Downloading kaggle-cli-0.12.10.tar.gz
Collecting cliff<2.9,>=2.8.0 (from kaggle-cli)
  Downloading cliff-2.8.0-py2-none-any.whl (68kB)
[K    100% |################################| 71kB 1.7MB/s ta 0:00:01
[?25hCollecting MechanicalSoup<0.9,>=0.7.0 (from kaggle-cli)
  Downloading MechanicalSoup-0.8.0-py2.py3-none-any.whl
Collecting lxml<4.1,>=4.0.0 (from kaggle-cli)
  Downloading lxml-4.0.0-cp27-cp27mu-manylinux1_x86_64.whl (5.3MB)
[K    100% |################################| 5.3MB 249kB/s eta 0:00:01
[?25hCollecting cssselect<1.1,>=1.0.1 (from kaggle-cli)
  Downloading cssselect-1.0.1-py2.py3-none-any.whl
Collecting progressbar2<3.35,>=3.34.3 (from kaggle-cli)
  Downloading progressbar2-3.34.3-py2.py3-none-any.whl
Collecting unicodecsv>=0.8.0; python_version < "3.0" (from cliff<2.9,>=2.8.0->kaggle-cli)
  Downloading unicodecsv-0.14.1.tar.gz
Collecting PrettyTable<0.8,>=0.7.1 (from cliff<2.9,>=2.8.0->kaggle-cli)
  Downloading prettytable-0.7.2.zip
Collecting

In [3]:
import json

# Read the Kaggle credentials from a JSON file in the workspace root so that
# they are not written into the notebook itself.
credentials_file = path+'kaggle.txt'
with open(credentials_file) as json_file:
    kg_data = json.load(json_file)

# Keep the two fields as separate variables for interpolation into shell commands.
username = kg_data['username']
password = kg_data['password']

In [4]:
# Configure the `kg` client with the credentials loaded above and the competition name.
# NOTE(review): username and password are interpolated unquoted into a shell command
# line; values containing spaces or shell metacharacters would break it — confirm.
!kg config -g -u $username -p $password -c 'ultrasound-nerve-segmentation'

# The download step is disabled by wrapping it in a string literal. The string is
# still the last expression of the cell, so it is echoed as the cell's output.
'''
%cd $DATA_HOME_DIR
!kg download
'''

'\n%cd $DATA_HOME_DIR\n!kg download\n'

In [5]:
# Print the CUDA_VISIBLE_DEVICES environment variable as seen by the shell
!echo $CUDA_VISIBLE_DEVICES

0


In [6]:
from __future__ import print_function

import os
from skimage.transform import resize
from skimage.io import imsave
import numpy as np
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras import backend as K

#import sys
#sys.path.append('/notebook/kaggle/data/ultrasound')
#from data import load_train_data, load_test_data

# Use the channels-last tensor layout (TF dimension ordering) throughout this code
K.set_image_data_format('channels_last')

# Height and width that every image is resized to before it enters the network
img_rows, img_cols = 96, 96

Using TensorFlow backend.


In [7]:
def preprocess(imgs):
    '''Resize a stack of 2-D images to the network input size.

    Args:
        imgs: array whose first axis indexes the images; each ``imgs[i]`` is
            resized independently.

    Returns:
        ``np.uint8`` array of shape ``(imgs.shape[0], img_rows, img_cols, 1)``.
        Resized values keep their input range (``preserve_range=True``) and are
        cast to 8-bit unsigned integers when stored.
    '''
    imgs_p = np.ndarray((imgs.shape[0], img_rows, img_cols), dtype=np.uint8)
    for i in range(imgs.shape[0]):
        # skimage's output_shape is (rows, cols). Passing (cols, rows) would
        # produce a shape mismatch as soon as the two sizes differ.
        imgs_p[i] = resize(imgs[i], (img_rows, img_cols), preserve_range=True)

    # Append a trailing channel axis of length 1 (channels-last layout)
    imgs_p = imgs_p[..., np.newaxis]
    return imgs_p

This competition is evaluated on the mean [Dice coefficient](https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient). The Dice coefficient can be used to compare the pixel-wise agreement between a predicted segmentation and its corresponding ground truth.

In [8]:
# Dice coefficient between actual and predicted pixels
def dice_coef(y_true, y_pred, smooth=1.):
    """Return the Dice coefficient computed over all elements of both tensors.

    Both tensors are flattened, so the coefficient is computed over the whole
    batch at once rather than per image.

    Args:
        y_true: ground-truth mask tensor.
        y_pred: predicted mask tensor with the same number of elements.
        smooth: constant added to numerator and denominator. It prevents a
            0/0 (NaN) result when both masks are completely empty, in which
            case the coefficient evaluates to 1.

    Returns:
        Scalar tensor ``(2 * sum(y_true * y_pred) + smooth) /
        (sum(y_true) + sum(y_pred) + smooth)``.
    """
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

# Negated Dice coefficient, so that minimizing the loss maximizes the overlap
def dice_coef_loss(y_true, y_pred):
    """Return the negative of ``dice_coef(y_true, y_pred)`` for use as a loss."""
    overlap = dice_coef(y_true, y_pred)
    return -overlap

Create and compile a [**U-Net model**](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/): a convolutional network designed to be trained with heavy data augmentation so that the available annotated samples are used more efficiently. The architecture consists of a contracting path that captures context and a symmetric expanding path that enables precise localization.
[More info...](https://arxiv.org/abs/1505.04597)

In [9]:
def _conv_pair(tensor, filters):
    """Apply two consecutive 3x3, 'same'-padded ReLU convolutions with `filters` channels."""
    tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
    return Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)


def _up_merge(tensor, skip, filters):
    """Upsample `tensor` 2x with a transposed convolution and concatenate `skip` on the channel axis."""
    upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
    return concatenate([upsampled, skip], axis=3)


# Single-channel input of the configured size (channels-last)
inputs = Input((img_rows, img_cols, 1))

# Contracting path: conv pair followed by 2x2 max pooling, doubling the filters each level
conv1 = _conv_pair(inputs, 32)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

conv2 = _conv_pair(pool1, 64)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

conv3 = _conv_pair(pool2, 128)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

conv4 = _conv_pair(pool3, 256)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

# Bottleneck
conv5 = _conv_pair(pool4, 512)

# Expanding path: upsample, merge with the matching contracting-path output, conv pair
up6 = _up_merge(conv5, conv4, 256)
conv6 = _conv_pair(up6, 256)

up7 = _up_merge(conv6, conv3, 128)
conv7 = _conv_pair(up7, 128)

up8 = _up_merge(conv7, conv2, 64)
conv8 = _conv_pair(up8, 64)

up9 = _up_merge(conv8, conv1, 32)
conv9 = _conv_pair(up9, 32)

# 1x1 convolution with a sigmoid yields one value in [0, 1] per pixel
conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

model = Model(inputs=[inputs], outputs=[conv10])

# Optimize the negated Dice coefficient and report the Dice coefficient as a metric
model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=[dice_coef])

# Write weights to disk only when the validation loss improves
model_checkpoint = ModelCheckpoint(results_path+'weights.h5', monitor='val_loss', save_best_only=True)

In [None]:
# Load the training images and their masks, resizing both to the network input size
imgs_train = preprocess(np.load(DATA_HOME_DIR+'imgs_train.npy'))
imgs_mask_train = preprocess(np.load(DATA_HOME_DIR+'imgs_mask_train.npy'))

# Standardize the images with the global mean and standard deviation of the
# training set; both statistics are kept so the test data can reuse them.
imgs_train = imgs_train.astype('float32')
mean = np.mean(imgs_train)
std = np.std(imgs_train)
imgs_train = (imgs_train - mean) / std

# Scale the masks from [0, 255] to [0, 1]
imgs_mask_train = imgs_mask_train.astype('float32') / 255.

  warn("The default mode, 'constant', will be changed to 'reflect' in "


Train model

In [None]:
# Train with 20% of the samples held out for validation; the checkpoint
# callback stores the weights of the best epoch by validation loss.
model.fit(
    imgs_train,
    imgs_mask_train,
    batch_size=32,
    epochs=20,
    verbose=1,
    shuffle=True,
    validation_split=0.2,
    callbacks=[model_checkpoint]
)

kwargs passed to function are ignored with Tensorflow backend


Train on 4508 samples, validate on 1127 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20

Make predictions on test data

In [None]:
# Load the test images (resized to the network input size) and their ids
imgs_test = preprocess(np.load(DATA_HOME_DIR+'imgs_test.npy'))
imgs_id_test = np.load(DATA_HOME_DIR+'imgs_id_test.npy')

# Standardize with the statistics computed on the training images
imgs_test = (imgs_test.astype('float32') - mean) / std

In [None]:
# Load saved weights
# (the file written by the ModelCheckpoint callback during training)
model.load_weights(results_path+'weights.h5')

# Predict masks using test data
imgs_mask_test = model.predict(imgs_test, verbose=1)
# NOTE(review): this path has no directory component, so the array is written to the
# current working directory rather than under results_path — confirm this is intended.
np.save('imgs_mask_test.npy', imgs_mask_test)

In [None]:
# Write every predicted mask as an 8-bit PNG named after its image id
pred_dir = results_path+'preds'
if not os.path.exists(pred_dir):
    os.mkdir(pred_dir)

for image, image_id in zip(imgs_mask_test, imgs_id_test):
    out_name = str(image_id) + '_pred.png'
    # Drop the channel axis and rescale the sigmoid output from [0, 1] to [0, 255]
    image = (255. * image[:, :, 0]).astype(np.uint8)
    imsave(os.path.join(pred_dir, out_name), image)

Generate submission file

In [None]:
#from __future__ import print_function

#import numpy as np
from skimage.transform import resize

# Size that each predicted mask is resized to before run-length encoding;
# presumably the resolution of the original competition images — TODO confirm.
image_rows = 420
image_cols = 580

def prep(img):
    """Binarize a predicted mask and resize it to the submission resolution.

    Args:
        img: 2-D array of predicted values; values above 0.5 become foreground.

    Returns:
        Array of shape ``(image_rows, image_cols)``. Values are the result of
        resizing the 0/1 mask with ``preserve_range=True``, so pixels along
        mask edges may hold fractional values.
    """
    img = img.astype('float32')
    img = (img > 0.5).astype(np.uint8)  # threshold
    # skimage's output_shape is (rows, cols). Passing (cols, rows) would yield
    # a transposed-size image and shift every run-length pixel index.
    img = resize(img, (image_rows, image_cols), preserve_range=True)
    return img

def run_length_enc(label):
    """Run-length encode the positive pixels of a 2-D mask.

    Pixels are numbered from 1 in column-major order (down each column, then
    across columns). The result is a space-separated sequence of
    ``start length`` pairs, one pair per run of consecutive positive pixels.

    Args:
        label: 2-D array; every element greater than 0 counts as foreground.

    Returns:
        The encoded string, or ``''`` when fewer than 10 pixels are positive
        (such masks are treated as empty).
    """
    flat = label.transpose().flatten()
    positive = np.where(flat > 0)[0]
    if len(positive) < 10:  # consider as empty
        return ''

    # A run ends wherever two consecutive positive indices are not adjacent
    breaks = np.where(np.diff(positive) > 1)[0]
    starts = np.insert(positive[breaks + 1], 0, positive[0])
    ends = np.append(positive[breaks], positive[-1])

    tokens = []
    for first, last in zip(starts, ends):
        tokens.append(str(first + 1))         # 1-based start position
        tokens.append(str(last - first + 1))  # inclusive run length
    return ' '.join(tokens)

# Create .csv for submission
# Order the predictions by image id. The sorted arrays get their own names so
# that imgs_id_test and imgs_mask_test stay aligned with each other and the
# cell gives the same result when it is run again.
argsort = np.argsort(imgs_id_test)
ids_sorted = imgs_id_test[argsort]
masks_sorted = imgs_mask_test[argsort]

total = masks_sorted.shape[0]
ids = []
rles = []
for i in range(total):
    # Encode the predicted mask (not the input image). Predictions are
    # channels-last, so drop the trailing channel axis to get a 2-D mask.
    img = masks_sorted[i, :, :, 0]
    img = prep(img)
    rle = run_length_enc(img)

    rles.append(rle)
    ids.append(ids_sorted[i])

    if i % 1000 == 0:
        print('{}/{}'.format(i, total))

first_row = 'img,pixels'
file_name = results_path+'submission.csv'

# One line per image: "<id>,<run-length encoding>" (encoding may be empty)
with open(file_name, 'w') as f:
    f.write(first_row + '\n')
    for i in range(total):
        s = str(ids[i]) + ',' + rles[i]
        f.write(s + '\n')

In [None]:
# Submit the generated CSV with the `kg` command-line client
file_name = results_path+'submission.csv'
! kg submit $file_name