In [None]:
from __future__ import (absolute_import, division, print_function, unicode_literals)
import numpy as np
import pandas as pd
import os
import glob

from matplotlib import cm
from matplotlib import pyplot as plt

import pydicom

import numpy as np

from tqdm import tqdm_notebook

In [None]:
def show_dcm_info(dataset, file_path=None):
    """Print a short human-readable summary of a DICOM dataset.

    Parameters
    ----------
    dataset : object
        DICOM dataset exposing the attributes printed below
        (``SOPClassUID``, ``PatientName``, ...) and supporting ``in`` tests
        for the ``PixelData`` / ``PixelSpacing`` keywords.
    file_path : str, optional
        Path shown on the "Filename" line. When omitted, the dataset's own
        ``filename`` attribute is used if it has one, so the function no
        longer depends on a global ``file_path`` variable being defined.
    """
    if file_path is None:
        file_path = getattr(dataset, "filename", "<unknown>")

    print("Filename.........:", file_path)
    print("Storage type.....:", dataset.SOPClassUID)
    print()

    print("Patient's name......:", dataset.PatientName)
    print("Patient id..........:", dataset.PatientID)
    print("Patient's Age.......:", dataset.PatientAge)
    print("Patient's Sex.......:", dataset.PatientSex)
    print("Modality............:", dataset.Modality)
    print("Body Part Examined..:", dataset.BodyPartExamined)
    print("View Position.......:", dataset.ViewPosition)

    # Image geometry is only reported when the dataset carries pixel data.
    if 'PixelData' in dataset:
        rows = int(dataset.Rows)
        cols = int(dataset.Columns)
        print("Image size.......: {rows:d} x {cols:d}, {size:d} bytes".format(
            rows=rows, cols=cols, size=len(dataset.PixelData)))
        if 'PixelSpacing' in dataset:
            print("Pixel spacing....:", dataset.PixelSpacing)

def plot_pixel_array(dataset, figsize=(10,10)):
    """Show the pixel data of ``dataset`` in a new figure using the "bone" colormap.

    ``dataset`` must expose a ``pixel_array`` attribute; ``figsize`` is passed
    straight to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    image = dataset.pixel_array
    bone_cmap = plt.cm.bone
    plt.imshow(image, cmap=bone_cmap)
    plt.show()

In [None]:
def mask2rle(img, width, height):
    """Encode a 0/255 mask as a relative run-length string.

    The mask is scanned as ``img[x][y]`` with ``x`` in ``range(width)`` as the
    outer loop and ``y`` in ``range(height)`` as the inner loop. The result is
    a space-separated list of ``start length`` pairs where every ``start`` is
    the number of background pixels since the end of the previous run (or
    since the beginning of the scan for the first run).

    Parameters
    ----------
    img : 2-D indexable
        Mask whose foreground pixels equal 255 and background pixels equal 0.
    width, height : int
        Extent of the first and second index of ``img``.

    Returns
    -------
    str
        The encoded runs; an empty string when the mask has no foreground.
    """
    rle = []
    last_color = 0
    current_pixel = 0   # pixels seen since the end of the previous run
    run_start = -1      # -1 means "not inside a run"
    run_length = 0

    for x in range(width):
        for y in range(height):
            current_color = img[x][y]
            if current_color != last_color:
                if current_color == 255:
                    # Background -> foreground: a new run begins here.
                    run_start = current_pixel
                    run_length = 1
                else:
                    # Foreground -> background: emit the finished run and
                    # restart the relative offset counter.
                    rle.append(str(run_start))
                    rle.append(str(run_length))
                    run_start = -1
                    run_length = 0
                    current_pixel = 0
            elif run_start > -1:
                run_length += 1
            last_color = current_color
            current_pixel += 1

    # A run that reaches the very last pixel has no closing transition, so it
    # must be flushed here; otherwise it would be dropped from the encoding.
    if run_start > -1:
        rle.append(str(run_start))
        rle.append(str(run_length))

    return " ".join(rle)

def rle2mask(rle, width, height):
    """Decode a relative run-length string into a ``(width, height)`` mask.

    ``rle`` is a whitespace-separated list of ``start length`` pairs in which
    each ``start`` is an offset relative to the end of the previous run.
    Foreground pixels are set to 255 in a float array of zeros.

    Parameters
    ----------
    rle : str
        Encoded mask. An empty string or the sentinel ``"-1"`` (surrounding
        whitespace allowed) decodes to an all-zero mask.
    width, height : int
        Shape of the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(width, height)`` holding 0 or 255.

    Raises
    ------
    ValueError
        If ``rle`` does not contain an even number of values.
    """
    mask = np.zeros(width * height)
    tokens = rle.split()

    # "-1" marks "no mask"; previously this raised IndexError.
    if not tokens or tokens == ['-1']:
        return mask.reshape(width, height)
    if len(tokens) % 2:
        raise ValueError(
            "RLE string must hold start/length pairs, got %d values" % len(tokens))

    values = np.asarray([int(token) for token in tokens])
    starts = values[0::2]
    lengths = values[1::2]

    current_position = 0
    for start, length in zip(starts, lengths):
        current_position += start
        mask[current_position:current_position + length] = 255
        current_position += length

    return mask.reshape(width, height)

In [None]:
# Location of the DICOM files and of the run-length encoded annotations.
train_glob = '../input/siim-train-test/dicom-images-train/*/*/*.dcm'
#test_glob = 'siim-acr-pneumothorax-segmentation-data/pneumothorax/dicom-images-test/*/*/*.dcm'
df_full = pd.read_csv('../input/siim-train-test/train-rle.csv', index_col='ImageId')

# The first `load_size` files (sorted by path) are used for training and the
# following 150 files are held out for validation.
load_size = 1000
all_dicom_paths = sorted(glob.glob(train_glob))
train_fns = all_dicom_paths[:load_size]
test_fns = all_dicom_paths[load_size:load_size+150]

In [None]:
# Training images: one 1024x1024 single-channel uint8 slot per file.
X = np.zeros((len(train_fns), 1024, 1024, 1), dtype=np.uint8)
# Training masks, same layout. The builtin `bool` is used because the
# `np.bool` alias was deprecated and later removed from NumPy.
Y = np.zeros((len(train_fns), 1024, 1024, 1), dtype=bool)

In [None]:
# Fill X with the pixel data and Y with the decoded mask of every training file.
for n, _id in tqdm_notebook(enumerate(train_fns), total=len(train_fns)):
    # `dcmread` is the supported reader; `read_file` is gone from newer pydicom.
    dataset = pydicom.dcmread(_id)
    X[n] = np.expand_dims(dataset.pixel_array, axis=2)

    # The annotation index is the file name without directory and extension.
    image_id = os.path.splitext(os.path.basename(_id))[0]

    try:
        encoded = df_full.loc[image_id, ' EncodedPixels']
    except KeyError:
        print(f"Key {image_id} without mask, assuming healthy patient.")
        encoded = '-1'  # Assume missing masks are empty masks.

    # `.loc` yields a plain string for one annotation row and a Series when
    # the image has several rows; handle both as a list of RLE strings.
    # (`'-1' in <Series>` would test the index labels, not the values.)
    rles = [encoded] if isinstance(encoded, str) else list(encoded)

    mask = np.zeros((1024, 1024), dtype=bool)
    for rle in rles:
        if rle.strip() == '-1':  # pneumothorax not present
            continue
        # Union of all annotated regions.
        mask |= rle2mask(rle, 1024, 1024) > 0

    # NOTE(review): rle2mask returns an array indexed (width, height) while
    # pixel_array is presumably (rows, cols); confirm whether the mask needs
    # a transpose to line up with the image.
    Y[n] = np.expand_dims(mask, axis=2)

print('Done!')

In [None]:
# Cut every 1024x1024 image/mask into an 8x8 grid of non-overlapping 128x128
# tiles. A plain `reshape((-1, 128, 128, 1))` keeps the element count but folds
# each image row into 8 stacked segments, which destroys the spatial layout
# the convolutions rely on. The shape guard keeps the cell safe to re-run.
if X.shape[1:3] != (128, 128):
    X = (X.reshape(-1, 8, 128, 8, 128, 1)      # (img, tile_row, y, tile_col, x, ch)
          .transpose(0, 1, 3, 2, 4, 5)         # (img, tile_row, tile_col, y, x, ch)
          .reshape(-1, 128, 128, 1))
if Y.shape[1:3] != (128, 128):
    Y = (Y.reshape(-1, 8, 128, 8, 128, 1)
          .transpose(0, 1, 3, 2, 4, 5)
          .reshape(-1, 128, 128, 1))

In [None]:
# Validation images: one 1024x1024 single-channel uint8 slot per file.
X_val = np.zeros((len(test_fns), 1024, 1024, 1), dtype=np.uint8)
# Validation masks, same layout. The builtin `bool` is used because the
# `np.bool` alias was deprecated and later removed from NumPy.
Y_val = np.zeros((len(test_fns), 1024, 1024, 1), dtype=bool)

In [None]:
# Fill X_val with the pixel data and Y_val with the decoded mask of every
# held-out file.
for n, _id in tqdm_notebook(enumerate(test_fns), total=len(test_fns)):
    # `dcmread` is the supported reader; `read_file` is gone from newer pydicom.
    dataset = pydicom.dcmread(_id)
    X_val[n] = np.expand_dims(dataset.pixel_array, axis=2)

    # The annotation index is the file name without directory and extension.
    image_id = os.path.splitext(os.path.basename(_id))[0]

    try:
        encoded = df_full.loc[image_id, ' EncodedPixels']
    except KeyError:
        print(f"Key {image_id} without mask, assuming healthy patient.")
        encoded = '-1'  # Assume missing masks are empty masks.

    # `.loc` yields a plain string for one annotation row and a Series when
    # the image has several rows; handle both as a list of RLE strings.
    # (`'-1' in <Series>` would test the index labels, not the values.)
    rles = [encoded] if isinstance(encoded, str) else list(encoded)

    mask = np.zeros((1024, 1024), dtype=bool)
    for rle in rles:
        if rle.strip() == '-1':  # pneumothorax not present
            continue
        # Union of all annotated regions.
        mask |= rle2mask(rle, 1024, 1024) > 0

    # NOTE(review): rle2mask returns an array indexed (width, height) while
    # pixel_array is presumably (rows, cols); confirm whether the mask needs
    # a transpose to line up with the image.
    Y_val[n] = np.expand_dims(mask, axis=2)

print('Done!')

In [None]:
# Cut every 1024x1024 image/mask into an 8x8 grid of non-overlapping 128x128
# tiles. A plain `reshape((-1, 128, 128, 1))` keeps the element count but folds
# each image row into 8 stacked segments, which destroys the spatial layout
# the convolutions rely on. The shape guard keeps the cell safe to re-run.
if X_val.shape[1:3] != (128, 128):
    X_val = (X_val.reshape(-1, 8, 128, 8, 128, 1)   # (img, tile_row, y, tile_col, x, ch)
                  .transpose(0, 1, 3, 2, 4, 5)      # (img, tile_row, tile_col, y, x, ch)
                  .reshape(-1, 128, 128, 1))
if Y_val.shape[1:3] != (128, 128):
    Y_val = (Y_val.reshape(-1, 8, 128, 8, 128, 1)
                  .transpose(0, 1, 3, 2, 4, 5)
                  .reshape(-1, 128, 128, 1))

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Training-time preprocessing/augmentation: dataset-wide centering, ZCA
# whitening and random shifts of up to 20% along both image axes.
augmentation_options = {
    'featurewise_center': True,
    'zca_whitening': True,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
}
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Fit the generator on the training images; `datagen` was configured with
# featurewise_center and zca_whitening, both of which rely on statistics
# computed from data.
# NOTE(review): ZCA whitening on 128x128 inputs presumably builds a very large
# covariance matrix - confirm that memory/time cost is acceptable.
datagen.fit(X)

In [None]:
# Generator for the validation data, created with default options only.
# NOTE(review): unlike `datagen`, no featurewise centering / ZCA whitening is
# configured here, so validation batches are presumably not preprocessed the
# same way as training batches - confirm this is intended.
datagenval = ImageDataGenerator()

In [None]:
from keras.models import *
from keras.layers import *
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [None]:
# Small U-Net-like network with additive shortcuts:
# 128x128x1 image in, 128x128x1 per-pixel probability map out.
img_input = Input(shape=(128, 128, 1))

# encode
conv1_1 = Conv2D(32, (3, 3), activation='relu', padding='same')(img_input)
pool1 = MaxPooling2D((2, 2))(conv1_1)

conv2_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
pool2 = MaxPooling2D((2, 2))(conv2_1)

# bottleneck
conv3_1 = Conv2D(32, (3, 3), activation='relu', padding='same')(pool2)
conv3_2 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv3_1)
# NOTE(review): a sum of conv3_2 and conv3_1 used to be computed at this point
# but was overwritten before being consumed, so it never reached the output
# and has been dropped. If a shortcut from conv3_1 was intended, feed that sum
# into the next Conv2D instead of conv3_2.

conv3_3 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv3_2)
conv3_3 = Add()([conv3_3, conv3_2])

conv3_4 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv3_3)
conv3_3 = Add()([conv3_4, conv3_3])

# decode, stage 1: back to the resolution of conv2_1
up1 = concatenate([UpSampling2D((2, 2))(conv3_3), conv2_1], axis=-1)
conv4_1 = Conv2D(64, (3, 3), activation='relu', padding='same')(up1)

conv4_4 = Add()([conv4_1, conv2_1])

conv4_6 = Add()([conv4_4, conv4_1])

# decode, stage 2: back to the input resolution
up2 = concatenate([UpSampling2D((2, 2))(conv4_6), conv1_1], axis=-1)
conv5_1 = Conv2D(32, (3, 3), activation='relu', padding='same')(up2)

conv5_4 = Add()([conv5_1, conv1_1])

conv5_6 = Add()([conv5_4, conv5_1])

# One output channel per pixel. Sigmoid is required here: softmax normalises
# across the channel axis, and with a single channel it always yields 1.0,
# which makes the network output constant and untrainable.
out = Conv2D(1, (1, 1), padding='same')(conv5_6)
out = Activation('sigmoid')(out)

In [None]:
# Build the model from the layer graph: `img_input` is the only input and
# `out` the only output.
model = Model(inputs=[img_input], outputs=[out])
# Print the layer overview of the assembled model.
model.summary()

In [None]:
# Per-pixel binary classification: Adam optimiser, binary cross-entropy loss,
# accuracy reported as an additional metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training callbacks, built one by one and collected in `callbacks`.
early_stopping = EarlyStopping(patience=10, verbose=2)
reduce_lr = ReduceLROnPlateau(factor=0.1, patience=5, min_lr=0.00001, verbose=2)
checkpoint = ModelCheckpoint('model-pneumothorax.h5', verbose=2,
                             save_best_only=True, save_weights_only=True)

callbacks = [early_stopping, reduce_lr, checkpoint]

In [None]:
# Train on augmented batches of 32 and validate on the held-out tiles.
# `callbacks` (early stopping, LR reduction, checkpointing) is now actually
# passed in; it was defined earlier in the notebook but never used, so none
# of those callbacks ever ran.
# NOTE(review): `datagen.flow(X, Y)` presumably applies the random shifts to
# the images only, leaving the masks in Y unshifted - confirm, and pair
# image/mask generators with a shared seed if geometric augmentation is wanted.
results = model.fit_generator(
    datagen.flow(X, Y, batch_size=32),
    validation_data=datagenval.flow(X_val, Y_val),
    steps_per_epoch=len(X) // 32,
    validation_steps=len(X_val) // 32,
    epochs=60,
    verbose=2,
    callbacks=callbacks,
)

In [None]:
# Validation loss values recorded in the training history.
print(results.history['val_loss'])

In [None]:
# Training loss values recorded in the training history.
print(results.history['loss'])

In [None]:
# Cut-off above which a predicted pixel counts as foreground.
threshold = 0.5

# Raw model outputs for the validation tiles ...
preds_val = model.predict(X_val, verbose=1)
# ... and their binarised version (1 where the output exceeds the threshold).
above_threshold = preds_val > threshold
preds_val_t = above_threshold.astype(np.uint8)

def plot_sample(X, Y, preds, binary_preds, ix=None):
    """Plot one sample next to its ground truth and predictions.

    Four panels are drawn side by side: the input image, the ground-truth
    mask, the raw prediction and the thresholded prediction. When the sample
    has a non-empty ground-truth mask its outline is overlaid on the image
    and on both prediction panels.

    Parameters
    ----------
    X : array
        Images, indexed as ``X[ix, ..., 0]``.
    Y : array
        Ground-truth masks aligned with ``X``.
    preds : array
        Raw model outputs aligned with ``X``.
    binary_preds : array
        Thresholded model outputs aligned with ``X``.
    ix : int, optional
        Index of the sample to show; a random valid index is drawn when omitted.
    """
    # Local import: the notebook's import cell does not import `random`.
    import random

    if ix is None:
        # randrange excludes len(X); randint(0, len(X)) could index past the end.
        ix = random.randrange(len(X))

    truth = Y[ix].squeeze()
    has_mask = truth.max() > 0

    fig, ax = plt.subplots(1, 4, figsize=(20, 10))

    # 'gray' is accepted by every matplotlib release; 'grey' is not.
    ax[0].imshow(X[ix, ..., 0], cmap='gray')
    if has_mask:
        ax[0].contour(truth, colors='k', levels=[0.5])
    ax[0].set_title('Dataset')

    ax[1].imshow(truth)
    ax[1].set_title('Pneumothorax')

    ax[2].imshow(preds[ix].squeeze(), vmin=0, vmax=1)
    if has_mask:
        ax[2].contour(truth, colors='k', levels=[0.5])
    ax[2].set_title('Pneumothorax Predicted')

    ax[3].imshow(binary_preds[ix].squeeze(), vmin=0, vmax=1)
    if has_mask:
        ax[3].contour(truth, colors='k', levels=[0.5])
    ax[3].set_title('Pneumothorax Predicted binary')

In [None]:
# Check that the validation data and predictions look right. The validation
# arrays built earlier in this notebook are named X_val / Y_val; the
# X_valid / y_valid names used before are never defined.
plot_sample(X_val, Y_val, preds_val, preds_val_t)