In [None]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
import pydicom
from scipy import ndimage
from tqdm import tqdm
import sys
import matplotlib.pyplot as plt

# Root directory of the competition data; train.csv is read from here below.
basepath = Path('../input/rsna-str-pulmonary-embolism-detection/')

In [None]:
# Load the label table and add, for every row, the directory that holds the
# DICOM files of its series: <basepath>/train/<StudyInstanceUID>/<SeriesInstanceUID>
train = pd.read_csv(basepath / 'train.csv')
series_dir_prefix = str(basepath) + '/train' + '/'
train['dcmpath'] = series_dir_prefix + train.StudyInstanceUID + '/' + train.SeriesInstanceUID

In [None]:
# Nested lookup built from train.csv:
#   studies[study_uid]                      -> exam-level labels + 'series'
#   studies[study_uid]['series'][serie_uid] -> {'slices': {...}}
#   ...['slices'][slice_uid]                -> {'pe_present_on_image': ...}
studies = {}
label_names = [
    'negative_exam_for_pe',
    'qa_motion',
    'qa_contrast',
    'flow_artifact',
    'rv_lv_ratio_gte_1',
    'rv_lv_ratio_lt_1',
    'leftsided_pe',
    'chronic_pe',
    'true_filling_defect_not_pe',
    'rightsided_pe',
    'acute_and_chronic_pe',
    'central_pe',
    'indeterminate',
]

# Upper bound on the number of CSV rows that are consumed.
N = 2000000

for i, (row_index, record) in tqdm(enumerate(train.iterrows())):

    if i >= N:
        break

    study_uid = record['StudyInstanceUID']
    serie_uid = record['SeriesInstanceUID']
    slice_uid = record['SOPInstanceUID']

    # The exam-level labels are copied from the first row seen for a study.
    if study_uid not in studies:
        study_entry = {label: record[label] for label in label_names}
        study_entry['series'] = {}
        studies[study_uid] = study_entry

    series = studies[study_uid]['series']

    if serie_uid not in series:
        series[serie_uid] = {'slices': {}}

    series[serie_uid]['slices'][slice_uid] = {
        'pe_present_on_image': record['pe_present_on_image'],
    }

In [None]:
# Number of distinct StudyInstanceUIDs collected in `studies`.
print(len(studies))

In [None]:
# Disabled snippet kept as a bare string literal (it is not executed): it would
# report mean/std/min/max of the number of files per series directory.
# NOTE(review): `serie_uids` is not defined anywhere in the visible notebook,
# so the snippet would need that list before it could be re-enabled.
"""lengths = []
for suid in serie_uids:
    files = np.array(list(Path(suid).iterdir()))
    lengths.append(len(files))
print(np.mean(lengths))
print(np.std(lengths))
print(np.min(lengths))
print(np.max(lengths))"""

In [None]:
def get_scans(study_uids, size=128, depth=64, n_image_targets=2772):
    """Load, resample and normalise one CT volume per requested study.

    For every study the first series registered in the module-level
    ``studies`` dict is read from ``basepath / 'train'``. Its slices are
    ordered by the z component of ``ImagePositionPatient``, converted with
    ``pixel_array * RescaleSlope + RescaleIntercept``, resampled to
    ``(depth, size, size)`` with linear interpolation, clipped to
    [-1000, 1000] and divided by 1000.

    Parameters
    ----------
    study_uids : iterable of str
        StudyInstanceUIDs; each must be a key of ``studies``.
    size : int
        Output height and width of every slice.
    depth : int
        Output number of slices per volume.
    n_image_targets : int
        Length to which the per-slice ``pe_present_on_image`` sequence is
        resampled. The default matches the value that was previously
        hard-coded here.

    Returns
    -------
    scans : np.ndarray
        Shape ``(len(study_uids), depth, size, size, 1)`` for non-empty input.
    targets : np.ndarray
        One row per study: nine exam-level labels followed by the
        ``n_image_targets`` resampled per-slice labels.

    Raises
    ------
    KeyError
        If a study, or a slice file name, is not present in ``studies``.
    ValueError
        If not a single slice of a series could be decoded.
    """
    exam_label_order = ['negative_exam_for_pe',
                        'indeterminate',
                        'chronic_pe',
                        'acute_and_chronic_pe',
                        'central_pe',
                        'leftsided_pe',
                        'rightsided_pe',
                        'rv_lv_ratio_gte_1',
                        'rv_lv_ratio_lt_1']

    scans = []
    targets = []
    for study_uid in study_uids:
        study = studies[study_uid]
        # Only the first series recorded for the study is used.
        serie_uid = next(iter(study['series']))
        slice_labels = study['series'][serie_uid]['slices']
        serie_dir = basepath / 'train' / study_uid / serie_uid

        # Read every slice, remembering its z position and its SOP UID (the
        # file name without extension). A plain list is used on purpose:
        # wrapping DICOM datasets in np.array makes NumPy try to interpret
        # them as sequences.
        entries = []
        for path in serie_dir.iterdir():
            dcm = pydicom.dcmread(str(path))
            entries.append((float(dcm.ImagePositionPatient[2]), path.stem, dcm))
        entries.sort(key=lambda entry: entry[0])

        planes = []
        pe_present_on_image = []
        for _z, sop_uid, dcm in entries:
            label = slice_labels[sop_uid]['pe_present_on_image']
            try:
                plane = dcm.pixel_array.astype(np.float32)*dcm.RescaleSlope + dcm.RescaleIntercept
            except Exception as e:
                # Best effort: an undecodable slice is skipped, together with
                # its label so that volume and labels stay aligned.
                print('skipping slice {0} of study {1}: {2}'.format(sop_uid, study_uid, repr(e)))
                continue
            planes.append(plane)
            pe_present_on_image.append(label)
        del entries

        if not planes:
            raise ValueError('no decodable slices for study {0}, series {1}'.format(study_uid, serie_uid))

        # NOTE(review): ndimage.zoom returns the dtype of its input by
        # default, so the resampled labels keep whatever dtype the CSV
        # values had -- confirm that this is intended.
        pe_present_on_image = np.array(pe_present_on_image)
        pe_present_on_image_resized = ndimage.zoom(
            pe_present_on_image, n_image_targets / float(len(pe_present_on_image)), order=2)

        scan = np.stack(planes, axis=0)
        del planes
        # Derive the zoom factors from the real volume shape instead of
        # assuming 512x512 slices.
        zoom_factors = (depth / float(scan.shape[0]),
                        size / float(scan.shape[1]),
                        size / float(scan.shape[2]))
        scan = ndimage.zoom(scan, zoom_factors, order=1)
        scan = np.clip(scan, -1000, 1000)/1000
        scans.append(scan)
        del scan

        targets.append([*(study[label] for label in exam_label_order),
                        *pe_present_on_image_resized])
    targets = np.array(targets)
    scans = np.array(scans)[..., None]
    return scans, targets


In [None]:
# Shell escape: long-format listing of the current working directory.
!ls -l


In [None]:
# Pre-process the studies in chunks and store every chunk as a pair of .npy
# files (scans_<n>_to_<m>.npy / targets_<n>_to_<m>.npy).
#
# The previous while-loop ran one extra iteration with n == m after the last
# real chunk and wrote a pair of empty files; iterating over a range avoids
# that and no longer relies on a hard-coded number of studies.
EXPORT_START = 1200  # presumably a resume point from an earlier run -- TODO confirm
EXPORT_CHUNK = 100
export_uids = list(studies.keys())

for n in range(EXPORT_START, len(export_uids), EXPORT_CHUNK):
    m = min(n + EXPORT_CHUNK, len(export_uids))
    print('processing {0} to {1}.npy'.format(n, m))
    scans, targets = get_scans(export_uids[n:m])
    np.save('scans_{0}_to_{1}.npy'.format(n, m), scans)
    np.save('targets_{0}_to_{1}.npy'.format(n, m), targets)
    # Release the chunk before the next one is loaded.
    del scans
    del targets

In [None]:
# Load the first N_SCANS studies and split them, in order, into a training
# part (PERCENT_TRAIN percent) and a validation part (the remainder).
N_SCANS = 100
PERCENT_TRAIN = 70

scans, targets = get_scans(list(studies.keys())[:N_SCANS])

# Index of the first validation sample.
n = int(np.round(PERCENT_TRAIN/100. * scans.shape[0]))
x_train, x_val = scans[:n], scans[n:]
y_train, y_val = targets[:n], targets[n:]

In [None]:
# Visual sanity check on the first loaded volume.
scan = scans[0]

# Distribution of all voxel values.
plt.hist(scan.flat, bins=100)
plt.show()

# First 20 slices on a 5 x 4 grid.
fig, ax = plt.subplots(5, 4, figsize=(20,20))
ax = ax.flatten()
for m, panel in enumerate(ax):
    panel.imshow(scan[m], cmap='Blues_r')

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import BatchNormalization, Conv3D, MaxPooling3D, AveragePooling3D, Lambda, UpSampling1D, Conv1D, GlobalMaxPooling3D, Dense ,Flatten, Activation

In [None]:
SIZE = 128
DEPTH = 64

tf.keras.backend.clear_session()

# Two inputs: the CT volume, and the target vector that the in-graph loss
# layer consumes (9 exam-level entries + 2772 per-slice entries = 2781).
input_tensor = tf.keras.layers.Input(shape=(DEPTH, SIZE, SIZE, 1))
target_input = tf.keras.layers.Input(shape=(2781,))

# Shared 3D encoder: five Conv3D -> BatchNorm -> ReLU stages. Only the first
# stage strides along the depth axis; all stages halve height and width.
encoder_stages = [
    (16, (2, 2, 2)),
    (24, (1, 2, 2)),
    (40, (1, 2, 2)),
    (64, (1, 2, 2)),
    (128, (1, 2, 2)),
]
x = input_tensor
for n_filters, conv_strides in encoder_stages:
    x = Conv3D(filters=n_filters, kernel_size=(3, 3, 3), padding='valid', strides=conv_strides)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

# Per-slice head: average over the two spatial axes, then alternate
# upsampling and 1D convolutions along depth. With DEPTH = 64 the encoder
# leaves 23 depth steps, which this decoder expands to 2772 logits.
decoder_stages = [
    (4, 64, 'relu'),
    (4, 32, 'relu'),
    (4, 16, 'relu'),
    (2, 1, None),  # final layer emits raw logits
]
y = Lambda(lambda t: tf.keras.backend.mean(t, axis=[2, 3]))(x)
for upsample_factor, n_filters, conv_activation in decoder_stages:
    y = UpSampling1D(size=upsample_factor)(y)
    y = Conv1D(filters=n_filters, kernel_size=5, activation=conv_activation)(y)
y = Flatten()(y)

# Exam-level head: one more depth-only convolution, global max pooling and a
# dense layer producing 9 logits.
z = Conv3D(filters=128, kernel_size=(3, 1, 1), padding='valid', strides=(2, 1, 1))(x)
z = BatchNormalization()(z)
z = Activation('relu')(z)

z = GlobalMaxPooling3D()(z)
z = Dense(9)(z)

class Loss(tf.keras.layers.Layer):
    """In-graph loss layer for the combined exam-level / image-level targets.

    The layer receives ``[gt, pred]``, both of shape
    ``(batch_size, 9 + number of image-level entries)``. The first 9 entries
    are exam-level labels/logits, the remainder are per-image labels/logits.
    It registers the batch-mean loss via ``add_loss`` and reports the two
    components as the metrics ``exam_loss`` and ``image_loss``.

    Args:
        exam_weights: optional sequence of 9 weights applied to the exam-level
            sigmoid cross-entropy terms. When omitted,
            ``DEFAULT_EXAM_WEIGHTS`` is used, which reproduces the behaviour
            this layer had before the weights became configurable (only the
            first exam-level label contributes).

    Raises:
        ValueError: if ``exam_weights`` does not contain exactly 9 values.
    """

    N_EXAM_LABELS = 9

    # Weights that were effectively in use: only the first label counts.
    DEFAULT_EXAM_WEIGHTS = (1., 0., 0., 0., 0., 0., 0., 0., 0.)

    # Weight vector that used to be assigned in `call` and then immediately
    # overwritten (a dead store). Kept so it can be passed explicitly as
    # `exam_weights`. Presumably per-label metric weights -- TODO confirm.
    ALTERNATIVE_EXAM_WEIGHTS = (0.0736196319, 0.09202453988, 0.1042944785,
                                0.1042944785, 0.1877300613, 0.06257668712,
                                0.06257668712, 0.2346625767, 0.0782208589)

    def __init__(self, *args, exam_weights=None, **kwargs):
        super(Loss, self).__init__(*args, **kwargs)
        if exam_weights is None:
            exam_weights = self.DEFAULT_EXAM_WEIGHTS
        # Stored as a NumPy array so Keras attribute tracking leaves it alone.
        exam_weights = np.asarray(exam_weights, dtype=np.float32)
        if exam_weights.shape != (self.N_EXAM_LABELS,):
            raise ValueError('exam_weights must contain exactly {0} values, got shape {1}'.format(
                self.N_EXAM_LABELS, exam_weights.shape))
        self._exam_weights = exam_weights

    def call(self, x):
        """
        x: pair ``[gt, pred]``; each of shape (batch_size, 9 + size of image vector).
           ``gt`` holds labels, ``pred`` holds logits.

        Returns the scalar batch-mean loss.
        """
        gt, pred = x

        n_exam = self.N_EXAM_LABELS
        exam_gt, im_gt = gt[..., :n_exam], gt[..., n_exam:]
        exam_pred, im_pred = pred[..., :n_exam], pred[..., n_exam:]

        # (1, 9) so the weights broadcast over the batch axis.
        weights = tf.constant(self._exam_weights, tf.float32)[None, ...]

        # Weighted sum over the exam-level labels -> one value per sample.
        exam_loss = tf.reduce_sum(
            weights * tf.nn.sigmoid_cross_entropy_with_logits(labels=exam_gt, logits=exam_pred),
            axis=-1)

        # Unweighted mean over all image-level entries -> one value per sample.
        image_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=im_gt, logits=im_pred),
            axis=-1)

        loss = exam_loss + image_loss
        loss = tf.reduce_mean(loss) # mean across batch

        self.add_loss(loss)
        self.add_metric(exam_loss, aggregation='mean', name='exam_loss')
        self.add_metric(image_loss, aggregation='mean', name='image_loss')

        return loss

# Full prediction vector: the 9 exam-level logits (z) followed by the
# per-slice logits (y), i.e. the same layout as the target vector.
pred = Lambda(lambda x: tf.keras.backend.concatenate([x[0], x[1]], axis=-1))([z, y])
# The loss is computed inside the graph, so the targets enter as a model input.
loss = Loss()([target_input, pred])

model = tf.keras.Model(inputs=[input_tensor, target_input], outputs=[pred, loss])
#model.summary()
# No `loss=` argument: the Loss layer registers the loss through add_loss.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3))

# validation_data is given as an explicit (inputs, targets) tuple. A bare
# two-element list is ambiguous: Keras may unpack it as (x, y) instead of
# treating it as the two model inputs.
model.fit(x=[x_train, y_train], y=None, batch_size=16, epochs=20,
          validation_data=([x_val, y_val], None), validation_freq=2)

#print(model(scans))

In [None]:
# Second training round with a 10x smaller learning rate. Re-compiling
# creates a fresh Adam optimizer.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4))
# validation_data is given as an explicit (inputs, targets) tuple. A bare
# two-element list is ambiguous: Keras may unpack it as (x, y) instead of
# treating it as the two model inputs.
model.fit(x=[x_train, y_train], y=None, batch_size=16, epochs=20,
          validation_data=([x_val, y_val], None), validation_freq=2)

In [None]:
# Persist the trained weights, then run the model once on a single sample.
model.save_weights('model.h5')

x = 5
single_sample = [scans[x:x+1], targets[x:x+1]]  # slices keep the batch axis
pred, loss = model.predict(single_sample)

In [None]:
%matplotlib inline

x = 2
pred, loss = model.predict([scans[x:x+1], targets[x:x+1]])

pred_sig = 1/(1 + np.exp(-pred)) 

plt.plot(pred_sig[0])
plt.plot(targets[x])
plt.show()

plt.plot(pred_sig[0, :9])
plt.plot(targets[x, :9])
plt.show()

with np.printoptions(precision=3, suppress=True):
    print(np.concatenate([pred_sig[..., :9], targets[x:x+1][..., :9]], axis=0))
