In [1]:
import numpy as np
import pandas as pd
import pydicom
import os
from os import listdir

from os.path import isfile, join
import matplotlib.pyplot as plt
import collections
from tqdm import tqdm_notebook as tqdm
from datetime import datetime

from math import ceil, floor, log
import cv2

import tensorflow as tf
import keras

import sys

# from keras_applications.resnet import ResNet50
from keras_applications.inception_v3 import InceptionV3

from sklearn.model_selection import ShuffleSplit
test_images_dir = '/home/sichun1247/Sampled_Test/'
train_images_dir = '/home/sichun1247/Sampled_Train/'

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def read_testset(filename="/home/sichun1247/test_sample.csv"):
    df = pd.read_csv(filename)

    
    df = df.loc[:, ["Label", "Type", "ID_Image"]]
    df = df.set_index(['ID_Image', 'Type']).unstack(level=-1)
    
    return df

def read_trainset(filename="/home/sichun1247/train_sample.csv"):
    df = pd.read_csv(filename)

    
    
    df = df.loc[:, ["Label", "Type", "ID_Image"]]
    df = df.set_index(['ID_Image', 'Type']).unstack(level=-1)
    
    return df

train_df = read_trainset()   
test_df = read_testset()

In [3]:
train_df.columns = train_df.columns.get_level_values(1)
test_df.columns = test_df.columns.get_level_values(1)

In [4]:
train_df.reset_index(inplace=True)
test_df.reset_index(inplace=True)

In [5]:
def correct_dcm(dcm):
    x = dcm.pixel_array + 1000
    px_mode = 4096
    x[x>=px_mode] = x[x>=px_mode] - px_mode
    dcm.PixelData = x.tobytes()
    dcm.RescaleIntercept = -1000

def window_image(dcm, window_center, window_width):
    
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        correct_dcm(dcm)
    
    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    img = np.clip(img, img_min, img_max)

    return img
def bsb_window(dcm):
    brain_img = window_image(dcm, 40, 80)
    subdural_img = window_image(dcm, 80, 200)
    soft_img = window_image(dcm, 40, 380)
    
    brain_img = (brain_img - 0) / 80
    subdural_img = (subdural_img - (-20)) / 200
    soft_img = (soft_img - (-150)) / 380
    bsb_img = np.array([brain_img, subdural_img, soft_img]).transpose(1,2,0)

    return bsb_img
# dicom = pydicom.dcmread(train_images_dir + 'ID_5c8b5d701' + '.dcm')


In [16]:
def window_with_correction(dcm, window_center, window_width):
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        correct_dcm(dcm)
    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    img = np.clip(img, img_min, img_max)
    return img

def window_without_correction(dcm, window_center, window_width):
    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    img = np.clip(img, img_min, img_max)
    return img
def window_testing(img, window):
    brain_img = window(img, 40, 80)
    subdural_img = window(img, 80, 200)
    soft_img = window(img, 40, 380)
    
    brain_img = (brain_img - 0) / 80
    subdural_img = (subdural_img - (-20)) / 200
    soft_img = (soft_img - (-150)) / 380
    bsb_img = np.array([brain_img, subdural_img, soft_img]).transpose(1,2,0)

    return bsb_img

# # example of a "bad data point" (i.e. (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100) == True)
# dicom = pydicom.dcmread(train_images_dir + "ID_036db39b7" + ".dcm")

# fig, ax = plt.subplots(1, 2)
# fig, ax = plt.subplots(1, 2)

# ax[0].imshow(window_testing(dicom, window_without_correction), cmap=plt.cm.bone);
# ax[0].set_title("original")
# ax[1].imshow(window_testing(dicom, window_with_correction), cmap=plt.cm.bone);
# ax[1].set_title("corrected")

In [6]:
def _read(path, desired_size):
    """Will be used in DataGenerator"""
    
    dcm = pydicom.dcmread(path)
    
    try:
        img = bsb_window(dcm)
    except:
        img = np.zeros(desired_size)
    
    
    img = cv2.resize(img, desired_size[:2], interpolation=cv2.INTER_LINEAR)
    
    return img

In [9]:
train_images = [f for f in listdir(train_images_dir) if isfile(join(train_images_dir, f))]
test_images = [f for f in listdir(test_images_dir) if isfile(join(test_images_dir, f))]

In [10]:
train_images_ID = [s.strip('.dcm') for s in train_images]
test_images_ID = [s.strip('.dcm') for s in test_images]

In [None]:
p_train = []
for num in range(70616):
    #print(num)
    dicom = _read(train_images_dir+train_images[num], (128, 128)) 
    p_train.append(dicom)
#np.save('train1',p_train)

p_train = []
for num in range(70616,len(train_images)):
    #print(num)
    dicom = _read(train_images_dir+train_images[num], (128, 128)) 
    p_train.append(dicom)
#np.save('train2',p_train)


p_test = []
for num2 in range(len(test_images)):
    #print(num2)
    dicom = _read(test_images_dir+test_images[num2], (128, 128)) 
    p_test.append(dicom)
np.save('test',p_test)
    

In [6]:
# tt = np.load('train1.npy')
# pp = np.load('train2.npy')
# test = np.load('test.npy')

In [7]:
# p_train = []
# for item in tt:
#     p_train.append(item)
# for item in pp:
#     p_train.append(item)
# p_test = []
# for item in test:
#     p_test.append(item)

In [11]:
l_train =  list(zip(train_images_ID, p_train))
train_pixel = pd.DataFrame(l_train, columns = ['ID_Image' , 'pixel']) 
l_test =  list(zip(test_images_ID, p_test))
test_pixel = pd.DataFrame(l_test, columns = ['ID_Image' , 'pixel']) 

In [12]:
train_merge = train_df.merge(train_pixel, left_on='ID_Image', right_on='ID_Image', how='inner')
test_merge = test_df.merge(test_pixel, left_on='ID_Image', right_on='ID_Image', how='inner')

In [13]:
train_X = train_merge.drop(columns = ['ID_Image','any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural'])
train_X = np.stack(train_X['pixel'].tolist(),axis=0)
train_Y = np.array(train_merge[['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']])
test_X = test_merge.drop(columns = ['ID_Image','any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural'])
test_X = np.stack(test_X['pixel'].tolist(),axis=0)
test_Y = np.array(test_merge[['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']])


In [13]:
import keras
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16, preprocess_input

In [14]:
from keras import backend as K

def weighted_log_loss(y_true, y_pred):
    """
    Can be used as the loss function in model.compile()
    ---------------------------------------------------
    """
    
    class_weights = np.array([2., 1., 1., 1., 1., 1.])
    
    eps = K.epsilon()
    
    y_pred = K.clip(y_pred, eps, 1.0-eps)

    out = -(         y_true  * K.log(      y_pred) * class_weights
            + (1.0 - y_true) * K.log(1.0 - y_pred) * class_weights)
    
    return K.mean(out, axis=-1)
def _normalized_weighted_average(arr, weights=None):
    """
    A simple Keras implementation that mimics that of 
    numpy.average(), specifically for this competition
    """
    
    if weights is not None:
        scl = K.sum(weights)
        weights = K.expand_dims(weights, axis=1)
        return K.sum(K.dot(arr, weights), axis=1) / scl
    return K.mean(arr, axis=1)

def weighted_loss(y_true, y_pred):
    """
    Will be used as the metric in model.compile()
    ---------------------------------------------
    
    Similar to the custom loss function 'weighted_log_loss()' above
    but with normalized weights, which should be very similar 
    to the official competition metric:
        https://www.kaggle.com/kambarakun/lb-probe-weights-n-of-positives-scoring
    and hence:
        sklearn.metrics.log_loss with sample weights
    """
    
    class_weights = K.variable([2., 1., 1., 1., 1., 1.])
    
    eps = K.epsilon()
    
    y_pred = K.clip(y_pred, eps, 1.0-eps)

    loss = -(        y_true  * K.log(      y_pred)
            + (1.0 - y_true) * K.log(1.0 - y_pred))
    
    loss_samples = _normalized_weighted_average(loss, class_weights)
    
    return K.mean(loss_samples)

def weighted_log_loss_metric(trues, preds):
    """
    Will be used to calculate the log loss 
    of the validation set in PredictionCheckpoint()
    ------------------------------------------
    """
    class_weights = [2., 1., 1., 1., 1., 1.]
    
    epsilon = 1e-7
    
    preds = np.clip(preds, epsilon, 1-epsilon)
    loss = trues * np.log(preds) + (1 - trues) * np.log(1 - preds)
    loss_samples = np.average(loss, axis=1, weights=class_weights)

    return - loss_samples.mean()

In [15]:
cnn_model = Sequential()
# Convolutional Layer 1:
cnn_model.add(Conv2D(32, kernel_size = (3, 3), activation = 'linear', input_shape = (128, 128, 3), padding = 'same'))
cnn_model.add(LeakyReLU(alpha = 0.1))
# Max Pooling Layer 1:
cnn_model.add(MaxPooling2D((2, 2)))
# Convolutional Layer 2:
cnn_model.add(Conv2D(64, kernel_size = (3, 3), activation = 'linear', padding = 'same'))
cnn_model.add(LeakyReLU(alpha = 0.1))
# Max Pooling Layer 2:
cnn_model.add(MaxPooling2D((2, 2)))
# Convolutional Layer 3:
cnn_model.add(Conv2D(128, kernel_size = (3, 3), activation = 'linear', padding = 'same'))
cnn_model.add(LeakyReLU(alpha = 0.1))
# Max Pooling Layer 3:
cnn_model.add(MaxPooling2D((2, 2)))
# Flatten Preprocessing:
cnn_model.add(Flatten())
# Fully Connected Layer (Dense Layer):
cnn_model.add(Dense(128, activation = 'linear'))
cnn_model.add(LeakyReLU(alpha = 0.1))
# Output Layer (Dense Layer):
cnn_model.add(Dense(6, activation = 'sigmoid')) # choose one of them? # softmax

# Compile CNN model
cnn_model.compile(loss = "binary_crossentropy", optimizer = keras.optimizers.Adam(), metrics = [weighted_loss])
cnn_model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 128, 128, 32)      896       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 128, 128, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        18496     
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 64, 64, 64)        0         
_________________________________________________________________
max_poolin

In [None]:
class DataGenerator(keras.utils.Sequence):

    def __init__(self, list_IDs, labels=None, batch_size=1, img_size=(512, 512, 1), 
                 img_dir=train_images_dir, *args, **kwargs):

        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = img_dir
        self.on_epoch_end()

    def __len__(self):
        return int(ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indices]
        
        if self.labels is not None:
            X, Y = self.__data_generation(list_IDs_temp)
            return X, Y
        else:
            X = self.__data_generation(list_IDs_temp)
            return X
        
    def on_epoch_end(self):
        
        
        if self.labels is not None: # for training phase we undersample and shuffle
            # keep probability of any=0 and any=1
            keep_prob = self.labels.iloc[:, 0].map({0: 0.35, 1: 0.5})
            keep = (keep_prob > np.random.rand(len(keep_prob)))
            self.indices = np.arange(len(self.list_IDs))[keep]
            np.random.shuffle(self.indices)
        else:
            self.indices = np.arange(len(self.list_IDs))

    def __data_generation(self, list_IDs_temp):
        X = np.empty((self.batch_size, *self.img_size))
        
        if self.labels is not None: # training phase
            Y = np.empty((self.batch_size, 6), dtype=np.float32)
        
            for i, ID in enumerate(list_IDs_temp):
                #X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)
                X[i,] = [x[1] for x in l_train if x[0] == ID][0]
                Y[i,] = self.labels.loc[ID].values
        
            return X, Y
        
        else: # test phase
            if self.img_dir == '/home/sichun1247/Sampled_Test/':
                for i, ID in enumerate(list_IDs_temp):
                    #X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)
                    X[i,] = [x[1] for x in l_test if x[0] == ID][0]
            else:
                for i, ID in enumerate(list_IDs_temp):
                    #X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)
                    X[i,] = [x[1] for x in l_train if x[0] == ID][0]
            
            return X

In [None]:
from keras.callbacks import EarlyStopping
from sklearn.model_selection import ShuffleSplit
#datagen = ImageDataGenerator(
#    rotation_range = 30,
#    horizontal_flip = True)
#datagen.fit(train_X)

#early_stopping = EarlyStopping(monitor='val_weighted_loss', patience=2)

# Fits the model on batches with real-time data augmentation:
cnn_model_process = cnn_model.fit_generator(datagen.flow(train_X, train_Y, batch_size = 32),
                                                            steps_per_epoch = len(train_X) / 32, 
                                                            epochs = 20, verbose = 1, 
                                                            validation_data = (test_X, test_Y))

#cnn_model_process = cnn_model.fit(train_X, train_Y, batch_size = 32, epochs = 30, verbose = 1, 
#                                  validation_data = (test_X, test_Y))

self.model.fit_generator(
    DataGenerator(
        train_df.index, 
        train_df, 
        self.batch_size, 
        self.input_dims, 
        train_images_dir
    ),
    epochs=self.num_epochs,
    verbose=self.verbose,
    use_multiprocessing=True,
    workers=4,
    callbacks=[pred_history, scheduler]
)


Train on 123596 samples, validate on 18648 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30

In [None]:
# cnn_model_process.history

In [None]:
from matplotlib import pyplot

pyplot.plot(cnn_model_process.history['weighted_loss'], marker = 'o', label='train')
pyplot.plot(cnn_model_process.history['val_weighted_loss'], marker = 'o', label='validation')
pyplot.xlabel('Epochs', fontsize = 14)
pyplot.ylabel('Weighted Loss',fontsize = 14)
pyplot.title('CNN Weighted Loss Trainig VS Testing', fontsize = 14)
pyplot.legend()
pyplot.show()

In [18]:

cnn_model_dropout = Sequential()
# Convolutional Layer 1:
cnn_model_dropout.add(Conv2D(32, kernel_size = (3, 3), activation = 'linear', input_shape = (128, 128, 3), padding = 'same'))
cnn_model_dropout.add(LeakyReLU(alpha = 0.1))
# Max Pooling Layer 1:
cnn_model_dropout.add(MaxPooling2D((2, 2)))
# Convolutional Layer 2:
cnn_model_dropout.add(Conv2D(64, kernel_size = (3, 3), activation = 'linear', padding = 'same'))
cnn_model_dropout.add(LeakyReLU(alpha = 0.1))
# Max Pooling Layer 2:
cnn_model_dropout.add(MaxPooling2D((2, 2)))
# Convolutional Layer 3:
cnn_model_dropout.add(Conv2D(128, kernel_size = (3, 3), activation = 'linear', padding = 'same'))
cnn_model_dropout.add(LeakyReLU(alpha = 0.1))
# Max Pooling Layer 3:
cnn_model_dropout.add(MaxPooling2D((2, 2)))
# Flatten Preprocessing:
cnn_model_dropout.add(Flatten())
# Fully Connected Layer (Dense Layer):
cnn_model_dropout.add(Dense(128, activation = 'linear'))
cnn_model_dropout.add(BatchNormalization())
cnn_model_dropout.add(LeakyReLU(alpha = 0.1))

# Dropout Layer 4:
cnn_model_dropout.add(Dropout(0.3))
# Output Layer (Dense Layer):
cnn_model_dropout.add(Dense(6, activation = 'sigmoid'))

# Compile CNN model
cnn_model_dropout.compile(loss = "binary_crossentropy", optimizer = keras.optimizers.Adam(), metrics = [weighted_loss])
cnn_model_dropout.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 128, 128, 32)      896       
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 128, 128, 32)      0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 64, 64, 64)        18496     
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 64, 64, 64)        0         
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 32, 32, 128)      

In [19]:
# Fits the model on batches with real-time data augmentation:
# cnn_model_dropout_process = cnn_model_dropout.fit_generator(datagen.flow(train_X, train_Y, batch_size = 32),
#                                                             steps_per_epoch = len(train_X) / 32, 
#                                                             epochs = 20, verbose = 1, 
#                                                             validation_data = (test_X, test_Y))

cnn_model_dropout_process = cnn_model_dropout.fit(train_X, train_Y, batch_size = 32, epochs = 20, verbose = 1, 
                                                  validation_data = (test_X, test_Y))

Train on 123596 samples, validate on 18648 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20

KeyboardInterrupt: 

In [None]:
fig, axes = plt.subplots(1, 2, figsize = (10, 5))
axes[0].plot(range(1, len(cnn_model_process.history['loss']) + 1), cnn_model_process.history['loss'], linestyle = 'solid', marker = 'o', color = 'crimson', label = 'Training Loss')
axes[0].plot(range(1, len(cnn_model_process.history['val_loss']) + 1), cnn_model_process.history['val_loss'], linestyle = 'solid', marker = 'o', color = 'dodgerblue', label = 'Testing Loss')
axes[0].set_xlabel('Epochs', fontsize = 14)
axes[0].set_ylabel('Loss',fontsize = 14)
axes[0].set_title('CNN Loss Trainig VS Testing', fontsize = 14)
axes[0].legend(loc = 'best')
axes[1].plot(range(1, len(cnn_model_process.history['weighted_loss']) + 1), cnn_model_process.history['weighted_loss'], linestyle = 'solid', marker = 'o', color = 'crimson', label = 'Training Weighted Loss')
axes[1].plot(range(1, len(cnn_model_process.history['val_weighted_loss']) + 1), cnn_model_process.history['val_weighted_loss'], linestyle = 'solid', marker = 'o', color = 'dodgerblue', label = 'Testing Weighted Loss')
axes[1].set_xlabel('Epochs', fontsize = 14)
axes[1].set_ylabel('Weighted Loss',fontsize = 14)
axes[1].set_title('CNN Weighted Loss Trainig VS Testing', fontsize = 14)
axes[1].legend(loc = 'best')

fig, axes = plt.subplots(1, 2, figsize = (10, 5))
axes[0].plot(range(1, len(cnn_model_dropout_process.history['loss']) + 1), cnn_model_dropout_process.history['loss'], linestyle = 'solid', marker = 'o', color = 'crimson', label = 'Training Accuracy')
axes[0].plot(range(1, len(cnn_model_dropout_process.history['val_loss']) + 1), cnn_model_dropout_process.history['val_loss'], linestyle = 'solid', marker = 'o', color = 'dodgerblue', label = 'Testing Accuracy')
axes[0].set_xlabel('Epochs', fontsize = 14)
axes[0].set_ylabel('Loss',fontsize = 14)
axes[0].set_title('CNN Dropout Loss Trainig VS Testing', fontsize = 14)
axes[0].legend(loc = 'best')
axes[1].plot(range(1, len(cnn_model_dropout_process.history['weighted_loss']) + 1), cnn_model_dropout_process.history['weighted_loss'], linestyle = 'solid', marker = 'o', color = 'crimson', label = 'Training Loss')
axes[1].plot(range(1, len(cnn_model_dropout_process.history['val_weighted_loss']) + 1), cnn_model_dropout_process.history['val_weighted_loss'], linestyle = 'solid', marker = 'o', color = 'dodgerblue', label = 'Testing Loss')
axes[1].set_xlabel('Epochs', fontsize = 14)
axes[1].set_ylabel('Weighted Loss',fontsize = 14)
axes[1].set_title('CNN Dropout Weighted Loss Trainig VS Testing', fontsize = 14)
axes[1].legend(loc = 'best')

In [None]:
# class DataGenerator(keras.utils.Sequence):

#     def __init__(self, list_IDs, labels=None, batch_size=1, img_size=(512, 512, 1), 
#                  img_dir=train_images_dir, *args, **kwargs):

#         self.list_IDs = list_IDs
#         self.labels = labels
#         self.batch_size = batch_size
#         self.img_size = img_size
#         self.img_dir = img_dir
#         self.on_epoch_end()

#     def __len__(self):
#         return int(ceil(len(self.indices) / self.batch_size))
#     def __getitem__(self, index):
#         indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
#         list_IDs_temp = [self.list_IDs[k] for k in indices]
        
#         if self.labels is not None:
#             X, Y = self.__data_generation(list_IDs_temp)
#             return X, Y
#         else:
#             X = self.__data_generation(list_IDs_temp)
#             return X
#     def on_epoch_end(self):
        
        
#         if self.labels is not None: # for training phase we undersample and shuffle
#             # keep probability of any=0 and any=1
#             keep_prob = self.labels.iloc[:, 0].map({0: 0.35, 1: 0.5})
#             keep = (keep_prob > np.random.rand(len(keep_prob)))
#             self.indices = np.arange(len(self.list_IDs))[keep]
#             np.random.shuffle(self.indices)
#         else:
#             self.indices = np.arange(len(self.list_IDs))
#     def __data_generation(self, list_IDs_temp):
#         X = np.empty((self.batch_size, *self.img_size))
        
#         if self.labels is not None: # training phase
#             Y = np.empty((self.batch_size, 6), dtype=np.float32)
        
#             for i, ID in enumerate(list_IDs_temp):
#                 X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)
#                 Y[i,] = self.labels.loc[ID].values
        
#             return X, Y
        
#         else: # test phase
#             for i, ID in enumerate(list_IDs_temp):
#                 X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)
            
#             return X


In [None]:
# from keras import backend as K

# def weighted_log_loss(y_true, y_pred):
#     """
#     Can be used as the loss function in model.compile()
#     ---------------------------------------------------
#     """
    
#     class_weights = np.array([2., 1., 1., 1., 1., 1.])
    
#     eps = K.epsilon()
    
#     y_pred = K.clip(y_pred, eps, 1.0-eps)

#     out = -(         y_true  * K.log(      y_pred) * class_weights
#             + (1.0 - y_true) * K.log(1.0 - y_pred) * class_weights)
    
#     return K.mean(out, axis=-1)
# def _normalized_weighted_average(arr, weights=None):
#     """
#     A simple Keras implementation that mimics that of 
#     numpy.average(), specifically for this competition
#     """
    
#     if weights is not None:
#         scl = K.sum(weights)
#         weights = K.expand_dims(weights, axis=1)
#         return K.sum(K.dot(arr, weights), axis=1) / scl
#     return K.mean(arr, axis=1)

# def weighted_loss(y_true, y_pred):
#     """
#     Will be used as the metric in model.compile()
#     ---------------------------------------------
    
#     Similar to the custom loss function 'weighted_log_loss()' above
#     but with normalized weights, which should be very similar 
#     to the official competition metric:
#         https://www.kaggle.com/kambarakun/lb-probe-weights-n-of-positives-scoring
#     and hence:
#         sklearn.metrics.log_loss with sample weights
#     """
    
#     class_weights = K.variable([2., 1., 1., 1., 1., 1.])
    
#     eps = K.epsilon()
    
#     y_pred = K.clip(y_pred, eps, 1.0-eps)

#     loss = -(        y_true  * K.log(      y_pred)
#             + (1.0 - y_true) * K.log(1.0 - y_pred))
    
#     loss_samples = _normalized_weighted_average(loss, class_weights)
    
#     return K.mean(loss_samples)

# def weighted_log_loss_metric(trues, preds):
#     """
#     Will be used to calculate the log loss 
#     of the validation set in PredictionCheckpoint()
#     ------------------------------------------
#     """
#     class_weights = [2., 1., 1., 1., 1., 1.]
    
#     epsilon = 1e-7
    
#     preds = np.clip(preds, epsilon, 1-epsilon)
#     loss = trues * np.log(preds) + (1 - trues) * np.log(1 - preds)
#     loss_samples = np.average(loss, axis=1, weights=class_weights)

#     return - loss_samples.mean()