In [None]:
!pip -q install mlflow

In [None]:
# %env SM_FRAMEWORK=tf.keras

In [None]:
import mlflow.tensorflow
mlflow.tensorflow.autolog()

In [None]:
import numpy as np
import pandas as pd
import pydicom
import os
import collections
import sys
import glob
import random
import cv2
import tensorflow as tf
import multiprocessing

from math import ceil, floor
from copy import deepcopy
from tqdm import tqdm_notebook as tqdm
from imgaug import augmenters as iaa

import tensorflow.keras
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam

def calculating_class_weights(y_true):
    """Compute 'balanced' class weights separately for every label column.

    Parameters
    ----------
    y_true : array-like, indexed as ``y_true[:, i]``
        Two-dimensional matrix with one column per label. Each column is
        treated as a binary target with classes 0 and 1.

    Returns
    -------
    numpy.ndarray of shape (n_labels, 2)
        Row ``i`` holds the weights returned by scikit-learn for the classes
        ``[0., 1.]`` of column ``i`` (class 0 first, class 1 second).
    """
    from sklearn.utils.class_weight import compute_class_weight

    y_true = np.asarray(y_true)
    classes = np.array([0., 1.])
    number_dim = np.shape(y_true)[1]
    weights = np.empty([number_dim, 2])
    for i in range(number_dim):
        # DataFrame.values of a frame with mixed columns is object-typed;
        # cast so the labels compare cleanly against the float classes.
        column = np.asarray(y_true[:, i], dtype=float)
        # `classes` and `y` are keyword-only in current scikit-learn releases.
        weights[i] = compute_class_weight('balanced', classes=classes, y=column)
    return weights

In [None]:
!pip install efficientnet
!pip install iterative-stratification

In [None]:
# Import Custom Modules
import efficientnet.tfkeras as efn 
from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit

In [None]:
# Seed every random number generator the notebook touches so that weight
# initialisation and sampling are repeatable across "Restart & Run All".
SEED = 12345
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)  # TF2 replacement for the TF1 tf.set_random_seed

# Constants
TEST_SIZE = 0.1
HEIGHT = 256
WIDTH = 256
CHANNELS = 3
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 64
SHAPE = (HEIGHT, WIDTH, CHANNELS)  # (height, width, channels)

# Folders
DATA_DIR = '/kaggle/input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/'
TEST_IMAGES_DIR = DATA_DIR + 'stage_2_test/'
TRAIN_IMAGES_DIR = DATA_DIR + 'stage_2_train/'

In [None]:
def correct_dcm(dcm):
    """Rewrite the pixel data of `dcm` in place with a shifted value range.

    Every pixel is increased by 1000; values that reach 4096 or more are
    reduced by 4096. The result is stored back as raw bytes in
    ``dcm.PixelData`` and ``dcm.RescaleIntercept`` is set to -1000.
    Nothing is returned.
    """
    wrap_at = 4096
    shifted = dcm.pixel_array + 1000
    overflow = shifted >= wrap_at
    shifted[overflow] = shifted[overflow] - wrap_at
    dcm.RescaleIntercept = -1000
    dcm.PixelData = shifted.tobytes()

def window_image(dcm, window_center, window_width):
    """Convert one DICOM slice to a resized, intensity-windowed image.

    Parameters
    ----------
    dcm : DICOM dataset exposing BitsStored, PixelRepresentation,
        RescaleIntercept, RescaleSlope and pixel_array.
    window_center, window_width : numbers
        Pixel values are clipped to
        ``[center - width // 2, center + width // 2]``.

    Returns
    -------
    numpy.ndarray
        The rescaled image resized to ``SHAPE[0]`` rows by ``SHAPE[1]``
        columns and clipped to the window. Values are NOT normalised here.
    """
    # Unsigned 12-bit files whose intercept is above -100 are first patched
    # by correct_dcm (which mutates `dcm`).
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        correct_dcm(dcm)
    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept

    # Resize. cv2.resize expects dsize as (width, height) while SHAPE is
    # (height, width, channels), so the two entries must be swapped; passing
    # SHAPE[:2] directly is only correct for square targets.
    img = cv2.resize(img, (SHAPE[1], SHAPE[0]), interpolation = cv2.INTER_LINEAR)

    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    img = np.clip(img, img_min, img_max)
    return img

def bsb_window(dcm0,dcm1,dcm2):
    """Build a 3-channel image from three DICOM datasets.

    Each dataset is windowed with centre 40 and width 380, then mapped with
    ``(value - (-150)) / 380``. The three results become channels 0, 1 and 2
    of the returned array, in argument order.

    Returns
    -------
    numpy.ndarray with the channel axis last (height, width, 3).
    """
    channels = [
        (window_image(slice_dcm, 40, 380) - (-150)) / 380
        for slice_dcm in (dcm0, dcm1, dcm2)
    ]
    # Stack channel-first, then move the channel axis to the end.
    return np.array(channels).transpose(1,2,0)

def _read(path0,path1,path2, SHAPE):
    """Read three DICOM files and combine them into one 3-channel image.

    Parameters
    ----------
    path0, path1, path2 : str
        Paths of the DICOM files used for channels 0, 1 and 2.
    SHAPE : tuple
        Shape of the all-zero fallback image returned when windowing fails.

    Returns
    -------
    numpy.ndarray
        Output of ``bsb_window`` or, if it raises, ``np.zeros(SHAPE)``.

    Notes
    -----
    Errors raised while opening the files (e.g. a missing file) are not
    caught and propagate to the caller.
    """
    import warnings

    dcm0 = pydicom.dcmread(path0)
    dcm1 = pydicom.dcmread(path1)
    dcm2 = pydicom.dcmread(path2)

    try:
        img = bsb_window(dcm0,dcm1,dcm2)
    except Exception as err:
        # Best effort: one undecodable slice must not abort a whole epoch,
        # but it should be visible. A bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit and make training uninterruptible.
        warnings.warn("Could not decode {!r}; using an all-zero image ({!r})".format(path1, err))
        img = np.zeros(SHAPE)
    return img

In [None]:
# Image Augmentation
def sometimes(aug):
    """Wrap `aug` in iaa.Sometimes with a probability argument of 0.25."""
    return iaa.Sometimes(0.25, aug)

# Flips plus an occasional crop of 0-25 px (size kept), run in random order.
augmentation = iaa.Sequential(
    [
        iaa.Fliplr(0.25),
        iaa.Flipud(0.10),
        sometimes(iaa.Crop(px=(0, 25), keep_size = True, sample_independently = False)),
    ],
    random_order = True,
)
        
# Generators
class TrainDataGenerator(tensorflow.keras.utils.Sequence):
    """Keras Sequence yielding ``(X, Y)`` batches of 3-slice images and labels.

    A sample is built by ``_read`` from three DICOM files: the row at the
    requested position and the rows directly before and after it in
    ``dataset.index``. At either end of the index the missing neighbour is
    replaced by the row itself.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Only its index is used; file names are ``'ID_' + index value + '.dcm'``.
    labels : pandas.DataFrame
        Row ``i`` (positional) supplies the 5 target values of sample ``i``.
    batch_size : int
    img_size : tuple
        Shape of a single image, e.g. ``(height, width, channels)``.
    img_dir : str
        Directory prefix of the DICOM files.
    augment : bool
        When true, every image goes through the module-level ``augmentation``.
    """
    def __init__(self, dataset, labels, batch_size = 16, img_size = SHAPE, img_dir = TRAIN_IMAGES_DIR, augment = False, *args, **kwargs):
        self.dataset = dataset
        self.ids = dataset.index
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = img_dir
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch; the last one may be shorter."""
        return int(ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as an ``(X, Y)`` tuple."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X, Y = self.__data_generation(indices)
        return X, Y

    def augmentor(self, image):
        """Apply the module-level augmentation pipeline to one image."""
        return augmentation.augment_image(image)

    def on_epoch_end(self):
        """Reset the sample order (sequential; shuffling is disabled)."""
        self.indices = np.arange(len(self.ids))
#         np.random.shuffle(self.indices)

    def _dicom_path(self, image_id):
        """Path of the DICOM file belonging to one index value."""
        return self.img_dir+'ID_'+image_id+".dcm"

    def __data_generation(self, indices):
        # Size the buffers from the actual number of samples: sizing them with
        # batch_size leaves uninitialised np.empty rows in a short last batch.
        n_samples = len(indices)
        X = np.empty((n_samples, *self.img_size))
        Y = np.empty((n_samples, 5), dtype=np.float32)
        last = len(self.ids) - 1

        for i, index in enumerate(indices):
            # Clamp both neighbours to the valid range. A plain `index-1`
            # silently wraps to the LAST id when index == 0.
            path_prev = self._dicom_path(self.ids[max(index - 1, 0)])
            path_cur = self._dicom_path(self.ids[index])
            path_next = self._dicom_path(self.ids[min(index + 1, last)])

            image = _read(path_prev, path_cur, path_next, self.img_size)

            if self.augment:
                X[i,] = self.augmentor(image)
            else:
                X[i,] = image
            Y[i,] = self.labels.iloc[index].values
        return X, Y

class ValDataGenerator(tensorflow.keras.utils.Sequence):
    """Keras Sequence yielding ``(X, Y)`` validation batches with 6 targets.

    File names are ``index value + '.dcm'`` (no prefix is added here). Each
    sample is read with ``_read`` from the row itself and its previous/next
    rows in ``dataset.index``, mirroring the 3-slice input produced by
    ``TrainDataGenerator``; at the ends of the index the missing neighbour is
    replaced by the row itself.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Only its index is used.
    labels : pandas.DataFrame
        Row ``i`` (positional) supplies the 6 target values of sample ``i``.
    batch_size, img_size, img_dir, augment :
        Same meaning as in ``TrainDataGenerator``.
    """
    def __init__(self, dataset, labels, batch_size = 16, img_size = SHAPE, img_dir = TRAIN_IMAGES_DIR, augment = False, *args, **kwargs):
        self.dataset = dataset
        self.ids = dataset.index
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = img_dir
        self.augment = augment
        self.on_epoch_end()

    def __len__(self):
        """Number of batches; the last one may be shorter."""
        return int(ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as an ``(X, Y)`` tuple."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X, Y = self.__data_generation(indices)
        return X, Y

    def augmentor(self, image):
        """Apply the module-level augmentation pipeline to one image."""
        return augmentation.augment_image(image)

    def on_epoch_end(self):
        """Reset the sample order to sequential."""
        self.indices = np.arange(len(self.ids))

    def _dicom_path(self, image_id):
        """Path of the DICOM file belonging to one index value."""
        return self.img_dir+image_id+".dcm"

    def __data_generation(self, indices):
        # Buffers follow the real batch length so that a short last batch
        # carries no uninitialised rows.
        n_samples = len(indices)
        X = np.empty((n_samples, *self.img_size))
        Y = np.empty((n_samples, 6), dtype=np.float32)
        last = len(self.ids) - 1

        for i, index in enumerate(indices):
            # _read requires three paths plus the fallback shape; calling it
            # with a single path raises TypeError.
            image = _read(self._dicom_path(self.ids[max(index - 1, 0)]),
                          self._dicom_path(self.ids[index]),
                          self._dicom_path(self.ids[min(index + 1, last)]),
                          self.img_size)
            if self.augment:
                X[i,] = self.augmentor(image)
            else:
                X[i,] = image
            Y[i,] = self.labels.iloc[index].values
        return X, Y
    
class TestDataGenerator(tensorflow.keras.utils.Sequence):
    """Keras Sequence yielding image-only batches for inference.

    File names are ``index value + '.dcm'``. Each sample is read with
    ``_read`` from the row itself and its previous/next rows in
    ``dataset.index``, mirroring the 3-slice input produced by
    ``TrainDataGenerator``; at the ends of the index the missing neighbour is
    replaced by the row itself.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Only its index is used.
    labels : any
        Stored but never read by this class.
    batch_size, img_size, img_dir :
        Same meaning as in ``TrainDataGenerator``.
    """
    def __init__(self, dataset, labels, batch_size = 16, img_size = SHAPE, img_dir = TEST_IMAGES_DIR, *args, **kwargs):
        self.dataset = dataset
        self.ids = dataset.index
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = img_dir
        self.on_epoch_end()

    def __len__(self):
        """Number of batches; the last one may be shorter."""
        return int(ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        """Return the images of batch number `index`."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        X = self.__data_generation(indices)
        return X

    def on_epoch_end(self):
        """Reset the sample order to sequential."""
        self.indices = np.arange(len(self.ids))

    def _dicom_path(self, image_id):
        """Path of the DICOM file belonging to one index value."""
        return self.img_dir+image_id+".dcm"

    def __data_generation(self, indices):
        # Sized by the real batch length: a short last batch then yields
        # exactly one prediction per test row instead of predictions for
        # uninitialised padding rows.
        X = np.empty((len(indices), *self.img_size))
        last = len(self.ids) - 1

        for i, index in enumerate(indices):
            # _read requires three paths plus the fallback shape; calling it
            # with a single path raises TypeError.
            X[i,] = _read(self._dicom_path(self.ids[max(index - 1, 0)]),
                          self._dicom_path(self.ids[index]),
                          self._dicom_path(self.ids[min(index + 1, last)]),
                          self.img_size)
        return X

In [None]:
def read_testset(filename = DATA_DIR + "stage_2_sample_submission.csv"):
    """Load the sample-submission CSV as one row per image.

    The ``ID`` column is split by position: its first 12 characters become
    ``Image`` and everything from character 13 onwards becomes ``Diagnosis``.
    The result is indexed by ``Image`` with one ``Label`` column per
    ``Diagnosis`` value (column MultiIndex ``('Label', diagnosis)``).
    """
    raw = pd.read_csv(filename)
    id_text = raw["ID"].str
    long_df = raw.assign(
        Image=id_text.slice(stop=12),
        Diagnosis=id_text.slice(start=13),
    )[["Label", "Diagnosis", "Image"]]
    return long_df.set_index(['Image', 'Diagnosis']).unstack(level=-1)

def read_trainset(filename = DATA_DIR + "stage_2_train.csv"):
    """Load the training CSV as one row per image.

    The ``ID`` column is split by position: its first 12 characters become
    ``Image`` and everything from character 13 onwards becomes ``Diagnosis``.
    A hard-coded list of row labels (four runs of six consecutive rows,
    listed by the author as duplicates) is dropped before pivoting. The
    result is indexed by ``Image`` with one ``Label`` column per
    ``Diagnosis`` value.
    """
    raw = pd.read_csv(filename)
    id_text = raw["ID"].str
    raw["Image"] = id_text.slice(stop=12)
    raw["Diagnosis"] = id_text.slice(start=13)

    # First row of each duplicated run; every run spans six consecutive rows.
    run_starts = (56346, 1171830, 3705312, 3842478)
    duplicates_to_remove = [start + offset for start in run_starts for offset in range(6)]

    deduped = raw.drop(index = duplicates_to_remove).reset_index(drop = True)
    long_df = deduped[["Label", "Diagnosis", "Image"]]
    return long_df.set_index(['Image', 'Diagnosis']).unstack(level=-1)

# Read Train and Test Datasets
# Both frames are indexed by image id with one ('Label', diagnosis) column per
# diagnosis; the default CSV paths under DATA_DIR are used.
test_df = read_testset()
train_df = read_trainset()

In [None]:
# Full-range slice: keeps every row. Presumably a hook for narrowing the
# training set while experimenting (e.g. .iloc[:N]) — TODO confirm.
train_df = train_df.iloc[:]
train_df

In [None]:
# Oversampling
# Rows whose 'epidural' label equals 1 are appended once more to the frame.
# NOTE: train_df is overwritten with the enlarged frame, so re-running this
# cell without reloading train_df appends the epidural rows again.
epidural_df = train_df[train_df.Label['epidural'] == 1]
train_oversample_df = pd.concat([train_df, epidural_df])
train_df = train_oversample_df

# Summary
print('Train Shape: {}'.format(train_df.shape))
print('Test Shape: {}'.format(test_df.shape))

In [None]:
# Inspect the second row. `train_df[1]` is a column lookup and raises KeyError
# because the columns are ('Label', diagnosis) pairs, not integers.
train_df.iloc[1]

In [None]:
# Per-image metadata/label table from an external dataset.
data_f = pd.read_csv('../input/intracranial-hemorrhage-seresnext50-v2/meta_patient_homorrhage.csv')
# Column sums of positional columns 6,1,2,3,4,5 divided by the row count —
# presumably the fraction of positive rows per label; TODO confirm the column
# order against the CSV header.
np.sum(data_f.values[:,[6,1,2,3,4,5]],axis=0)/len(data_f)

In [None]:
# Index the table by its 'id' column (in place; the column itself is kept).
# The data generators read image ids from this index.
data_f.index = data_f['id']
data_f

In [None]:
data_f.iloc[:75999]

In [None]:
# Positional split of data_f: the first N_VAL_ROWS rows form the validation
# frame, the remaining rows the training frame. Only the five positional
# columns in LABEL_COLUMNS are kept (presumably the target labels — TODO
# confirm against the CSV header).
N_VAL_ROWS = 75999
LABEL_COLUMNS = [6,2,3,4,5]

new_train = data_f.iloc[N_VAL_ROWS:, LABEL_COLUMNS].copy()
new_val   = data_f.iloc[:N_VAL_ROWS, LABEL_COLUMNS].copy()

new_train

In [None]:
new_train.sum(axis=0)

In [None]:
new_val

In [None]:
# One [class-0 weight, class-1 weight] row per selected column, computed over
# ALL rows of data_f (training and validation rows alike). Uses the same five
# positional columns as new_train/new_val.
weights = calculating_class_weights(data_f.values[:,[6,2,3,4,5]])
weights

In [None]:
def predictions(test_df, model):
    """Predict every row of `test_df` with `model`.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Its index supplies the image ids read by ``TestDataGenerator``.
    model : Keras model exposing ``predict``.

    Returns
    -------
    Model output truncated to ``len(test_df)`` rows, so any padding rows of a
    final batch are discarded.
    """
    test_generator = TestDataGenerator(test_df, None, 8, SHAPE, TEST_IMAGES_DIR)
    # Model.predict accepts a Sequence directly; predict_generator is the
    # deprecated spelling of the same call.
    test_preds = model.predict(test_generator, verbose = 1)
    return test_preds[:len(test_df)]

def ModelCheckpointFull(model_name):
    """Create the checkpoint callback used for training.

    Saves weights only, to `model_name`, and only when the monitored
    ``val_AUC_full`` metric improves (mode ``'max'``). The check runs once
    per epoch.
    """
    return ModelCheckpoint(model_name,
                            monitor = 'val_AUC_full',
                            verbose = 1,
                            save_best_only = True,
                            save_weights_only = True,
                            mode = 'max',
                            # `period=1` is the deprecated spelling of this.
                            save_freq = 'epoch')

# Create Model
def create_model():
    """Build the classifier: EfficientNetB2 backbone plus a 5-unit sigmoid head.

    The Keras session is cleared first. The backbone is created with
    ImageNet weights, without its top, with average pooling and an input of
    shape ``SHAPE``; its output passes through Dropout(0.15) and a Dense
    layer with 5 sigmoid units.

    Returns
    -------
    An uncompiled Keras ``Model``.
    """
    K.clear_session()

    backbone = efn.EfficientNetB2(weights = 'imagenet', include_top = False, pooling = 'avg', input_shape = SHAPE)
    features = Dropout(0.15)(backbone.output)
    y_pred = Dense(5, activation = 'sigmoid')(features)

    return Model(inputs = backbone.input, outputs = y_pred)

In [None]:
# # Submission Placeholder
# submission_predictions = []

# # Multi Label Stratified Split stuff...
# msss = MultilabelStratifiedShuffleSplit(n_splits = 10, test_size = TEST_SIZE, random_state = SEED)
# X = train_df.index
# Y = train_df.Label.values

In [None]:
# # Get train and test index
# msss_splits = next(msss.split(X, Y))
# train_idx = msss_splits[0]
# valid_idx = msss_splits[1]

In [None]:
# len(train_idx),len(valid_idx)

In [None]:
train_df

In [None]:
# np.random.shuffle(train_idx)
# print(train_idx[:5])    
# print(valid_idx[:5])

# data_generator_train = TrainDataGenerator(train_df.iloc[train_idx], 
#                                             train_df.iloc[train_idx], 
#                                             TRAIN_BATCH_SIZE, 
#                                             SHAPE,
#                                             augment = True)


In [None]:
new_train

In [None]:
# Feed the generators with new_train / new_val: they hold exactly the five
# label columns that TrainDataGenerator's (batch, 5) target buffer and the
# Dense(5) head expect, and `weights` was computed from the same columns.
# train_df / test_df do not match: their label rows do not fit the 5-column
# target buffer, and test_df carries no ground truth to validate against
# (presumably submission placeholders — confirm).
data_generator_train = TrainDataGenerator(new_train,
                                          new_train,
                                          TRAIN_BATCH_SIZE,
                                          SHAPE,
                                          augment = True)

data_generator_val = TrainDataGenerator(new_val,
                                        new_val,
                                        VALID_BATCH_SIZE,
                                        SHAPE,
                                        augment = False)

# Each "epoch" covers one tenth of the training batches.
TRAIN_STEPS = int(len(data_generator_train) / 10)
LR = 0.0001

# Pull the first validation batch as a smoke test; `i` (images) and `j`
# (labels) stay in scope for the plotting cell below.
for i, j in data_generator_val:
    print(i.shape)
    break

In [None]:
TRAIN_STEPS

In [None]:
import matplotlib.pyplot as plt

# Show the three channels of sample 5 from the batch `i`, one figure per
# channel; only the first figure gets a colorbar.
for channel in range(3):
    plt.imshow(i[5,:,:,channel])
    if channel == 0:
        plt.colorbar()
    plt.show()

In [None]:
# (train_df.iloc[valid_idx].values[:,5]==1).sum()/10

In [None]:
# Short aliases for the Keras metric classes used to build `Metrics`.
# PRECISION is only referenced by commented-out entries in this notebook.
AUC = tf.keras.metrics.AUC
RECALL = tf.keras.metrics.Recall
PRECISION = tf.keras.metrics.Precision
# PRECISION = tf.keras.metrics.RecallAtPrecision

In [None]:
# Create Model
# Metrics tracked during training. The numeric suffix in a metric name is NOT
# the output column: suffix 0 reads column 0 and suffixes 2-5 read columns
# 1-4 (the suffix-1 entries are commented out). 'AUC_full' is the metric
# monitored by ModelCheckpointFull (as 'val_AUC_full'). Recall entries use a
# decision threshold of 0.7.
Metrics = [AUC(name = 'AUC_full', multi_label=True),
           AUC(name = 'AUC_0', multi_label=True, label_weights=[1,0,0,0,0]),
#            AUC(name = 'AUC_1', multi_label=True, label_weights=[0,1,0,0,0,0]),
           AUC(name = 'AUC_2', multi_label=True, label_weights=[0,1,0,0,0]),
           AUC(name = 'AUC_3', multi_label=True, label_weights=[0,0,1,0,0]),
           AUC(name = 'AUC_4', multi_label=True, label_weights=[0,0,0,1,0]),
           AUC(name = 'AUC_5', multi_label=True, label_weights=[0,0,0,0,1]),
           
           RECALL(thresholds=0.7,name='REC_full'),
           RECALL(thresholds=0.7,class_id=0, name='REC_0'),
#            RECALL(thresholds=0.7,class_id=1, name='REC_1'),
           RECALL(thresholds=0.7,class_id=1, name='REC_2'),
           RECALL(thresholds=0.7,class_id=2, name='REC_3'),
           RECALL(thresholds=0.7,class_id=3, name='REC_4'),
           RECALL(thresholds=0.7,class_id=4, name='REC_5')]
          
#            PRECISION(thresholds=0.7, name='PRE_full'),
#            PRECISION(thresholds=0.7, class_id=0, name='PRE_0'),
#            PRECISION(thresholds=0.7, class_id=1, name='PRE_1'),
#            PRECISION(thresholds=0.7, class_id=2, name='PRE_2'),
#            PRECISION(thresholds=0.7, class_id=3, name='PRE_3'),
#            PRECISION(thresholds=0.7, class_id=4, name='PRE_4'),
#            PRECISION(thresholds=0.7, class_id=5, name='PRE_5')]

def get_weighted_loss(weights):
    """Return a binary cross-entropy loss weighted per label and per class.

    Parameters
    ----------
    weights : array indexed as ``weights[:, 0]`` and ``weights[:, 1]``
        Column 0 is applied where ``y_true`` is 0, column 1 where it is 1
        (one row per label).

    Returns
    -------
    callable ``weighted_loss(y_true, y_pred)`` giving the weighted
    cross-entropy averaged over the last axis.
    """
    negative_weight = weights[:,0]
    positive_weight = weights[:,1]

    def weighted_loss(y_true, y_pred):
        # For binary targets the exponents select exactly one of the two
        # weights: w0**(1-y) * w1**y is w0 when y == 0 and w1 when y == 1.
        element_weight = (negative_weight**(1-y_true))*(positive_weight**(y_true))
        return K.mean(element_weight*K.binary_crossentropy(y_true, y_pred), axis=-1)

    return weighted_loss

# Build the network and compile it with Adam at learning rate LR, the
# class-weighted cross-entropy built from `weights`, and the metric list
# defined above.
model = create_model()   
model.compile(optimizer = Adam(learning_rate = LR), 
                  loss = get_weighted_loss(weights),
                  metrics = Metrics)

In [None]:
# model.load_weights('../input/hemorrhageefficientnetb2v2best-weights/model.h5')

In [None]:
# !pip install tensorflow-gpu==2.5.0

In [None]:
# Train inside an MLflow run. Model.fit accepts Sequence objects directly;
# fit_generator is the deprecated spelling of the same call.
with mlflow.start_run():
    model.fit(data_generator_train,
              validation_data = data_generator_val,
              steps_per_epoch = TRAIN_STEPS,
              epochs = 15,
              callbacks = [ModelCheckpointFull('model.h5')],
              verbose = 1,workers=4)

In [None]:
# res = model.evaluate_generator(data_generator_val,
#                             verbose = 1,workers=4)

In [None]:
# data_generator_val = ValDataGenerator(train_df.iloc[valid_idx], 
#                                         train_df.iloc[valid_idx], 
#                                         VALID_BATCH_SIZE, 
#                                         SHAPE,
#                                         augment = False)

In [None]:
# result = model.predict_generator(data_generator_val,
#                             verbose = 1,workers=4)

In [None]:
# # y_true = []
# k = 0
# for i,j in data_generator_val:
#     print(k)
#     if k==0:
#         y_true = np.array(j)
#     else:       
#         y_true = np.concatenate((y_true,j),axis=0)
#     k+=1

In [None]:
# result.shape,y_true.shape[0]/(1181+1)

In [None]:
# roc_auc_score(np.array(np.concatenate((y_true[:64,0],y_true[128:,0]),axis=0),int), result[:,i])

In [None]:
# import numpy as np
# from sklearn.metrics import precision_recall_curve,roc_auc_score
# print(y_true.shape)
# i = 0
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])
# precision, recall, thresholds

In [None]:
# i=0
# roc_auc_score(np.array(y_true[:,i],int), result[:,i])

In [None]:
# keys = train_df['Label'].columns
# keys

In [None]:
# import matplotlib.pyplot as plt
# plt.figure(figsize=(8,8))
# plt.style.use('seaborn')
# i = 0
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])
# # plt.subplot(2,3,i+1)
# plt.xlabel('Recall')
# plt.ylabel('Precision')

# plt.plot(recall,precision,label=f'{keys[i]}')

# i = 1
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])
# # plt.subplot(2,3,i+1)
# plt.xlabel('Recall')
# plt.ylabel('Precision')

# plt.plot(recall,precision,label=f'{keys[i]}')

# i = 2
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])
# # plt.subplot(2,3,i+1)
# plt.xlabel('Recall')
# plt.ylabel('Precision')

# plt.plot(recall,precision,label=f'{keys[i]}')

# i = 3
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])
# # plt.subplot(2,3,i+1)
# plt.xlabel('Recall')
# plt.ylabel('Precision')

# plt.plot(recall,precision,label=f'{keys[i]}')

# i = 4
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])
# # plt.subplot(2,3,i+1)
# plt.xlabel('Recall')
# plt.ylabel('Precision')

# plt.plot(recall,precision,label=f'{keys[i]}')

# i = 5
# precision, recall, thresholds = precision_recall_curve(np.array(y_true[:,i],int), result[:,i])

# plt.xlabel('Recall')
# plt.ylabel('Precision')

# plt.plot(recall,precision,label=f'{keys[i]}')
# plt.legend()

# plt.savefig('pr_re_curve.jpg',dpi=250,bbox_inches='tight')

In [None]:
# def main():
# with mlflow.start_run():
#     model.fit_generator(generator = data_generator_train,
#                             validation_data = data_generator_val,
#                             steps_per_epoch = TRAIN_STEPS,
#                             epochs = 17,
#                             callbacks = [ModelCheckpointFull('model.h5')],
#                             verbose = 1,workers=4)

In [None]:
# np.mean([0.978,0.989,0.985,0.992,0.968,0.967]),0.98