In [None]:
!pip install nb_black
%load_ext nb_black

In [None]:
import numpy as np
import pandas as pd
import os
import shutil
import multiprocessing
import pickle
import cv2

!pip install iterative-stratification
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from sklearn.model_selection import train_test_split

#!pip install tensorflow-gpu
import tensorflow as tf

print(tf.__version__)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices("GPU")))

from keras_preprocessing.image import ImageDataGenerator
from keras.applications import Xception, vgg16
from keras.layers import (
    GlobalAveragePooling2D,
    Dense,
    BatchNormalization,
    Dropout,
    Concatenate,
    Add,
)
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
from keras.losses import binary_crossentropy
from keras import backend as K
from keras.utils import Sequence

#!pip install -U efficientnet==0.0.4 #Used from Script Utility https://www.kaggle.com/ratthachat/efficientnet
!pip install -U efficientnet #Used from Script Utility https://www.kaggle.com/ratthachat/efficientnet
from efficientnet import EfficientNetB3, preprocess_input

!pip install keras-rectified-adam
# os.environ['TF_KERAS'] = '1' # Required to work RAdam work properly (maybe in future versions is solved)
from keras_radam import RAdam

from tqdm import tqdm_notebook

from matplotlib import pyplot as plt
import matplotlib.image as image

%matplotlib inline

In [None]:
# Custom codes to make code clear
from dnn_helper import (
    KFolds_flow_from_dataframe,
    fold_training,
    save_train,
    visualize_training,
    config_model_trainable,
    bce_dice_loss,
    BatchHistoryEarlyStopping,
    make_pred,
)
from misc_utils import extract_zips, rebalance_data, oversampling_data
from image_utils import preprocess_image, check_range, transform_range
from dicom_utils import (
    preprocess_dicom,
    sample_bins_mean_std,
    get_dcm_img,
    normalize_img,
)

In [None]:
# --- Competition inputs -------------------------------------------------------
BASE_PATH = "../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/"
TRAIN_DIR = f"{BASE_PATH}stage_2_train/"
SUB_DIR = f"{BASE_PATH}stage_2_test/"

# --- Pre-computed image dataset (zips + csv) ----------------------------------
IMAGE_DATA_BASE_PATH = "../input/rsna-dicom-normalization-by-metadata-groups/"
CSV_DIR = f"{IMAGE_DATA_BASE_PATH}csv/"
TRAIN_ZIP = f"{IMAGE_DATA_BASE_PATH}train.zip"
TEST_ZIP = f"{IMAGE_DATA_BASE_PATH}test.zip"
SUB_ZIP = f"{IMAGE_DATA_BASE_PATH}sub.zip"

# --- Temporary working directories --------------------------------------------
TRAIN_PNG = "/kaggle/tmp/train/"
TEST_PNG = "/kaggle/tmp/test/"
SUB_PNG = "/kaggle/tmp/sub/"
AUG_TRAIN_PNG = "/kaggle/tmp/aug_png/"

# --- Output directories (relative to the working directory) -------------------
PREDICTIONS_DIR = "predictions/"
MODELS_DIR = "model/"

# Label columns; "any" comes first and the model heads rely on this order.
CLASSES = ["any", "epidural", "intraparenchymal", "intraventricular", "subarachnoid", "subdural"]

# The training set is huge, so optionally only this fraction of it is used
# (see the commented-out split further down).
FRACTION_TRAINING = 0.2

# Seed numpy and TensorFlow; the TensorFlow seeding function moved between
# major versions 1 and 2.
SEED = 42
np.random.seed(SEED)
if int(tf.__version__.split(".")[0]) < 2:
    tf.set_random_seed(seed=SEED)  # For tf v1 or lower
else:
    tf.random.set_seed(seed=SEED)  # For tf v2 or higher

In [None]:
def load_csv(path):
    """Read a competition CSV and derive per-row file name and label type.

    Each value of the ``ID`` column is split on ``"_"``; the second piece
    becomes the image name (``"ID_<piece>.png"``, stored in ``filename``) and
    the third piece is stored in ``type``.

    Parameters
    ----------
    path : str
        Location of the CSV file; it must contain an ``ID`` column.

    Returns
    -------
    pandas.DataFrame
        The CSV content with the extra ``filename`` and ``type`` columns.
    """

    def _png_name(raw_id):
        return "ID_" + raw_id.split("_")[1] + ".png"

    def _label_type(raw_id):
        return raw_id.split("_")[2]

    frame = pd.read_csv(path)
    frame["filename"] = frame["ID"].apply(_png_name)
    frame["type"] = frame["ID"].apply(_label_type)
    return frame

In [None]:
# Long format: one row per (image, label type) pair.
data_df = load_csv(BASE_PATH + "stage_2_train.csv")
# pivot() requires unique (filename, type) pairs, so drop repeated rows first.
data_df = data_df.drop_duplicates(subset=["filename", "type"])
# Wide format: one row per image and one label column per type.
# Keyword arguments are used because pandas 2.x no longer accepts positional
# arguments in DataFrame.pivot.
data_df = data_df.pivot(
    index="filename", columns="type", values="Label"
).reset_index()  # Extract Labels

# The submission set only needs the list of distinct image file names.
sub_df = load_csv(BASE_PATH + "stage_2_sample_submission.csv")
sub_df = pd.DataFrame(sub_df.filename.unique(), columns=["filename"])

In [None]:
# Hold out 10% of the labelled images as a local test set. The split is
# stratified on all label columns at once and seeded for reproducibility.
# The commented line below is the alternative that keeps only FRACTION_TRAINING
# of the data for training.
# train_df, _ = train_test_split(data_df, train_size = FRACTION_TRAINING, stratify = data_df[CLASSES], random_state = SEED)
train_df, test_df = train_test_split(
    data_df, test_size=0.1, stratify=data_df[CLASSES], random_state=SEED
)

In [None]:
# Sanity check: size of the training split and its first rows.
print(train_df.shape)
train_df.head()

In [None]:
# Mean of each label column before rebalancing
# (the positive rate, assuming labels are 0/1 — TODO confirm).
train_df[CLASSES].mean()

In [None]:
# When False, the next cell rebalances train_df by plain under/over-sampling.
USE_AUGMENTED_DATA = False

In [None]:
# Without augmented data, rebalance train_df in two steps: undersample on the
# 'any' label, then oversample on the 'epidural' label.
# NOTE(review): the exact semantics of rebalance_data / oversampling_data live in
# misc_utils and are not visible here; the comments below describe the intent.
if not USE_AUGMENTED_DATA:
    # Undersampling on 'any' aiming at a 2-1 ratio (2 samples of any = 0 for each sample of any = 1)
    ratio = 2
    # Only the first element of the helper's result is used; it is treated as
    # index labels of the rows to keep.
    train_df_indexes = rebalance_data(
        train_df,
        1,
        target="any",
        negative_downsampling=True,
        use_partial_data=True,
        ratio_partial_data=ratio,
    )[0]
    train_df = train_df.loc[train_df_indexes]

    # Oversampling on the 'epidural' class because it has very low presence
    # (goal_percentage=0.01 is presumably the target share of positives — confirm).
    train_df = oversampling_data(train_df, target="epidural", goal_percentage=0.01)

In [None]:
# Mean of each label column after the rebalancing step above.
train_df[CLASSES].mean()

In [None]:
# Per-class sample weights: within each label column, every row is weighted by
# (size of the largest label group) / (size of the row's own label group), so
# rows carrying the rarer label value get the larger weight.
sample_weight_cols = ["sample_weight_" + target for target in CLASSES]

df = pd.DataFrame(index=train_df.index)  # scratch frame, removed below
for target, weight_col in zip(CLASSES, sample_weight_cols):
    group_sizes = train_df.groupby(target)["filename"].transform("count")
    df[weight_col] = group_sizes.max() / group_sizes

# Overall weight of a row = its largest per-class weight.
train_df["sample_weights"] = df.max(axis=1)
train_df[sample_weight_cols] = df
del df

In [None]:
# False: the backbone is created with ImageNet weights and training is staged
# (top layers first, then a partial unfreeze). True: no pre-trained weights are
# loaded and config_model_trainable is called with config="full".
TRAIN_FULL_MODEL = False
# Only referenced by the BatchHistoryEarlyStopping call in the deprecated cell at the end.
BATCH_FREQ = 100
# Not referenced by the training loop below, which runs one epoch per fold.
EPOCHS = 50
BATCH_SIZE = 64

# Dropout applied after global average pooling, before the prediction heads.
DROPOUT_RATE = 0.4
# Spatial size of the network input; a 3-channel axis is appended where needed.
img_size = (250, 250)

# Selects the backbone: "efficientnet" -> EfficientNetB3, "inception" -> Xception,
# anything else -> VGG16.
MODEL_NAME = "efficientnet"
# Per-backbone value passed as `last_block` to config_model_trainable for partial
# training (presumably marks where the trainable part starts — confirm in dnn_helper).
MODELS_LAST_BLOCK = {"vgg": 126, "efficientnet": "multiply_16", "inception": 172}

# Learning rate while only the top layers are trained / for the later stages.
LEARNING_RATE_TOP = 0.005
LEARNING_RATE = 0.0001
# warmup_proportion given to the RAdam optimizer.
WARMUP_PROP = 0.1

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that reads DICOM files and yields image batches.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``filename`` column; its last four characters (the
        extension) are replaced by ``.dcm`` to locate the file in ``img_dir``.
        When ``training`` is true it must also contain the ``targets`` columns.
    training : bool
        True: batches are ``(X, Y)`` and the sample order is reshuffled on
        every epoch. False: batches are ``X`` only and samples are served in
        ``dataset`` order so predictions line up with the rows.
    targets : list of str
        Label columns to emit.
    batch_size : int
        Maximum number of samples per batch (the last batch may be smaller).
    img_size : tuple
        ``(img_size[0], img_size[1])`` of the produced images; three channels
        are always produced.
    img_dir : str
        Directory prefix (with trailing separator) of the ``.dcm`` files.
    preprocess : bool
        True: delegate loading to ``preprocess_dicom`` with the
        brain / subdural / brain_bone windows. False: load with
        ``get_dcm_img`` + ``normalize_img`` and copy the result into all
        three channels.
    bins
        Forwarded to ``preprocess_dicom``.
    multi_output : bool
        When true (and ``training``), labels are returned as a list with one
        array per target instead of a single 2-D array.
    verbose : bool
        Forwarded to the DICOM helpers.
    *args, **kwargs
        Accepted and ignored.
    """

    def __init__(
        self,
        dataset,
        training,
        targets=CLASSES,
        batch_size=16,
        img_size=img_size,
        img_dir=TRAIN_PNG,
        preprocess=True,
        bins=None,
        multi_output=False,
        verbose=False,
        *args,
        **kwargs
    ):
        self.dataset = dataset
        self.ids = dataset.index
        self.training = training
        self.targets = targets if training else None
        # Built from the argument rather than self.targets, which is None at
        # inference time (iterating it used to raise TypeError).
        self.targets_names = ["pred_" + target for target in targets]
        self.labels = dataset[targets] if training else None
        self.batch_size = batch_size
        self.img_size = (img_size[0], img_size[1], 3)
        self.img_dir = img_dir
        self.preprocess = preprocess
        self.bins = bins
        self.multi_output = multi_output
        self.verbose = verbose
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch, counting a final partial batch."""
        return int(np.ceil(len(self.ids) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` of the current epoch."""
        indices = self.indices[index * self.batch_size : (index + 1) * self.batch_size]
        return self.__data_generation(indices)

    def on_epoch_end(self):
        """Reset the sample order; it is shuffled only for labelled generators."""
        self.indices = np.arange(len(self.ids))
        if self.training:
            np.random.shuffle(self.indices)

    def _load_unwindowed(self, dcm_path):
        """Load one DICOM without windowing and replicate it over the channels."""
        # The original code repeated this read once per window name without
        # ever applying the window, so a single read gives the same channels.
        image, _ = get_dcm_img(dcm_path, verbose=self.verbose)
        image = normalize_img(image, use_min_max=True)
        rows, cols = self.img_size[0], self.img_size[1]
        # Rescale to the defined image size
        if image.shape != (rows, cols):
            # cv2.resize expects dsize as (width, height) == (cols, rows).
            image = cv2.resize(image, (cols, rows), interpolation=cv2.INTER_NEAREST)
        final_image = np.empty(self.img_size)
        final_image[...] = image[:, :, np.newaxis]
        return final_image

    def __data_generation(self, indices):
        """Build the arrays for the samples at positional ``indices``."""
        # Size the arrays by the actual number of samples: the last batch of
        # an epoch may be shorter than batch_size, and np.empty would leave
        # uninitialised rows at its end.
        n_samples = len(indices)
        X = np.empty((n_samples, *self.img_size))
        if self.training:
            Y = np.empty((n_samples, len(self.targets)), dtype=np.float32)

        for i, index in enumerate(indices):
            filename = self.dataset["filename"].iloc[index][:-4]
            dcm_path = self.img_dir + filename + ".dcm"

            if self.preprocess:
                final_image, _ = preprocess_dicom(
                    dcm_path,
                    self.img_size[0],
                    self.img_size[1],
                    bins=self.bins,
                    use_min_max=True,
                    # windows_type="brain",
                    windows_type=["brain", "subdural", "brain_bone"],
                    verbose=self.verbose,
                )
            else:
                final_image = self._load_unwindowed(dcm_path)

            X[i,] = final_image
            if self.training:
                Y[i,] = self.labels.iloc[index].values

        if not self.training:
            return X
        if self.multi_output:
            # One 1-D label array per output head, in `targets` order.
            return X, [Y[:, i] for i in range(len(self.targets))]
        return X, Y

In [None]:
"""bins_mean, mean, std = sample_bins_mean_std(
    TRAIN_DIR, samples_per_group=5, max_trys=5000, n_bins=100
)"""
# Histogram bins are disabled: the sample_bins_mean_std call above is kept only
# as a string, so the generators receive bins=None.
bins_mean = None

In [None]:
# Pre-trained ImageNet weights are only loaded when the backbone is not trained
# from scratch.
weights = None if TRAIN_FULL_MODEL else "imagenet"

# Pre-Trained CNN Model using imagenet dataset for pre-trained weights
if MODEL_NAME == "efficientnet":
    base_model = EfficientNetB3(
        input_shape=(img_size[0], img_size[1], 3), weights=weights, include_top=False
    )
elif MODEL_NAME == "inception":
    # NOTE(review): the "inception" option actually builds an Xception backbone.
    base_model = Xception(
        input_shape=(img_size[0], img_size[1], 3), weights=weights, include_top=False
    )
else:
    base_model = vgg16.VGG16(
        include_top=False,
        weights=weights,
        input_tensor=None,
        input_shape=(img_size[0], img_size[1], 3),
        pooling=None,
    )

# Top Model Block: pooled backbone features -> dropout -> one sigmoid head per class
x = base_model.output
x = GlobalAveragePooling2D()(x)
# x = Dense(256, activation='relu')(x)
# x = BatchNormalization()(x)
x = Dropout(DROPOUT_RATE, seed=SEED)(x)

# One head per hemorrhage subtype (every class except "any"), in CLASSES order.
pred_2, pred_3, pred_4, pred_5, pred_6 = [
    Dense(1, activation="sigmoid", name="pred_" + class_name)(x)
    for class_name in CLASSES[1:]
]

# The "any" head sees the pooled features, the five subtype predictions and a
# batch-normalised sum of those predictions.
x_add_any = Add()([pred_2, pred_3, pred_4, pred_5, pred_6])
x_add_any = BatchNormalization()(x_add_any)
x_concat_any = Concatenate()([x, pred_2, pred_3, pred_4, pred_5, pred_6, x_add_any])
pred_1 = Dense(1, activation="sigmoid", name="pred_" + CLASSES[0])(x_concat_any)
# predictions = Dense(len(CLASSES), activation="sigmoid", name="predictions")(x)

# add your top layer block to your base model; outputs follow CLASSES order
model = Model(base_model.input, [pred_1, pred_2, pred_3, pred_4, pred_5, pred_6])
# model = Model(base_model.input, predictions)

if TRAIN_FULL_MODEL:
    config_model_trainable(model, config="full")
else:
    config_model_trainable(model, config="top", base_model=base_model)

# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that only appended a stray "None" line).
model.summary()

In [None]:
def bce_dice_loss_clip(y_true, y_pred):
    """Two-argument wrapper around ``bce_dice_loss`` with ``clip_loss`` fixed to 10.

    Gives the loss the ``(y_true, y_pred)`` signature so it can be handed
    directly to ``model.compile``.
    """
    return bce_dice_loss(y_true, y_pred, clip_loss=10)


def jaccard_distance_loss(y_true, y_pred, smooth=100):
    """
    Smoothed Jaccard distance, reduced over the last axis.

    Jaccard = (|X & Y|) / (|X| + |Y| - |X & Y|)
            = sum(|A*B|) / (sum(|A|) + sum(|B|) - sum(|A*B|))

    The Jaccard distance loss is useful for unbalanced datasets. It is shifted
    so that it converges on 0, and ``smooth`` is added to numerator and
    denominator to avoid exploding or vanishing gradients. The result is
    ``(1 - jaccard) * smooth``.

    Ref: https://en.wikipedia.org/wiki/Jaccard_index

    @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96
    @author: wassname
    """
    overlap = K.sum(K.abs(y_true * y_pred), axis=-1)
    magnitude = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
    union = magnitude - overlap
    similarity = (overlap + smooth) / (union + smooth)
    return (1 - similarity) * smooth

In [None]:
def calc_class_weight(df, targets, prefix="pred_"):
    """Build a per-output class-weight mapping from label frequencies.

    For each column in ``targets`` the mean of the column (``rate``) is
    computed; label 0 is then weighted ``rate + 0.1`` and label 1 is weighted
    ``1 - rate + 0.1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding one label column per target.
    targets : sequence of str
        Names of the label columns.
    prefix : str
        Prepended to each target name to form the key of the result.

    Returns
    -------
    dict
        ``{prefix + target: {0: weight_for_0, 1: weight_for_1}}``.
    """
    eps = 0.1

    def _weights_for(column):
        rate = sum(column) / len(column)
        return {0: rate + eps, 1: 1 - rate + eps}

    return {prefix + name: _weights_for(df[name]) for name in targets}

In [None]:
def _split_off_fold(dataframe, test_size, target, shuffle, seed, fold_index):
    """Split one fold off ``dataframe``; retry without stratification on ValueError."""
    try:
        stratify_df = dataframe[target] if shuffle else None
        return train_test_split(
            dataframe,
            test_size=test_size,
            stratify=stratify_df,
            shuffle=shuffle,
            random_state=seed,
        )
    except ValueError as e:
        print(f"Stratify is not posible at kfold {fold_index} due to: {e}")
        return train_test_split(
            dataframe, test_size=test_size, shuffle=shuffle, random_state=seed
        )


def KFolds_stratified(dataframe, k=10, target="class", shuffle=True, seed=None):
    """Cut ``dataframe`` into ``k`` disjoint folds and pair consecutive folds.

    The frame is split repeatedly with ``train_test_split``: split ``i`` takes
    ``1 / (k - i)`` of what is left, so the ``k`` folds end up with roughly the
    same size. Splits are stratified on ``target`` when ``shuffle`` is true;
    if a stratified split raises ``ValueError`` a message is printed and that
    split is redone without stratification.

    Note that each returned pair is ``(fold_i, fold_{i+1})``: the "train" part
    is a single fold, not the complement of the validation fold.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to split.
    k : int
        Number of folds to cut (at least 2).
    target : str or list of str
        Column(s) used for stratification.
    shuffle : bool
        Forwarded to ``train_test_split``; stratification is skipped when false.
    seed : int or None
        ``random_state`` used for every split.

    Returns
    -------
    list of (DataFrame, DataFrame)
        ``k - 1`` pairs of consecutive folds.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 2, or if a split fails even without
        stratification.
    """
    if k < 2:
        raise ValueError(f"k must be at least 2 to build fold pairs, got {k}")

    folds = []
    remain_df = dataframe
    for i in range(k - 1):
        remain_df, kfold_df = _split_off_fold(
            remain_df, 1 / (k - i), target, shuffle, seed, i
        )
        folds.append(kfold_df)
    # Whatever is left after k - 1 splits is the last fold.
    folds.append(remain_df)

    return [(folds[i], folds[i + 1]) for i in range(k - 1)]

In [None]:
KFOLDS = 6
# Number of folds of the first iteration that train only the top layers.
KFOLDS_TOP_LAYER_TRAIN = 3
# Fold from which predictions are made; disabled because predicting on CPU takes too long.
KFOLDS_START_PREDICTIONS = np.inf
ITERATIONS = 2
# Folds after this one are skipped; currently disabled.
SKIP_K = np.inf


def _fit_one_fold(model, train_gen, valid_gen, n_train, n_valid, learning_rate):
    """Recompile ``model`` with a fresh RAdam optimizer and train it for one epoch.

    Returns the ``History`` object produced by ``fit_generator``.
    """
    optimizer = RAdam(warmup_proportion=WARMUP_PROP, lr=learning_rate)
    loss = "binary_crossentropy"  # bce_dice_loss_clip
    metrics = ["accuracy", tf.keras.metrics.AUC()]
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model.fit_generator(
        train_gen,
        steps_per_epoch=n_train // BATCH_SIZE,
        epochs=1,
        validation_data=valid_gen,
        validation_steps=n_valid // BATCH_SIZE,
        shuffle=False,
        # class_weight=calc_class_weight(fold_train, CLASSES, prefix="pred_"),
        # use_multiprocessing = True,
        # workers = 2 * multiprocessing.cpu_count(),
        # callbacks=callbacks_list)
    )


predictions = []
for i in range(ITERATIONS):
    print("Starting iteration ", i)
    # mskf = MultilabelStratifiedKFold(n_splits=KFOLDS, random_state=0)
    print("Creating KFolds")
    # The same seed is passed on every iteration.
    Kfolds = KFolds_stratified(
        train_df, k=KFOLDS, target=CLASSES, shuffle=True, seed=SEED
    )
    k = 0

    # for train_index, test_index in mskf.split(
    #    train_df.drop(CLASSES, axis=1), train_df[CLASSES]
    # ):
    #    fold_train = train_df.iloc[train_index]
    #    fold_valid = train_df.iloc[test_index]
    for fold_train, fold_valid in Kfolds:
        print("Fold ", k)
        print("Initialize generators")
        train_gen = DataGenerator(
            fold_train,
            training=True,
            targets=CLASSES,
            batch_size=BATCH_SIZE,
            img_size=img_size,
            img_dir=TRAIN_DIR,
            bins=bins_mean,
            multi_output=True,
            preprocess=False,
            verbose=False,
        )

        # training=True so that the validation batches carry labels too.
        valid_gen = DataGenerator(
            fold_valid,
            training=True,
            targets=CLASSES,
            batch_size=BATCH_SIZE,
            img_size=img_size,
            img_dir=TRAIN_DIR,
            bins=bins_mean,
            multi_output=True,
            preprocess=False,
            verbose=False,
        )

        if k < KFOLDS_TOP_LAYER_TRAIN and not TRAIN_FULL_MODEL and i == 0:
            print("Train top layer")
            config_model_trainable(model, config="top", base_model=base_model)
            history = _fit_one_fold(
                model,
                train_gen,
                valid_gen,
                len(fold_train),
                len(fold_valid),
                LEARNING_RATE_TOP,
            )
        elif k > SKIP_K:
            break
        else:
            if TRAIN_FULL_MODEL:
                print("Train full model")
                config_model_trainable(model, config="full")
            else:
                print("Train partial model")
                config_model_trainable(
                    model, config="partial", last_block=MODELS_LAST_BLOCK[MODEL_NAME]
                )

            # Training runs for both the full and the partial configuration.
            # It used to be nested under the partial branch, so the full-model
            # path never trained and left `history` undefined.
            history = _fit_one_fold(
                model,
                train_gen,
                valid_gen,
                len(fold_train),
                len(fold_valid),
                LEARNING_RATE,
            )

            if k >= KFOLDS_START_PREDICTIONS or (
                i > 0 and KFOLDS_START_PREDICTIONS != np.inf
            ):
                # NOTE(review): sub_generator and test_generator are not defined
                # anywhere in the visible notebook; they must be created before
                # KFOLDS_START_PREDICTIONS is enabled.
                print("Make prediction")
                offset = 1 if len(sub_df) % BATCH_SIZE != 0 else 0
                steps = len(sub_df) // BATCH_SIZE + offset
                pred = make_pred(model, sub_generator, steps)

                # numpy is imported as `np` in this notebook.
                np.savetxt(
                    "prediction_" + str(i) + "_" + str(k) + ".csv", pred, delimiter=","
                )
                predictions.append(pred)
                # Running average over every prediction made so far.
                pred_sub = np.mean(predictions, axis=0)

                offset = 1 if len(test_df) % BATCH_SIZE != 0 else 0
                steps = len(test_df) // BATCH_SIZE + offset
                print(
                    "Evaluation on test set results on: ",
                    model.evaluate_generator(test_generator, steps=steps, verbose=1,),
                )
        k = k + 1

        # k was already incremented, so the directory of the first fold is "k_1".
        save_train(model, history, models_dir=MODELS_DIR + f"iter_{i}_k_{k}/")

In [None]:
#pred_sub

In [None]:
#KFolds_gens[0][1][0][1]

In [None]:
'''
pred = pd.DataFrame(columns = ['X','y'])
pred['y'] = KFolds_gens[0][1].classes
pred['X'] = np.array(model.predict_generator(KFolds_gens[0][1],
                                        steps = KFolds_gens[0][1].n//KFolds_gens[0][1].batch_size,
                                        verbose = 1))
'''

In [None]:
'''
predictions = pd.DataFrame(columns = 'X','y')
def get_crossvalidation_pred(model, generator, predictions):
    pred = pd.DataFrame(columns = 'X','y')
    pred['y'] = generator.classes
    pred['X'] = np.array(model.predict_generator(generator,
                                            steps = generator.n//generator.batch_size,
                                            verbose = 1))
    
    predictions = pd.concat([predictions, pred], ignore_index=True)
'''

Deprecated stuff

In [None]:
'''
# Save best model
weights_path = MODELS_DIR + "model_weights.h5"
if not os.path.exists(MODELS_DIR):
    os.makedirs(MODELS_DIR)
    
history_top_callback = BatchHistoryEarlyStopping(valid_generator = test_generator
                                                 targets = CLASSES
                                                 batch_freq = BATCH_FREQ,
                                                 reset_on_train = False,
                                                 early_stopping = True,
                                                 monitor='val_loss',
                                                 patience=3,
                                                 verbose=1,
                                                 restore_best_weights=True)
    callbacks_list = [
        ModelCheckpoint(top_weights_path, monitor='val_loss', verbose=1, save_best_only=True),
        #EarlyStopping(monitor='val_loss', patience=2, verbose=0),
        history_top_callback
    ]
    # add the best weights from the train top model
    # at this point we have the pre-train weights of the base model and the trained weight of the new/added top model
    # we re-load model weights to ensure the best epoch is selected and not the last one.
    model.load_weights(top_weights_path)
    '''