**(Version 6)**
- Training `DenseNet201`

**(Version 7)**
- Try for 60 epochs

**(Version 11)**
- Try `ResNext101`
- Change Bone window to soft
- Add RandomContrast (this wasn't a good idea; it may have added noise to the windowing operation)
- Two useful notebooks were: [Pytorch ResNext101](https://www.kaggle.com/braquino/pytorch-resnext-32x8d-centercrop), [Keras ResNext101](https://www.kaggle.com/afsan123/keras-resnext50-holdout-split)
- Plus other changes, which can be explored by comparing the notebook versions
- Removing the additional layers

**(Version 12)**
- Bringing back the additional layers

**(Version 14)**
- Return to bone windowing (adding soft windowing seems to have some issues)

**(Version 15)**
- Use soft window
- Use Densenet201 and make all layers trainable
- Remove some layers from the augmentation, they may have resulted in noise
- Modify the last layers of the model (Removed Dense(100) and Dropout)
- n_samples is 15000

**(Version 16)**
- Increase the Dropout to 0.5 in the last layer
- n_samples is 20000

**(Version 17)**
- Increase the Dropout to 0.5 in the last layer
- n_samples is 20000

**(Version 18)**
- Increase the Dropout to 0.8 in the last layer
- Make the whole model trainable, except the last layer (This worked better surprisingly!) (LR = 0.000125)


**(Version 19)**
- Try `InceptionResnetV2`

**(Version 20)**
- Freeze the first 15 layers of the model and make the rest trainable

**(Version 23)**
- The whole model is trainable
- Return to bone windowing
- Return to the very first data augmentation layers
- Try `DenseNet201` again

--------------------------------------------------------------------------------------------------------------
**(Version 24)**
- Try `DenseNet121` again

--------------------------------------------------------------------------------------------------------------
- Try `DenseNet169` again
--------------------------------------------------------------------------------------------------------------
- `MobileNetV2` sample size 107933

- MobileNetV1

In [None]:
# !pip install image-classifiers
# # !pip install iterative-stratification

In [None]:
import os

# Silence TensorFlow's native INFO and WARNING log output ('2' keeps errors).
# The variable is set *before* `import tensorflow` because the native logger
# may read it as soon as the library is loaded; setting it afterwards is not
# reliable.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import math

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydicom
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.model_selection import train_test_split

# Global plot styling shared by every figure in the notebook.
matplotlib.rc('xtick', labelsize=15)
matplotlib.rc('ytick', labelsize=15)
sns.set_style("darkgrid")
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 4})

Reference notebook: [Keras ResNeXt50 holdout split](https://www.kaggle.com/afsan123/keras-resnext50-holdout-split#kln-220)

In [None]:
# from classification_models.tfkeras import Classifiers

In [None]:
# ---- Reproducibility -------------------------------------------------------
SEED = 42  # random_state used for sampling, splitting and shuffling

# ---- Targets ---------------------------------------------------------------
# One output unit per subtype, in this column order.
SUBCLASSES = [
    'epidural',
    'intraparenchymal',
    'intraventricular',
    'subarachnoid',
    'subdural',
]
NUM_CLASSES = 5
SAMPLE_SUBCLASS = 107933  # number of rows drawn from the abnormal scans

# ---- Image handling --------------------------------------------------------
# HU_MIN / HU_MAX are only referenced by commented-out manual windowing code.
HU_MIN = 0
HU_MAX = 100
IMAGE_SIZE = (224, 224)

# ---- Training --------------------------------------------------------------
BATCH_SIZE = 32
EPOCHS = 10

# Metrics passed to model.compile(); the order is kept because the metric
# names are collected from this list later on.
METRICS = [
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
    tf.keras.metrics.AUC(),
    tf.keras.metrics.SpecificityAtSensitivity(0.5),
    tf.keras.metrics.SensitivityAtSpecificity(0.5),
]

In [None]:
def plot_learning_curves(history, metrics_to_plot=('loss', 'binary_accuracy', 'precision', 'recall', 'auc')):
    """Plot training and validation curves, one subplot per metric.

    Subplots are laid out in two columns, filled row by row in the order the
    metrics are given. Grid cells left over (odd number of metrics) are hidden.

    Args:
        history: object exposing a ``history`` mapping from metric name to a
            per-epoch sequence of values (as returned by ``model.fit``).
        metrics_to_plot: iterable of metric names. For each name the
            ``'val_' + name`` series is drawn as well when it is present.

    Returns:
        None. The figure is created on the current matplotlib backend.
    """
    names = list(metrics_to_plot)
    if not names:
        # Nothing to draw; plt.subplots(0, 2) would raise.
        return

    ncols = 2
    nrows = math.ceil(len(names) / ncols)
    # Same figure sizes as before: a wide figure for a single row, a tall one otherwise.
    figsize = (20, 10) if nrows == 1 else (15, 20)

    # squeeze=False always returns a 2-D axes array, so a single code path
    # handles one, two, or many metrics.
    fig, axes = plt.subplots(nrows, ncols, figsize=figsize, squeeze=False)
    cells = axes.ravel()  # row-major: metric k lands at (k // ncols, k % ncols)

    for ax, name in zip(cells, names):
        ax.plot(history.history[name], label=name + ' (training data)')
        val_key = 'val_' + name
        if val_key in history.history:  # absent when no validation data was used
            ax.plot(history.history[val_key], label=name + ' (val data)')
        ax.set_ylabel('Value', fontsize=20)
        ax.set_xlabel('No. epoch', fontsize=20)
        ax.legend(prop={'size': 20})
        ax.set_title(name, size=22)

    for ax in cells[len(names):]:
        ax.set_visible(False)

    plt.tight_layout()
     

In [None]:
# Directory with the training DICOM files. File names are appended to this
# string directly, so the trailing slash is required.
TRAIN_PATH = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train/'
# Label table; the following cells read its `ID` and `Label` columns.
train_df = pd.read_csv('../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train.csv')
train_df.head()

In [None]:
# Split every `ID` at its last underscore into an image id and a subtype,
# then re-attach the label values as a third column.
label = train_df['Label']
train_df = train_df['ID'].str.rsplit('_', n=1, expand=True)
train_df = train_df.rename(columns={0: 'id', 1: 'subtype'})
train_df['label'] = label
train_df.head()

In [None]:
# Reshape to wide format: one row per image id, one column per subtype value.
# No aggfunc is given, so pivot_table's default aggregation is applied to any
# repeated (id, subtype) pairs.
train_df = pd.pivot_table(train_df, index='id', columns='subtype', values = 'label')
train_df.head()

In [None]:
# Turn each id into a file name (`<id>.dcm`) so the index can later be
# appended to TRAIN_PATH to locate the DICOM file.
train_df.index = train_df.index.astype(str) + '.dcm'
train_df.head()

In [None]:
# Keep only the rows whose `any` column equals 1.
abnormal_df = train_df[train_df['any'] == 1]
abnormal_df.head()

In [None]:
# (rows, columns) of the abnormal subset; the row count bounds SAMPLE_SUBCLASS.
abnormal_df.shape

In [None]:
# Positive-label count per subtype among the abnormal scans.
# The counts are sorted *before* plotting so bars and tick labels share one
# order. Previously the labels came from a sorted copy while the bars kept the
# original column order, which attached the wrong name to the bars.
sums = abnormal_df.drop(columns=['any']).sum().sort_values()
sns.barplot(x=sums.index, y=sums.values)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Draw SAMPLE_SUBCLASS rows without replacement, reproducibly via SEED.
# NOTE(review): with replace=False the requested size must not exceed
# len(abnormal_df) — confirm SAMPLE_SUBCLASS against abnormal_df.shape.
sample = abnormal_df.sample(SAMPLE_SUBCLASS, replace = False, random_state = SEED)
sample.head()

In [None]:
# Remove the aggregate `any` column so only the per-subtype targets remain.
sample.drop(columns = ['any'], inplace=True)

In [None]:
# Positive-label count per subtype in the sampled data.
# Sort first so the bar order and the tick labels agree (the labels used to be
# taken from a sorted copy while the bars were drawn in column order).
sums = sample.sum().sort_values()
sns.barplot(x=sums.index, y=sums.values)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Hold out 30% of the sample as the test set. The X_* values are just the file
# names (the index); the y_* frames carry the same index plus the labels.
X_train, X_test, y_train, y_test = train_test_split(sample.index, sample, test_size = 0.3, random_state = SEED)

In [None]:
# Split the remaining 70% again 70/30 into train and validation, i.e. roughly
# 49% / 21% / 30% of the sample for train / val / test.
X_train, X_val, y_train, y_val = train_test_split(y_train.index, y_train, test_size = 0.3, random_state = SEED)

In [None]:
# The label frames double as data frames for the generators: their index holds
# the DICOM file names and their columns hold the targets.
# Note that `train_df` is rebound here and no longer refers to the full table.
train_df = y_train
val_df = y_val
test_df = y_test

In [None]:
# Per-class weight = n_rows / (n_classes * n_positives_of_class), so classes
# with fewer positives get a larger weight. Computed over the whole sample
# (train, val and test together), as a plain numpy array in column order.
class_weights = (len(sample) / (len(SUBCLASSES) * sample.sum())).values
class_weights

### Custom loss
#### Multilabel Loss

In [None]:
def np_multilabel_loss(class_weights=None):
    """Create a multilabel binary cross-entropy loss function.

    The returned callable computes the binary cross-entropy of every class
    averaged over the batch axis, then combines the per-class values:

    * ``class_weights is None``: plain mean over the classes.
    * otherwise: sum of ``class_weights * per_class_loss``.

    Args:
        class_weights: optional per-class weights, multiplied element-wise
            with the per-class losses.

    Returns:
        A function ``single_class_crossentropy(y_true, y_pred)`` returning a
        scalar tensor.
    """
    def single_class_crossentropy(y_true, y_pred):
        targets = tf.cast(y_true, tf.float32)
        probs = tf.cast(y_pred, tf.float32)

        # Keep probabilities strictly inside (0, 1) so both logs stay finite.
        probs = tf.where(probs > 1-(1e-07), 1-1e-07, probs)
        probs = tf.where(probs < 1e-07, 1e-07, probs)

        positive_part = targets * tf.math.log(probs)
        negative_part = (1-targets) * tf.math.log(1-probs)
        per_class = - tf.reduce_mean(positive_part + negative_part, axis=0)

        if class_weights is not None:
            return tf.reduce_sum(class_weights*per_class)
        return tf.reduce_mean(per_class)
    return single_class_crossentropy

In [None]:
# Track the unweighted multilabel loss as an additional metric.
METRICS = METRICS + [np_multilabel_loss()]

# Display name of every metric: objects exposing `.name` use it, anything else
# (e.g. a plain function) falls back to `__name__`.
METRICS_NAMES = [
    metric.name if hasattr(metric, 'name') else metric.__name__
    for metric in METRICS
]

In [None]:
def correct_dcm(dcm):
    """Shift and wrap the pixel data of `dcm` in place.

    Adds 1000 to every pixel, subtracts 4096 from values that reach 4096 or
    more, writes the result back as raw bytes to ``dcm.PixelData`` and sets
    ``dcm.RescaleIntercept`` to -1000. Returns None.
    """
    wrap_at = 4096
    shifted = dcm.pixel_array + 1000
    overflow = shifted >= wrap_at
    shifted[overflow] -= wrap_at
    dcm.PixelData = shifted.tobytes()
    dcm.RescaleIntercept = -1000
    
def get_first_of_dicom_field_as_int(x):
    """Return a DICOM field value as an int.

    Multi-valued fields (``pydicom.multival.MultiValue``, including
    subclasses) contribute their first element; any other value is converted
    directly with ``int``.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of MultiValue.
    if isinstance(x, pydicom.multival.MultiValue):
        return int(x[0])
    return int(x)
    
def get_windowing(data):
    """Read window and rescale parameters from a DICOM dataset.

    Returns a list ``[window_center, window_width, intercept, slope]`` where
    each entry is converted with ``get_first_of_dicom_field_as_int``.
    """
    tags = (
        ('0028', '1050'),  # window center
        ('0028', '1051'),  # window width
        ('0028', '1052'),  # intercept
        ('0028', '1053'),  # slope
    )
    # Read all raw values first, then convert them.
    raw_values = [data[tag].value for tag in tags]
    return [get_first_of_dicom_field_as_int(value) for value in raw_values]
    

def get_min_max_of_window_value(window_center, window_width):
    """Return the ``(lower, upper)`` bounds of a window.

    The bounds lie ``window_width // 2`` below and above ``window_center``.
    """
    half_width = window_width // 2
    return window_center - half_width, window_center + half_width

def window_image(img, window_center, window_width):
    """Rescale a DICOM image with its slope/intercept and clip it to a window.

    Args:
        img: DICOM dataset exposing ``pixel_array`` and the rescale tags read
            by ``get_windowing``.
        window_center: centre of the intensity window.
        window_width: width of the intensity window.

    Returns:
        The rescaled pixel array clipped to the window bounds. If the dataset
        cannot be read or rescaled, a constant array of shape ``IMAGE_SIZE``
        filled with the lower window bound is returned instead (best-effort
        fallback, so one bad file does not abort a whole batch).
    """
    # Computed outside the try block: the fallback below needs img_min even
    # when reading the dataset fails on the very first statement.
    img_min, img_max = get_min_max_of_window_value(window_center, window_width)
    try:
        _, _, intercept, slope = get_windowing(img)
        # Widen to a signed type first so an unsigned pixel array combined
        # with a negative intercept cannot overflow or be rejected.
        pixels = img.pixel_array.astype(np.int32) * slope + intercept
    except Exception:
        return img_min * np.ones(IMAGE_SIZE)

    return np.clip(pixels, img_min, img_max)

def normalize(channel, wc_ww: tuple, norm_type = 'none'):
    """Normalise a windowed channel.

    Args:
        channel: windowed image values.
        wc_ww: ``(window_center, window_width)`` the channel was clipped to.
        norm_type: ``'none'`` returns the channel unchanged; ``'min_max'``
            maps the window bounds to 0 and 1. Matching is case-insensitive.

    Returns:
        The (possibly rescaled) channel.

    Raises:
        ValueError: if ``norm_type`` is not one of the supported modes
            (previously the function silently returned None in that case).
    """
    mode = norm_type.lower()
    if mode == 'none':
        return channel
    if mode == 'min_max':
        mini, maxi = get_min_max_of_window_value(wc_ww[0], wc_ww[1])
        return (channel - mini) / (maxi - mini)
    raise ValueError(
        f"Unsupported norm_type {norm_type!r}; expected 'none' or 'min_max'"
    )
    

def bsb_window(img):
    """Build a 3-channel image from three intensity windows of one DICOM.

    Channel 0 uses the brain window (40, 80), channel 1 the subdural window
    (80, 200) and channel 2 the soft window (50, 350), each given as
    ``(center, width)`` and min-max normalised with its own window bounds.

    Returns:
        An array of shape ``(height, width, 3)``. If the result contains any
        NaN, an all-ones array of shape ``(*IMAGE_SIZE, 3)`` is returned.
    """
    windows = (
        (40, 80),    # brain
        (80, 200),   # subdural
        (50, 350),   # soft
    )

    clipped = [window_image(img, center, width) for center, width in windows]
    channels = [
        normalize(channel, window, 'min_max')
        for channel, window in zip(clipped, windows)
    ]

    height, width = channels[0].shape[0], channels[0].shape[1]
    stacked = np.zeros((height, width, 3))
    for position, channel in enumerate(channels):
        stacked[:, :, position] = channel

    if np.isnan(stacked).any():
        return np.ones((*IMAGE_SIZE, 3))
    return stacked

    
class ImageGenerator(tf.keras.utils.Sequence):
    """Batch loader turning DICOM files into windowed 3-channel images.

    Each item is a tuple ``(X, y)``: ``X`` has shape
    ``(batch, *IMAGE_SIZE, 3)`` and ``y`` has shape ``(batch, num_classes)``.
    File names come from the dataframe index and are appended to
    ``TRAIN_PATH``; the row values are used as targets.

    Args:
        dataframe: label frame indexed by DICOM file name.
        batch_size: number of rows per batch (the last batch may be smaller).
        shuffle: whether ``on_epoch_end`` reshuffles the rows.
        num_classes: number of target columns.
    """

    def __init__(self, dataframe,batch_size,shuffle,num_classes = NUM_CLASSES):
        super().__init__()
        self.dataframe = dataframe
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.shuffle = shuffle
        # Must exist before on_epoch_end() increments it; it used to be
        # missing, so the first on_epoch_end() call raised AttributeError.
        self.current_epoch = 0

    def __len__(self):
        """Number of batches per epoch (the last one may be partial)."""
        return math.ceil(len(self.dataframe) / self.batch_size)

    @staticmethod
    def _load_image(path):
        """Read one DICOM file and return it windowed and resized to IMAGE_SIZE."""
        dcm = pydicom.dcmread(path)
        # Files stored as 12-bit unsigned with a rescale intercept above -100
        # are rewritten by correct_dcm before windowing.
        if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
            correct_dcm(dcm)
        img = tf.convert_to_tensor(bsb_window(dcm), dtype=tf.float64)
        return tf.image.resize(img, IMAGE_SIZE)

    def __getitem__(self, index):
        """Return batch number `index` as ``(X, y)`` numpy arrays."""
        start = index * self.batch_size
        batch_df = self.dataframe.iloc[start: start + self.batch_size]
        paths = TRAIN_PATH + batch_df.index.astype(str)

        X = np.empty((len(batch_df), *IMAGE_SIZE, 3))
        y = np.empty((len(batch_df), self.num_classes))
        for i, path in enumerate(paths):
            X[i] = self._load_image(path)
            y[i] = batch_df.iloc[i].values

        return X, y

    def on_epoch_end(self):
        """Reshuffle the rows (when enabled) and advance the epoch counter.

        NOTE(review): this hook is only run by whoever drives the Sequence.
        When the generator is consumed by plain iteration (as the
        ``tf.data.Dataset.from_generator`` wrappers in this notebook do) it
        may never be called — confirm whether per-epoch shuffling happens.
        """
        if self.shuffle:
            self.dataframe = self.dataframe.sample(len(self.dataframe), replace = False, random_state = SEED)
        self.current_epoch += 1

In [None]:
def _batches_to_dataset(generator):
    """Expose a batch generator as a tf.data.Dataset of (images, labels) tuples."""
    return tf.data.Dataset.from_generator(
        lambda: map(tuple, generator),
        output_types=(tf.float64, tf.uint8),
        output_shapes=(
            tf.TensorShape((None, *IMAGE_SIZE, 3)),
            tf.TensorShape((None, NUM_CLASSES)),
        ),
    )


# The test generator is not shuffled so that predictions keep the row order
# of test_df.
img_generator_train = ImageGenerator(train_df, BATCH_SIZE, shuffle=True)
img_generator_val = ImageGenerator(val_df, BATCH_SIZE, shuffle=True)
img_generator_test = ImageGenerator(test_df, BATCH_SIZE, shuffle=False)

train_data = _batches_to_dataset(img_generator_train)
val_data = _batches_to_dataset(img_generator_val)
test_data = _batches_to_dataset(img_generator_test)



In [None]:
# Random augmentation applied inside the model, directly on the input images.
data_augmentation = tf.keras.Sequential([
   tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
   tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
   tf.keras.layers.experimental.preprocessing.RandomZoom(0.2),
   tf.keras.layers.experimental.preprocessing.RandomHeight(0.2),
   tf.keras.layers.experimental.preprocessing.RandomWidth(0.2),
])

# ResNext101, preprocess_input = Classifiers.get('resnext101')
# base_model = ResNext101(IMAGE_SIZE + (3,), weights = 'imagenet', include_top = False)
# Backbone without its classification head; every layer is fine-tuned.
base_model = tf.keras.applications.MobileNet(include_top = False)
for layer in base_model.layers:
    layer.trainable = True


inputs = tf.keras.layers.Input(shape = IMAGE_SIZE + (3,), name = "input_layer")
# x = preprocess_input(inputs)
x = data_augmentation(inputs)

x = base_model(x)

x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.8)(x)
# x = tf.keras.layers.Dense(100)(x)
# x = tf.keras.layers.Dropout(0.5)(x)

# One independent sigmoid per subtype (multilabel output).
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation='sigmoid')(x)
model = tf.keras.Model(inputs, outputs)

# The training loss is class-weighted; the unweighted variant is tracked as a
# metric through METRICS.
model.compile(loss=np_multilabel_loss(class_weights),
             optimizer = tf.keras.optimizers.Adam(learning_rate=0.000125),
             metrics = METRICS)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which used to emit a stray "None" line).
model.summary()
history = model.fit(train_data,
                   epochs = EPOCHS,
                   validation_data = val_data,
                   callbacks = [tf.keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min', save_freq='epoch'),
                               tf.keras.callbacks.EarlyStopping(restore_best_weights=True, patience=5),
                               tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                           factor=0.5,
                                           patience=2,
                                           min_lr=1e-8,
                                           mode="min")
                               ]
)

eval_res = model.evaluate(val_data)

In [None]:
# Layer-by-layer overview of the trained model.
model.summary()

In [None]:
# Loss and compiled metrics on the held-out test split.
model.evaluate(test_data)

In [None]:
# Per-class sigmoid outputs for every test image (one row per image).
pred_prob = model.predict(test_data)
pred_prob

In [None]:
# Label the raw predictions with subtype names and the test file names.
# This relies on the predictions being in the same row order as test_df
# (the test generator is created with shuffle=False).
pred_prob = pd.DataFrame(pred_prob, columns = SUBCLASSES, index = test_df.index)
pred_prob

In [None]:
# Binarise the probabilities: a subtype is predicted when its value exceeds 0.5.
pred_values = pd.DataFrame((pred_prob > 0.5).astype(int), columns = SUBCLASSES, index = test_df.index)
pred_values

In [None]:
# How many images were predicted with 0, 1, 2, ... subtypes.
pred_values.sum(axis = 1).value_counts()

In [None]:
# Ground-truth labels for the first test rows, for visual comparison.
test_df.head()

In [None]:
# Binarised predictions, displayed for comparison with test_df above.
pred_values

In [None]:
# Boolean mask of the images predicted with exactly two subtypes.
pred_values.sum(axis = 1) == 2

In [None]:
def get_class_names(row):
    """Return the index labels of `row` whose value equals 1, as a numpy array."""
    is_positive = (row == 1).to_numpy()
    return np.array(row.index[is_positive])

In [None]:
# For every test image, the array of subtype names that were predicted.
y_pred = pred_values.apply(get_class_names, axis = 1)
y_pred

In [None]:
# For every test image, the array of subtype names labelled 1 in the ground truth.
y_true = test_df.apply(get_class_names, axis = 1)
y_true

In [None]:
# Persist binarised predictions, probabilities and ground truth to the
# working directory.
pred_values.to_csv('pred_values.csv')
pred_prob.to_csv('pred_prob.csv')
test_df.to_csv('test_df.csv')

In [None]:
# Learning rate per epoch, read from the "lr" entry of the training history.
# NOTE(review): presumably recorded by the ReduceLROnPlateau callback — confirm
# the key name for the installed Keras version.
plt.plot(history.history["lr"], 'o-')
plt.title('Learning Rate')
plt.xlabel('Epochs')
plt.ylabel('LR')
plt.show()

In [None]:
def plot_confusion_matrix(cm, class_name,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heatmap.

    Args:
        cm: confusion matrix, rows = true labels, columns = predictions.
        class_name: text used as the figure title.
        normalize: when True, every row is divided by its sum so cells show
            fractions of the true class (rows that sum to zero stay zero).
            This flag used to be accepted but ignored.
        title: accepted for backward compatibility; the figure is titled
            with ``class_name``.
        cmap: matplotlib colormap for the heatmap.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    values = cm
    cell_format = 'g'
    if normalize:
        counts = np.asarray(cm, dtype=float)
        row_totals = counts.sum(axis=1, keepdims=True)
        values = np.divide(counts, row_totals,
                           out=np.zeros_like(counts),
                           where=row_totals != 0)
        cell_format = '.2f'

    plt.figure(figsize=(12, 10))
    ax = plt.subplot()
    # annot=True writes the value into every cell.
    sns.heatmap(values, annot=True, ax=ax, fmt=cell_format, cmap=cmap)

    # Axis labels and tick placement.
    ax.set_xlabel('Predicted', fontsize=20)
    ax.xaxis.set_label_position('bottom')
    plt.xticks(rotation=0)
    ax.xaxis.tick_bottom()

    ax.set_ylabel('True', fontsize=20)
    plt.yticks(rotation=0)

    plt.title(class_name, fontsize=20)
    plt.show()

In [None]:
# One 2x2 confusion matrix per subtype, paired with its name in SUBCLASSES order.
for subclass_name, matrix in zip(SUBCLASSES, multilabel_confusion_matrix(test_df, pred_values)):
#     print(matrix)
    plot_confusion_matrix(matrix, subclass_name)

In [None]:
# Per-subtype precision / recall / F1 on the test split.
print(classification_report(test_df, pred_values, target_names = SUBCLASSES))

In [None]:
# Training vs. validation curves for the loss and every compiled metric.
plot_learning_curves(history, metrics_to_plot=['loss'] + METRICS_NAMES)