In [None]:
#pip install efficientnet --quiet

In [None]:
!pip install git+https://github.com/qubvel/classification_models.git

In [None]:
import numpy as np
import pandas as pd
import pydicom
import os
import matplotlib.pyplot as plt
import collections
from tqdm import tqdm_notebook as tqdm
from datetime import datetime

from math import ceil, floor
import cv2

import tensorflow as tf
import keras

from albumentations import (
    Compose,
    HorizontalFlip, ShiftScaleRotate,VerticalFlip,
    RandomBrightness,RandomCrop,RandomContrast
)
import sys
#import cupy as cp
from classification_models.keras import Classifiers

#import albumentations
# from keras_applications.resnet import ResNet50
#from keras_applications.inception_v3 import InceptionV3
#from efficientnet.keras import EfficientNetB2
#from keras_applications.inception_resnet_v2 import InceptionResNetV2
#from keras_applications.densenet import DenseNet121
from sklearn.model_selection import ShuffleSplit

#print(os.listdir('../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection'))

test_images_dir = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_test/'
train_images_dir = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train/'

In [None]:
len(os.listdir('../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train'))

In [None]:
os.listdir('../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train')[0]

### 0. Sigmoid (brain + subdural + bone)
Many thanks to [Ryan Epp](https://www.kaggle.com/reppic/gradient-sigmoid-windowing). The code is taken from his kernel (see his kernel for more information and for other people's work, for example [David Tang](https://www.kaggle.com/dcstang/see-like-a-radiologist-with-systematic-windowing), [Marco](https://www.kaggle.com/marcovasquez/basic-eda-data-visualization), [Nanashi](https://www.kaggle.com/jesucristo/rsna-introduction-eda-models), and [Richard McKinley](https://www.kaggle.com/omission/eda-view-dicom-images-with-correct-windowing)). At first I thought I couldn't use sigmoid windowing for this kernel because of how expensive it is, but resizing the image before the transformation saves a lot of computation. I am not sure how much this affects training performance, but it sped things up considerably.

In [None]:
from math import log

def correct_dcm(dcm):
    """Rewrite the pixel data of a dataset whose stored values are offset.

    The stored values are shifted up by 1000, every value that reaches 4096
    is wrapped back by 4096, and the result is written to ``dcm.PixelData``.
    ``dcm.RescaleIntercept`` is then set to -1000.

    The dataset is modified in place and the same object is returned.
    """
    px_mode = 4096

    shifted = dcm.pixel_array + 1000
    # Compute the overflow mask once; `shifted` is not changed in between.
    overflowed = shifted >= px_mode
    shifted[overflowed] = shifted[overflowed] - px_mode

    dcm.PixelData = shifted.tobytes()
    dcm.RescaleIntercept = -1000
    return dcm

def window_image(dcm,window_center, window_width,desired_size,U=1.0, eps=(1.0 / 255.0)):
    """Rescale, resize and clip one DICOM slice to an intensity window.

    Parameters
    ----------
    dcm : dataset exposing ``pixel_array``, ``BitsStored``,
        ``PixelRepresentation``, ``RescaleSlope`` and ``RescaleIntercept``.
        It may be modified in place by ``correct_dcm``.
    window_center, window_width : numbers
        The image is clipped to
        ``[center - width // 2, center + width // 2]``.
    desired_size : sequence
        Target size in array-shape order ``(rows, cols, ...)``; only the
        first two entries are used.
    U, eps : unused; kept so existing callers keep working.

    Returns
    -------
    2-D array of shape ``(rows, cols)`` holding the clipped values (the
    values are NOT normalised to [0, 1] here).
    """
    # Same heuristic as the comparison cell: 12-bit unsigned data whose
    # intercept is above -100 is routed through correct_dcm first.
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        dcm = correct_dcm(dcm)

    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept

    # cv2.resize expects dsize as (width, height), whereas desired_size is
    # laid out like an array shape (rows, cols). Passing desired_size[:2]
    # unchanged only works for square targets, so swap explicitly.
    target_rows, target_cols = desired_size[0], desired_size[1]
    img = cv2.resize(img, (target_cols, target_rows), interpolation=cv2.INTER_LINEAR)

    half_width = window_width // 2
    img_min = window_center - half_width
    img_max = window_center + half_width
    return np.clip(img, img_min, img_max)

def bsb_window(dcm,desired_size=(256,256,3)):
    """Build a 3-channel image from three intensity windows of one slice.

    Channels, in order: window (center 40, width 80), window (center 80,
    width 200) and window (center 40, width 380). Each channel is shifted
    by its window's lower bound and divided by the window width.

    Returns an array of shape ``(rows, cols, 3)``.
    """
    # Keep this call order: the first call may rewrite `dcm` via correct_dcm.
    brain = window_image(dcm, 40, 80,desired_size)
    subdural = window_image(dcm, 80, 200,desired_size)
    soft = window_image(dcm, 40, 380,desired_size)

    channels = [
        (brain - 0) / 80,
        (subdural - (-20)) / 200,
        (soft - (-150)) / 380,
    ]
    # Stack as (3, rows, cols) and move the channel axis last.
    return np.array(channels).transpose(1,2,0)

# Sanity Check: read one training slice and display its three-window image.
# Example dicoms: ID_2669954a7, ID_5c8b5d701, ID_52c9913b1

dicom = pydicom.dcmread(train_images_dir + 'ID_5c8b5d701' + '.dcm')
# Rows for this slice (presumably copied from the training CSV — confirm):
#                                     ID  Label
# 4045566          ID_5c8b5d701_epidural      0
# 4045567  ID_5c8b5d701_intraparenchymal      1
# 4045568  ID_5c8b5d701_intraventricular      0
# 4045569      ID_5c8b5d701_subarachnoid      1
# 4045570          ID_5c8b5d701_subdural      1
# 4045571               ID_5c8b5d701_any      1
# bsb_window returns a (rows, cols, 3) array; the trailing ';' hides the repr.
plt.imshow(bsb_window(dicom), cmap=plt.cm.bone);


Check (with an example) if the correction works (visually)

In [None]:
def window_with_correction(dcm, window_center, window_width):
    """Clip a slice to an intensity window, applying ``correct_dcm`` first
    when the dataset is 12-bit, unsigned and has an intercept above -100.

    No resizing is done; the result has the shape of ``dcm.pixel_array``.
    """
    needs_fix = (
        (dcm.BitsStored == 12)
        and (dcm.PixelRepresentation == 0)
        and (int(dcm.RescaleIntercept) > -100)
    )
    if needs_fix:
        dcm = correct_dcm(dcm)

    half_width = window_width // 2
    rescaled = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
    return np.clip(rescaled, window_center - half_width, window_center + half_width)

def window_without_correction(dcm, window_center, window_width):
    """Clip a slice to an intensity window using the dataset exactly as stored.

    The stored pixels are mapped through ``RescaleSlope`` and
    ``RescaleIntercept`` and clipped to
    ``[center - width // 2, center + width // 2]``.
    """
    half_width = window_width // 2
    lower, upper = window_center - half_width, window_center + half_width
    rescaled = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept
    return np.clip(rescaled, lower, upper)

def window_testing(img, window):
    """Assemble the three-window image using a caller-supplied window function.

    ``window`` is called as ``window(img, center, width)`` for the windows
    (40, 80), (80, 200) and (40, 380), in that order. Each result is shifted
    by the window's lower bound and divided by its width, then the three are
    stacked on a trailing channel axis.
    """
    brain = window(img, 40, 80)
    subdural = window(img, 80, 200)
    soft = window(img, 40, 380)

    scaled = [
        (brain - 0) / 80,
        (subdural - (-20)) / 200,
        (soft - (-150)) / 380,
    ]
    return np.array(scaled).transpose(1,2,0)

# example of a "bad data point" (i.e. (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100) == True)
dicom = pydicom.dcmread(train_images_dir + "ID_5c8b5d701" + ".dcm")

fig, ax = plt.subplots(1, 2)

# Order matters: correct_dcm rewrites `dicom` in place (PixelData and
# RescaleIntercept), so the uncorrected panel must be drawn first.
ax[0].imshow(window_testing(dicom, window_without_correction), cmap=plt.cm.bone);
ax[0].set_title("original")
ax[1].imshow(window_testing(dicom, window_with_correction), cmap=plt.cm.bone);
ax[1].set_title("corrected");

### 1. Helper functions

* read and transform dcms to 3-channel inputs for e.g. InceptionV3. 
* uses `bsb_window` from the previous cell

\* Source for windowing (although now partly removed from this kernel): https://www.kaggle.com/omission/eda-view-dicom-images-with-correct-windowing

In [None]:
def _read(path, desired_size):
    """Load one DICOM file and turn it into the 3-window image.

    Used by ``DataGenerator``.

    Parameters
    ----------
    path : path of the ``.dcm`` file; read errors from ``pydicom.dcmread``
        are NOT caught and propagate to the caller.
    desired_size : target size forwarded to ``bsb_window``.

    Returns
    -------
    The array produced by ``bsb_window``. If windowing fails, an all-zero
    array of shape ``desired_size`` is returned instead and a warning is
    emitted so the failure is visible.
    """
    import warnings

    dcm = pydicom.dcmread(path)

    try:
        img = bsb_window(dcm,desired_size)
    except Exception as err:
        # Best-effort fallback is kept, but only for ordinary errors:
        # a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        warnings.warn(f"windowing failed for {path!r} ({err!r}); using a zero image")
        img = np.zeros(desired_size)

    return img

# Another sanity check: same slice, this time through the _read helper that
# the data generator uses.
# NOTE(review): the size is passed as a 2-tuple here, so the error fallback in
# _read (np.zeros(desired_size)) would be 2-D rather than 3-channel.
plt.imshow(
    _read(train_images_dir+'ID_5c8b5d701'+'.dcm', (256, 256)), cmap=plt.cm.bone
);

### 2. Data generators

Inherits from keras.utils.Sequence object and thus should be safe for multiprocessing.


In [None]:
class DataGenerator(keras.utils.Sequence):
    """Batch generator that reads DICOM slices from disk on demand.

    With ``labels`` given (training) every batch is ``(X, Y)`` and the set
    of usable samples is re-drawn and shuffled at the end of every epoch.
    Without labels (inference) every batch is just ``X`` and the samples
    are served in their original order.

    Parameters
    ----------
    list_IDs : indexable of image identifiers (file name without ``.dcm``).
    labels : DataFrame indexed by image identifier with 6 label columns,
        or ``None`` for inference.
    batch_size : maximum number of samples per batch.
    img_size : shape of one sample as returned by ``_read``.
    img_dir : directory prefix that is concatenated with ``ID + ".dcm"``.
    augment : truthy to run the random augmentation on training batches.
    """

    def __init__(self, list_IDs, labels=None, batch_size=1, img_size=(512, 512, 1), 
                 img_dir=train_images_dir,augment=None, *args, **kwargs):
        super().__init__()

        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_dir = img_dir
        self.augment = augment
        # Draw the sample order for the first epoch.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may be short)."""
        return int(ceil(len(self.indices) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index``: ``(X, Y)`` when labelled, else ``X``."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indices]

        if self.labels is not None:
            X, Y = self.__data_generation(list_IDs_temp)
            return X, Y
        else:
            X = self.__data_generation(list_IDs_temp)
            return X

    def on_epoch_end(self):
        """Re-draw which samples are used in the coming epoch.

        Training: each sample is kept with a probability that depends on the
        value in the first label column (0 -> 0.35, 1 -> 0.5), then the kept
        positions are shuffled. Inference: all positions, in order.
        """
        if self.labels is not None: # for training phase we undersample and shuffle
            # keep probability of any=0 and any=1
            keep_prob = self.labels.iloc[:, 0].map({0: 0.35, 1: 0.5})
            keep = (keep_prob > np.random.rand(len(keep_prob)))
            self.indices = np.arange(len(self.list_IDs))[keep]
            np.random.shuffle(self.indices)
        else:
            self.indices = np.arange(len(self.list_IDs))

    def __data_generation(self, list_IDs_temp):
        """Read the images (and labels) for the given identifiers."""
        # Size the arrays by the identifiers actually in this batch. Using
        # batch_size here would leave uninitialised np.empty rows in a short
        # final batch, and those rows would be fed to the model.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.img_size))

        if self.labels is not None: # training phase
            Y = np.empty((n_samples, 6), dtype=np.float32)

            for i, ID in enumerate(list_IDs_temp):
                X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)
                Y[i,] = self.labels.loc[ID].values

            if self.augment:
                X = self.__augment(X)
            return X, Y

        else: # test phase
            for i, ID in enumerate(list_IDs_temp):
                X[i,] = _read(self.img_dir+ID+".dcm", self.img_size)

            return X

    def __random_transform(self,img):
        """Apply one random draw of the augmentation pipeline to an image."""
        composition = Compose([
            HorizontalFlip(),
            VerticalFlip(),
            ShiftScaleRotate(rotate_limit=45, shift_limit=0.15, scale_limit=0.15),
            RandomBrightness(),
            RandomContrast()
        ])

        composed = composition(image=img)
        aug_img = composed['image']

        return aug_img

    def __augment(self,img_batch):
        """Augment every image of the batch independently, in place."""
        for i in range(img_batch.shape[0]):
            img_batch[i, ] = self.__random_transform(img_batch[i, ])

        return img_batch

### 3a. loss function and metric

In [None]:
from keras import backend as K

def weighted_log_loss(y_true, y_pred):
    """Per-sample weighted binary cross-entropy.

    Can be used as the loss function in model.compile().

    Predictions are clipped to ``[eps, 1 - eps]``; each of the six columns'
    cross-entropy is multiplied by its weight (2 for the first column, 1 for
    the others) and the mean over the last axis is returned.
    """
    class_weights = np.array([2., 1., 1., 1., 1., 1.])
    eps = K.epsilon()

    clipped = K.clip(y_pred, eps, 1.0-eps)

    positive_part = y_true * K.log(clipped) * class_weights
    negative_part = (1.0 - y_true) * K.log(1.0 - clipped) * class_weights

    return K.mean(-(positive_part + negative_part), axis=-1)


def _normalized_weighted_average(arr, weights=None):
    """Backend version of a weighted average along axis 1.

    With ``weights`` the rows of ``arr`` are dotted with the weight vector
    and divided by the sum of the weights; without weights this is the plain
    mean along axis 1.
    """
    if weights is None:
        return K.mean(arr, axis=1)

    weight_total = K.sum(weights)
    weight_column = K.expand_dims(weights, axis=1)
    weighted_sum = K.sum(K.dot(arr, weight_column), axis=1)
    return weighted_sum / weight_total


def weighted_loss(y_true, y_pred):
    """Batch-level weighted log loss with normalised class weights.

    Will be used as the metric in model.compile().

    Same cross-entropy as 'weighted_log_loss()', but the six columns are
    combined with weights (2, 1, 1, 1, 1, 1) divided by their sum, and the
    result is averaged over the batch. According to the original author this
    should be very close to the official competition metric:
        https://www.kaggle.com/kambarakun/lb-probe-weights-n-of-positives-scoring
    i.e. sklearn.metrics.log_loss with sample weights.
    """
    eps = K.epsilon()
    class_weights = K.variable([2., 1., 1., 1., 1., 1.])

    clipped = K.clip(y_pred, eps, 1.0-eps)
    cross_entropy = -(y_true * K.log(clipped)
                      + (1.0 - y_true) * K.log(1.0 - clipped))

    per_sample = _normalized_weighted_average(cross_entropy, class_weights)
    return K.mean(per_sample)


def weighted_log_loss_metric(trues, preds):
    """NumPy weighted log loss over a whole prediction matrix.

    Intended for scoring validation predictions in PredictionCheckpoint().

    Predictions are clipped to ``[1e-7, 1 - 1e-7]``. The log-likelihood of
    each row is averaged over its six columns with weights
    (2, 1, 1, 1, 1, 1), and the negated mean over all rows is returned.
    """
    epsilon = 1e-7
    class_weights = [2., 1., 1., 1., 1., 1.]

    clipped = np.clip(preds, epsilon, 1-epsilon)
    log_likelihood = trues * np.log(clipped) + (1 - trues) * np.log(1 - clipped)
    per_sample = np.average(log_likelihood, axis=1, weights=class_weights)

    return - per_sample.mean()



### 3b. Model

Model is divided into three parts: <br> 

* (REMOVED) The initial layer, which will transform/map input image of shape (\_, \_, 1) to another "image" of shape (\_, \_, 3).

* The new input image is then passed through InceptionV3 (which I named "engine"). InceptionV3 could be replaced by any of the available architectures in keras_application.

* Finally, the output from InceptionV3 goes through average pooling followed by two dense layers (including output layer).

In [None]:

class PredictionCheckpoint(keras.callbacks.Callback):
    """Callback that stores test-set predictions after every epoch.

    After training starts, ``test_predictions`` is a list with one
    prediction array per finished epoch. ``valid_predictions`` is created
    as well but stays empty because the validation pass is disabled.

    Parameters
    ----------
    test_df, valid_df : DataFrames whose index holds the image identifiers.
    test_images_dir, valid_images_dir : directories of the DICOM files.
    batch_size : batch size used for prediction.
    input_size : sample shape passed to ``DataGenerator``.
    """

    def __init__(self, test_df, valid_df, 
                 test_images_dir=test_images_dir, 
                 valid_images_dir=train_images_dir, 
                 batch_size=32, input_size=(224, 224, 3)):
        # Let the Keras base class initialise its own attributes.
        super().__init__()

        self.test_df = test_df
        self.valid_df = valid_df
        self.test_images_dir = test_images_dir
        self.valid_images_dir = valid_images_dir
        self.batch_size = batch_size
        self.input_size = input_size

    def on_train_begin(self, logs=None):
        """Reset the stored predictions at the start of a training run."""
        self.test_predictions = []
        self.valid_predictions = []

    def on_epoch_end(self,batch, logs=None):
        """Predict the whole test set and append the result.

        The first positional argument keeps its original name ``batch``.
        The output is truncated to ``len(test_df)`` rows so that any rows
        beyond the test set are dropped.
        """
        self.test_predictions.append(
            self.model.predict_generator(
                DataGenerator(self.test_df.index, None, self.batch_size, self.input_size, self.test_images_dir), verbose=2)[:len(self.test_df)])

        # Commented out to save time
#         self.valid_predictions.append(
#             self.model.predict_generator(
#                 DataGenerator(self.valid_df.index, None, self.batch_size, self.input_size, self.valid_images_dir), verbose=2)[:len(self.valid_df)])

#         print("validation loss: %.4f" %
#               weighted_log_loss_metric(self.valid_df.values, 
#                                    np.average(self.valid_predictions, axis=0, 
#                                               weights=[2**i for i in range(len(self.valid_predictions))])))

        # here you could also save the predictions with np.save()


class MyDeepModel:
    """A convolutional backbone plus a 6-unit sigmoid head, with a fit helper.

    Parameters
    ----------
    engine : callable that builds the backbone; called with ``include_top``,
        ``weights``, ``input_shape`` and the keras ``backend``/``layers``/
        ``models``/``utils`` modules.
    input_dims : input shape of the network.
    batch_size, num_epochs : training settings.
    learning_rate, decay_rate, decay_steps : the learning rate for an epoch
        is ``learning_rate * decay_rate ** floor(epoch / decay_steps)``.
    weights : weights argument forwarded to ``engine``.
    verbose : verbosity forwarded to ``fit_generator``.
    """

    def __init__(self, engine, input_dims, batch_size=5, num_epochs=4, learning_rate=1e-3, 
                 decay_rate=1.0, decay_steps=1, weights="imagenet", verbose=1):
        self.engine, self.input_dims = engine, input_dims
        self.batch_size, self.num_epochs = batch_size, num_epochs
        self.learning_rate = learning_rate
        self.decay_rate, self.decay_steps = decay_rate, decay_steps
        self.weights = weights
        self.verbose = verbose
        self._build()

    def _build(self):
        """Create and compile ``self.model``."""
        backbone = self.engine(
            include_top=False,
            weights=self.weights,
            input_shape=self.input_dims,
            backend=keras.backend,
            layers=keras.layers,
            models=keras.models,
            utils=keras.utils,
        )

        # Head: global average pooling straight into the 6 sigmoid outputs.
        pooled = keras.layers.GlobalAveragePooling2D(name='avg_pool')(backbone.output)
        out = keras.layers.Dense(6, activation="sigmoid", name='dense_output')(pooled)

        self.model = keras.models.Model(inputs=backbone.input, outputs=out)
        self.model.compile(
            loss='binary_crossentropy',
            optimizer=keras.optimizers.Nadam(),
            metrics=[weighted_loss],
        )

    def fit_and_predict(self, train_df, valid_df, test_df):
        """Train on ``train_df`` and return the PredictionCheckpoint object.

        NOTE(review): the returned callback is not part of the ``callbacks``
        list handed to ``fit_generator`` below, so its hooks are not invoked
        by this call — confirm whether that is intended.
        """
        def step_decay(epoch):
            # Learning rate for a given epoch index.
            return self.learning_rate * pow(self.decay_rate, floor(epoch / self.decay_steps))

        # callbacks
        pred_history = PredictionCheckpoint(test_df, valid_df, input_size=self.input_dims)
        checkpoint = keras.callbacks.ModelCheckpoint(
            'model.h5', monitor='weighted_loss', verbose=1, save_best_only=True)
        scheduler = keras.callbacks.LearningRateScheduler(step_decay)

        train_batches = DataGenerator(
            train_df.index,
            train_df,
            self.batch_size,
            self.input_dims,
            train_images_dir,
            augment=None,
        )

        self.model.fit_generator(
            train_batches,
            epochs=self.num_epochs,
            verbose=self.verbose,
            use_multiprocessing=True,
            workers=4,
            callbacks=[checkpoint,scheduler],
        )

        return pred_history

    def save(self, path):
        """Write the model weights to ``path``."""
        self.model.save_weights(path)

    def load(self, path):
        """Load model weights from ``path``."""
        self.model.load_weights(path)

### 4. Read csv files


In [None]:
def _pivot_labels(df):
    """Turn long ``ID, Label`` rows into one row per image.

    ``ID`` is split into the image identifier (first 12 characters) and the
    diagnosis name (everything from character 13 on). The result is indexed
    by image and has a two-level column index ``("Label", <diagnosis>)``.
    """
    long_df = pd.DataFrame({
        "Label": df["Label"],
        "Diagnosis": df["ID"].str.slice(start=13),
        "Image": df["ID"].str.slice(stop=12),
    })
    return long_df.set_index(['Image', 'Diagnosis']).unstack(level=-1)


def read_testset(filename="../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_sample_submission.csv"):
    """Read the sample-submission CSV as one row per test image.

    Returns a DataFrame indexed by image identifier with columns
    ``("Label", <diagnosis>)``.
    """
    return _pivot_labels(pd.read_csv(filename))


def read_trainset(filename="../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train.csv"):
    """Read the training CSV as one row per training image.

    Rows that are exact duplicates are dropped before pivoting; pandas'
    ``unstack`` cannot reshape an index that contains duplicate entries.

    Returns a DataFrame indexed by image identifier with columns
    ``("Label", <diagnosis>)``.
    """
    df = pd.read_csv(filename)
    # Image and Diagnosis are derived from ID, so de-duplicating the raw rows
    # is equivalent to de-duplicating after the split.
    df = df.drop_duplicates().reset_index(drop=True)
    return _pivot_labels(df)

In [None]:
dupp = ['489ae4179', '854fba667', '921490062', 'a64d5deed']

In [None]:
# Re-read the raw training CSV and tag every image id with a running group
# number, so the original row order can be recovered after pivoting.
filename="../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/stage_2_train.csv"
df = pd.read_csv(filename)

df["Image"] = df["ID"].str.slice(stop=12)
df["Diagnosis"] = df["ID"].str.slice(start=13)
df = df[df.duplicated()==False]

# Append "_<group>" to the Image value of every row, where <group> is the
# row position // 6.
# assumes each image occupies 6 consecutive rows after de-duplication — TODO confirm
# Rows beyond the last complete group of 6 are left without a suffix.
idxx = df["Image"].values
ll = int(len(df)/6)
for i in range(ll):
    for j in range(6):
        idxx[(i*6)+j] = idxx[(i*6)+j]+'_'+str(i)

        
df["Image"] = idxx
df

In [None]:
# df[df["Image"]=='ID_'+ dupp[3]]

In [None]:
df = df[df.duplicated()==False]
df = df.reset_index(drop=True)

df = df.loc[:, ["Label", "Diagnosis", "Image"]]
df

In [None]:
df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)
df

In [None]:
# Keep only what follows the first 13 characters of every index label, i.e.
# the group number appended after "ID_xxxxxxxxx_". Stored as an object array
# of strings; the next cell converts it to integers.
lll = len(df.index)
final_idx = np.array(df.index)
final_idx[:] = [image_key[13:] for image_key in final_idx]

In [None]:
final_idx = np.array(final_idx,int)
final_idx

In [None]:
test_df = read_testset()
df = read_trainset()

In [None]:
df.head(3)

In [None]:
test_df.head(3)

### 5. Train model and predict

*Using train, validation and test set* <br>

Training for 4 epochs with the Nadam optimizer, with a learning rate of 0.0005 and a decay rate of 0.8. The validation predictions are \[exponentially weighted\] averaged over all 4 epochs (the same goes for the test set submission later). `fit_and_predict` returns validation and test predictions for all epochs.


In [None]:
# train set (90%) and validation set (10%); random_state fixes the split
ss = ShuffleSplit(n_splits=10, test_size=0.1, random_state=42).split(df.index)

# let's go for the first fold only; both results are positional index arrays
train_idx, valid_idx = next(ss)

# obtain model (disabled: the results below are loaded from saved predictions)
# SE_resnext50, preprocess_input = Classifiers.get('seresnext50')

# model = MyDeepModel(engine=SE_resnext50, input_dims=(224, 224, 3), batch_size=32, learning_rate=5e-4,
#                     num_epochs=4, decay_rate=0.8, decay_steps=1, weights="imagenet", verbose=1)



In [None]:
# obtain test + validation predictions (history.test_predictions, history.valid_predictions)
# history = model.fit_and_predict(df.iloc[train_idx], df.iloc[valid_idx], test_df)

# model.model.load_weights('../input/inceptionv3model/model.h5')

### 6. Submit test predictions

In [None]:
# prediction = model.model.predict_generator(DataGenerator(test_df.index, None, 32, (224, 224, 3), test_images_dir), verbose=1)
# print(prediction.shape)

In [None]:
col = ['any','epidural','intraparenchymal','intraventricular','subarachnoid','subdural']

In [None]:
train_df0 = df.iloc[train_idx]
val_df0 = df.iloc[valid_idx]
train_df0.shape,val_df0.shape

In [None]:
# Number of positive rows per class: first the training split, then the
# validation split (same output order as two separate loops).
for split_df in (train_df0, val_df0):
    split_labels = split_df['Label']
    for class_name in col[:6]:
        print(class_name,(split_labels[class_name]==1).sum())

In [None]:
# for train and valid

# test_df0 = val_df0.iloc[:]
# prediction = model.model.predict_generator(DataGenerator(test_df0.index, None, 32, (224, 224, 3), train_images_dir), verbose=1)
# print(prediction.shape)
# np.savez_compressed(f'pred_val.npz',data = prediction)
# test_df0.iloc[:, :] = prediction[:len(test_df0)]
# test_df0 = test_df0.stack().reset_index()
# test_df0.insert(loc=0, column='ID', value=test_df0['Image'].astype(str) + "_" + test_df0['Diagnosis'])
# test_df0 = test_df0.drop(["Image", "Diagnosis"], axis=1)
# test_df0.to_csv('submission_val.csv', index=False)

In [None]:
# for train 

# test_df0 = train_df0.iloc[:]
# prediction = model.model.predict_generator(DataGenerator(test_df0.index, None, 32, (224, 224, 3), train_images_dir), verbose=1)
# print(prediction.shape)
# np.savez_compressed(f'pred_train.npz',data = prediction)
# test_df0.iloc[:, :] = prediction[:len(test_df0)]
# test_df0 = test_df0.stack().reset_index()
# test_df0.insert(loc=0, column='ID', value=test_df0['Image'].astype(str) + "_" + test_df0['Diagnosis'])
# test_df0 = test_df0.drop(["Image", "Diagnosis"], axis=1)
# test_df0.to_csv('submission_train.csv', index=False)

In [None]:
# test_df0 = test_df.iloc[:512]

# prediction = model.model.predict_generator(DataGenerator(test_df0.index, None, 32, (224, 224, 3), test_images_dir), verbose=1)
# print(prediction.shape)

# np.savez_compressed(f'pred_test.npz',data = prediction)
# test_df0.iloc[:, :] = prediction[:len(test_df0)]
# test_df0 = test_df0.stack().reset_index()
# test_df0.insert(loc=0, column='ID', value=test_df0['Image'].astype(str) + "_" + test_df0['Diagnosis'])
# test_df0 = test_df0.drop(["Image", "Diagnosis"], axis=1)
# test_df0.to_csv('submission_test.csv', index=False)

In [None]:
# """
# try:
#     test_df.iloc[:, :] = np.average(history.test_predictions, axis=0, weights=[0, 1, 2]) # let's do a weighted average for epochs (>1)
# except:
#     test_df.iloc[:, :] = np.average(history.test_predictions, axis=0, weights=[2**i for i in range(len(history.test_predictions))])
#     #latest = history.test_predictions
# """
# test_df0.iloc[:, :] = prediction[:len(test_df0)]

# test_df0 = test_df0.stack().reset_index()

# test_df0.insert(loc=0, column='ID', value=test_df0['Image'].astype(str) + "_" + test_df0['Diagnosis'])

# test_df0 = test_df0.drop(["Image", "Diagnosis"], axis=1)

# test_df0.to_csv('submission_test.csv', index=False)

In [None]:
train_df0['Label'].columns

In [None]:
from sklearn.metrics import roc_auc_score,roc_curve
from sklearn import metrics
import matplotlib.pyplot as plt

In [None]:
# result val: one ROC curve (zoomed to the top-left corner) per class,
# saved as '<class>.jpg' and shown as its own figure.
col = ['any','epidural','intraparenchymal','intraventricular','subarachnoid','subdural']
pred = np.load('../input/intracranialhemorrhageserenext50output/pred_val.npz')['data']
true = val_df0.values

# Use only as many prediction rows as there are labelled validation rows,
# derived from the data instead of a hard-coded count (the saved array can
# hold extra rows at the end).
n_val = len(true)

for i in range(6):
    fpr, tpr, _ = metrics.roc_curve(true[:,i],pred[:n_val,i])
    auc = np.round(metrics.roc_auc_score(true[:,i],pred[:n_val,i]),4)
    plt.plot(fpr,tpr,label=f"Class={col[i]}, auc="+str(auc))
    plt.legend(prop={"size":12})
    plt.xlim(0,0.4)
    plt.ylim(0.6,1)
    plt.ylabel('Sensitivity')
    plt.xlabel('1 - Specificity')
    plt.savefig(f'{col[i]}.jpg',dpi=250,bbox_inches = 'tight')
    plt.show()

In [None]:
# Weighted log loss of the validation predictions (mean over samples).
a = weighted_log_loss(true[:],pred[:len(true)])
# Evaluate inside a context manager so the session is closed afterwards
# instead of being left open.
with tf.compat.v1.Session() as sess:
    b = a.eval(session=sess)
np.mean(b)

In [None]:
# result train: per-class ROC AUC on the training split
pred = np.load('../input/intracranialhemorrhageserenext50output/pred_train.npz')['data']
true = train_df0.values
# Row count taken from the labels rather than hard-coded.
n_train = len(true)
for i in range(6):
    print(roc_auc_score(true[:,i],pred[:n_train,i]))

In [None]:
# Weighted log loss of the training predictions (mean over samples).
a = weighted_log_loss(true[:],pred[:len(true)])
# Evaluate inside a context manager so the session is closed afterwards.
with tf.compat.v1.Session() as sess:
    b = a.eval(session=sess)
np.mean(b)

In [None]:
# result full: per-class ROC AUC over training + validation rows together
# Each saved array is cut to the number of rows in its split; the counts
# come from the split frames instead of being hard-coded.
pred = np.concatenate((np.load('../input/intracranialhemorrhageserenext50output/pred_train.npz')['data'][:len(train_df0)],
            np.load('../input/intracranialhemorrhageserenext50output/pred_val.npz')['data'][:len(val_df0)]),axis=0)
true = np.concatenate((train_df0.values,val_df0.values),axis=0)
for i in range(6):
    print(roc_auc_score(true[:,i],pred[:,i]))

In [None]:
# Weighted log loss over training + validation predictions (mean over samples).
a = weighted_log_loss(true[:],pred[:])
# Evaluate inside a context manager so the session is closed afterwards.
with tf.compat.v1.Session() as sess:
    b = a.eval(session=sess)
np.mean(b)

In [None]:
np.savez_compressed(f'train_idx.npz',data = train_idx)
np.savez_compressed(f'valid_idx.npz',data = valid_idx)

# post process

In [None]:
# Load the eight metadata shards (meta_train_0.csv ... meta_train_7.csv) and
# stack them once. pd.concat keeps the column dtypes and avoids re-copying
# the growing array on every iteration; it also no longer overwrites `col`.
meta_parts = [
    pd.read_csv(f'../input/intracranialhemorrhagemetayousefzadeh/meta_train_{part}.csv')
    for part in range(8)
]
df_meta_train = pd.concat(meta_parts, axis=0, ignore_index=True)

# Drop exact duplicate rows, then reorder positionally with final_idx.
# presumably this aligns the metadata rows with the rows of the pivoted label
# frame — verify against how final_idx was built
df_meta_train = df_meta_train[~df_meta_train.duplicated()]
df_meta_train = df_meta_train.iloc[final_idx]

In [None]:
final_idx.shape,df_meta_train.shape

In [None]:
df_meta_train[['type_0','type_1','type_2','type_3','type_4','type_5']].values.sum(axis=0)/len(df_meta_train)

In [None]:
# Unique patient identifiers in the metadata. The assignment has to be live:
# with it commented out, this cell raises NameError on a fresh kernel because
# no earlier cell defines p_df_train.
p_df_train = np.unique(df_meta_train['patient_id'])
p_df_train

In [None]:
# Rebuild the metadata as one array: patients in random order, and within
# each patient the rows sorted by 'position_2'.
# NOTE(review): the shuffle is unseeded, so the patient order differs between runs.
p_df_train = np.unique(df_meta_train['patient_id'])
np.random.shuffle(p_df_train)

patient_ids = df_meta_train['patient_id'].values

# Collect the per-patient blocks and concatenate once at the end; growing
# the array inside the loop re-copies everything on every iteration.
# tqdm replaces the per-iteration print() that flooded the cell output.
per_patient_rows = []
for patient in tqdm(p_df_train):
    patient_rows = df_meta_train.iloc[patient_ids==patient]
    per_patient_rows.append(patient_rows.sort_values(by=['position_2']).values)

new_p = np.concatenate(per_patient_rows,axis=0)

In [None]:
!mkdir zip_100

In [None]:
from scipy.ndimage import zoom

# Export the first (up to) 100 patients as volumes: the slices of a patient
# are ordered by 'position_2', stacked, transposed so the slice axis is last,
# and the first two axes are resampled to 256 x 256 with linear interpolation.
os.makedirs('./zip_100', exist_ok=True)  # safe to re-run

p_df_train = np.unique(df_meta_train['patient_id'])
# np.random.shuffle(p_df_train)
patient_ids = df_meta_train['patient_id'].values

for i in range(min(100, len(p_df_train))):
    # Deliberately NOT named `new_p`: a later cell writes `new_p` to CSV and
    # must not be handed a single patient's frame from this loop.
    patient_slices = df_meta_train.iloc[patient_ids==p_df_train[i]]
    patient_slices = patient_slices.sort_values(by=['position_2'])

    dcm_3d = np.array([
        pydicom.dcmread(train_images_dir + f'ID_{slice_id}.dcm').pixel_array
        for slice_id in patient_slices.id.values
    ])
    dcm_3d = dcm_3d.transpose(2,1,0)
    dcm_3d = zoom(dcm_3d,(256/dcm_3d.shape[0],256/dcm_3d.shape[1],1),order=1)
    np.savez(f'./zip_100/{i}.npz',data=dcm_3d)

In [None]:
nn_df = pd.DataFrame(new_p,columns = df_meta_train.columns)
nn_df

In [None]:
# Wrap the patient-ordered rows in a DataFrame and write them to CSV.
# NOTE(review): this relies on `new_p` from an earlier cell; verify it still
# holds the all-patient table when this cell runs, since other cells rebind it.
nn_df = pd.DataFrame(new_p,columns = df_meta_train.columns)
nn_df.to_csv('meta_patient_homorrhage.csv',index=False)
nn_df

In [None]:
# Show every slice of one patient (the third unique patient id), ordered by
# 'position_2'. Each slice gets its own figure.
p_df_train = np.unique(df_meta_train['patient_id'])
new_p = df_meta_train.iloc[df_meta_train['patient_id'].values==p_df_train[2]]
new_p = new_p.sort_values(by=['position_2'])
for i in new_p['id']:
    plt.imshow(_read(train_images_dir+f'ID_{i}.dcm', (256, 256)), cmap=plt.cm.bone)
    plt.show()

In [None]:
# full data
train_meta_df0 = df_meta_train.iloc[train_idx]
val_meta_df0 = df_meta_train.iloc[valid_idx]
train_meta_df0.shape,val_meta_df0.shape

In [None]:
train_meta_df0.columns

In [None]:
# Metadata of the training rows followed by the validation rows, in the same
# order as the concatenated predictions below.
full_meta = np.concatenate((train_meta_df0.values, val_meta_df0.values),axis=0)
df_df_full_meta = pd.DataFrame(full_meta,columns = train_meta_df0.columns)
# Each saved prediction array is cut to the number of rows in its split; the
# counts come from the split frames instead of being hard-coded.
full_pred = np.concatenate((np.load('../input/intracranialhemorrhageserenext50output/pred_train.npz')['data'][:len(train_meta_df0)],
            np.load('../input/intracranialhemorrhageserenext50output/pred_val.npz')['data'][:len(val_meta_df0)]),axis=0)

# Fix the column order (id, six type_* columns, patient_id, positions,
# orientations). .copy() makes this an independent frame, so the column
# assignments below do not write into a slice of the previous frame.
df_df_full_meta = df_df_full_meta[['id','type_5', 'type_0', 'type_1', 'type_2', 'type_3', 'type_4',
       'patient_id', 'position_0', 'position_1', 'position_2', 'orientation_0',
       'orientation_1', 'orientation_2', 'orientation_3', 'orientation_4',
       'orientation_5']].copy()

# Snapshot of the 17 metadata columns, taken before predictions are appended.
full_meta = df_df_full_meta.values

# Append the six prediction columns as p1..p6 (column positions 17..22).
for i,j in enumerate(['p1','p2','p3','p4','p5','p6']):
    df_df_full_meta[j] = full_pred[:,i]

In [None]:
# result full: per-class ROC AUC, taking the labels from metadata columns
# 1..6 (type_5, type_0, ..., type_4) and scoring prediction column i against
# label column i.
# presumably type_5 corresponds to the first prediction column — verify
true = np.array(full_meta[:,[1,2,3,4,5,6]],int)
for i in range(6):
    print(roc_auc_score(true[:,i],full_pred[:,i]))

In [None]:
# jj = -125000
# print(df.iloc[jj])
# print('###############################')
# df_meta_train.iloc[jj]

In [None]:
# labels, counts = np.unique(zz,return_counts=True)

In [None]:
# vvv0 = df.iloc[train_idx]
# vvv1 = df_meta_train.iloc[train_idx]

# jj = 2
# print(vvv0.iloc[jj])
# print('###############################')
# vvv1.iloc[jj]

In [None]:
from scipy.ndimage import gaussian_filter as gf

In [None]:
# One patient's profile along 'position_2': raw first prediction column
# (default colour), first label column (green) and the Gaussian-smoothed
# prediction with sigma 1 (red). The title is the per-column maximum of the
# six label columns, concatenated.
p_df_train = np.unique(df_df_full_meta['patient_id'])
is_patient = df_df_full_meta['patient_id'].values==p_df_train[5]
new_p = df_df_full_meta.iloc[is_patient].sort_values(by=['position_2'])

slice_position = new_p['position_2']
first_pred = new_p.iloc[:,1+16]

plt.plot(slice_position,first_pred)
plt.plot(slice_position,new_p.iloc[:,1],color='g')
plt.plot(slice_position,gf(first_pred,1),'r')
title = np.array(np.max(new_p.iloc[:,1:7].values,axis=0),str)
plt.title("".join(title[:6]))

In [None]:
# Same patient: difference between the sigma-2 smoothed first prediction
# column and the raw one, along 'position_2'.
p_df_train = np.unique(df_df_full_meta['patient_id'])
is_patient = df_df_full_meta['patient_id'].values==p_df_train[5]
new_p = df_df_full_meta.iloc[is_patient].sort_values(by=['position_2'])

first_pred = new_p.iloc[:,1+16]
smoothing_delta = gf(first_pred,2)-first_pred

plt.plot(new_p['position_2'],smoothing_delta,'r')
title = np.array(np.max(new_p.iloc[:,1:7].values,axis=0),str)
plt.title("".join(title[:6]))

In [None]:
# Sweep the Gaussian smoothing width: for every sigma, smooth each of the six
# prediction columns along the slice order of every patient and report the
# per-class ROC AUC against the six label columns.
p_df_train = np.unique(df_df_full_meta['patient_id'])
patient_ids = df_df_full_meta['patient_id'].values

# Ordering a patient's slices does not depend on sigma, so do it once
# instead of once per sigma.
patient_scans = [
    df_df_full_meta.iloc[patient_ids==patient].sort_values(by=['position_2'])
    for patient in p_df_train
]

# Labels (columns 1..6) in the same row order as the smoothed scores below.
y_true = np.concatenate(
    [scan.iloc[:,1:7].values for scan in patient_scans], axis=0
).astype(int)

all_result = []

# [0,0.4,0.5,0.6,0.7,0.8,1,1.2,1.4,1.6,2,2.5]

for sigma in [1.6,2,2.5]:
    result_gf = []
    for k in range(6):
        # Prediction column k sits at position 17 + k; smoothing is applied
        # per patient so values never bleed across patients.
        smoothed = np.concatenate(
            [gf(scan.iloc[:,17+k],sigma) for scan in patient_scans]
        )
        result_gf.append(metrics.roc_auc_score(y_true[:,k],smoothed))

    print(sigma)
    print(result_gf)
    print('#########################')
    all_result.append(result_gf)

np.save('all_result.npy',all_result)