In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
# Root folder of the RSNA intracranial hemorrhage detection dataset; the image
# directory and the label CSV path used later in this notebook are built from it.
base_folder = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/'

In [None]:
# Target image geometry used when resizing slices and when building the
# all-zero fallback image.
HEIGHT = 256
WIDTH = 256
CHANNELS = 3

# Ordered as (height, width, channels).
SHAPE = (HEIGHT, WIDTH, CHANNELS)
# Folder the per-image '<ID>.dcm' files are read from.
image_path = base_folder + 'stage_2_train/'

# load image and perform windowing

In [None]:
import matplotlib.pyplot as plt
import pydicom
import cv2

def correct_dcm(dcm):
    """Rewrite the pixel data of ``dcm`` in place with a fixed offset correction.

    The stored pixel values are shifted up by 1000, any value that reaches
    4096 or more is wrapped back by subtracting 4096, and the result is
    written to ``dcm.PixelData``. ``dcm.RescaleIntercept`` is then set to
    -1000. Nothing is returned; the dataset object itself is modified.

    NOTE(review): presumably this repairs files whose values were stored with
    the wrong offset -- confirm against the dataset documentation.
    """
    px_mode = 4096
    shifted = dcm.pixel_array + 1000

    # Values that reach px_mode or beyond wrap around to the low end.
    overflow = shifted >= px_mode
    shifted[overflow] = shifted[overflow] - px_mode

    dcm.PixelData = shifted.tobytes()
    dcm.RescaleIntercept = -1000

def window_image(dcm, window_center, window_width):
    """Return the pixel data of ``dcm`` resized and clipped to an intensity window.

    Parameters
    ----------
    dcm
        DICOM dataset exposing ``BitsStored``, ``PixelRepresentation``,
        ``RescaleSlope``, ``RescaleIntercept`` and ``pixel_array``. It is
        modified in place by ``correct_dcm`` when the condition below holds.
    window_center
        Centre of the intensity window.
    window_width
        Full width of the window; half of it (floor division) is taken on
        each side of the centre.

    Returns
    -------
    2-D array of rescaled values, resized to the height and width given by
    the module-level ``SHAPE`` and clipped to
    ``[window_center - window_width // 2, window_center + window_width // 2]``.
    """
    # 12-bit unsigned data whose intercept is above -100 goes through
    # correct_dcm first; after that the intercept is -1000, so repeated calls
    # on the same dataset do not correct it twice.
    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):
        correct_dcm(dcm)
    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept

    # cv2.resize takes dsize as (width, height), while SHAPE is ordered
    # (height, width, channels), so the first two entries must be swapped.
    img = cv2.resize(img, (SHAPE[1], SHAPE[0]), interpolation=cv2.INTER_LINEAR)

    half_width = window_width // 2
    img_min = window_center - half_width
    img_max = window_center + half_width
    return np.clip(img, img_min, img_max)

def bsb_window(dcm):
    """Build a three-channel image from three intensity windows of ``dcm``.

    The channels are, in order, the windows (center 40, width 80),
    (center 80, width 200) and (center 40, width 380). Each windowed image is
    shifted by the lower bound of its window and divided by the window width.
    The result has the channel axis last.
    """
    # (window center, window width, lower bound of the window)
    window_specs = (
        (40, 80, 0),
        (80, 200, -20),
        (40, 380, -150),
    )

    channels = []
    for center, width, lower in window_specs:
        windowed = window_image(dcm, center, width)
        channels.append((windowed - lower) / width)

    # Stacked as (channel, row, col); move the channel axis to the end.
    return np.array(channels).transpose(1, 2, 0)

def _read(path, SHAPE):
    """Read a DICOM file and return it as a 256x256 ``uint8`` image.

    Parameters
    ----------
    path
        Path of the ``.dcm`` file, passed to ``pydicom.dcmread``.
    SHAPE
        Shape of the all-zero placeholder used when windowing fails.

    Returns
    -------
    ``uint8`` array produced by ``bsb_window`` (or the zero placeholder),
    shifted so each channel's minimum is 0, scaled by 255 and resized to
    256x256.

    Errors raised by ``pydicom.dcmread`` itself are not caught here.
    """
    dcm = pydicom.dcmread(path)
    try:
        image = bsb_window(dcm)
    except Exception:
        # Best-effort fallback: a slice that cannot be windowed becomes an
        # all-zero image. Catching Exception instead of using a bare except
        # lets KeyboardInterrupt and SystemExit propagate, so a long scan can
        # still be interrupted.
        image = np.zeros(SHAPE)
    # Shift every channel so that its minimum becomes 0.
    image -= image.min((0,1))
    image = (255*image).astype(np.uint8)
    image = cv2.resize(image, (256, 256))
    return image

# prepare csv

In [None]:
# Load the label table and reshape it to one row per image.
df = pd.read_csv(base_folder + 'stage_2_train.csv')

# Split the raw 'ID' on its last underscore: the left part stays in 'ID',
# the right part becomes 'type'.
df[['ID', 'type']] = df['ID'].str.rsplit("_", n=1, expand=True)

# pivot requires unique (ID, type) pairs, so repeated pairs are dropped first.
df = df.drop_duplicates(['ID', 'type'])

# One row per ID, one column per type, cell values taken from 'Label'.
# Keyword arguments are required: DataFrame.pivot rejects positional
# arguments in pandas 2.x.
df = df.pivot(index='ID', columns='type', values='Label').reset_index()

count_row = df.shape[0]
print(count_row)
df.head()

In [None]:
# Accumulates the IDs of images flagged as blank by the scan in the next cell.
index = []

In [None]:
# Scan every image and record the IDs of those that look blank.
#
# A channel counts as blank when the mean intensity of its pixels below
# BLANK_INTENSITY_CUTOFF is exactly 0, i.e. every such pixel is 0. Histogram
# bins at or above the cutoff are ignored.
# NOTE(review): presumably the cutoff excludes saturated pixels -- confirm.
BLANK_INTENSITY_CUTOFF = 250
intensity_levels = np.arange(BLANK_INTENSITY_CUTOFF)

for image_id in df["ID"].tolist():
    # load image
    image = _read(image_path + image_id + ".dcm", SHAPE)

    # inspect the histogram of each channel
    for channel in range(image.shape[2]):
        counts = cv2.calcHist([image], [channel], None, [256], [0, 256]).ravel()
        counts = counts[:BLANK_INTENSITY_CUTOFF]
        total = counts.sum()

        if total == 0:
            # Every pixel is at or above the cutoff; the mean is undefined
            # and the channel is not treated as blank (this also avoids a
            # 0/0 division).
            continue

        if np.dot(counts, intensity_levels) / total == 0:
            # One blank channel is enough; stop so the ID is added only once.
            index.append(image_id)
            break

In [None]:
# Remove duplicate IDs. Sorting makes the order deterministic: the iteration
# order of a set of strings can differ between interpreter runs, which would
# make the saved list irreproducible.
index = sorted(set(index))

In [None]:
# visualizing some of the blank images
import random

# Draw up to three distinct IDs. The min() guard keeps this cell working when
# fewer than three images (or none at all) were flagged.
sample_ids = random.sample(index, min(3, len(index)))

for blank_id in sample_ids:
    im = _read(image_path + blank_id + ".dcm", SHAPE)
    plt.imshow(im)
    plt.title(blank_id)
    plt.show()

In [None]:
# Persist the collected blank-image IDs to disk under the name "blank_images".
np.save("blank_images", index)