In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw, ImageFilter
import json
import csv

Annotating a Sample Image

In [3]:
epidural_csv_file = 'segmentations/Results_Epidural Hemorrhage Detection_2020-11-16_21.31.26.148.csv'
epidural_image_repo = 'epidural/max_contrast_window'
epidural_image_file = 'ID_004c4b319.jpg'

In [4]:
def get_coords(image, image_csv_file, image_file_name):
    labels = get_labels_csv(image_csv_file, image_file_name)
    coord_list = []
    for label in labels:
        coord = (label['x'] * image.size[0], label['y'] * image.size[1])
        coord_list.append(coord)
    return coord_list
def get_labels_csv(image_csv_file, image_file_name):
    with open(image_csv_file) as csv_file:
        data_reader = csv.reader(csv_file, dialect='excel')
        for row in data_reader:
            if row[1] == image_file_name:
                return json.loads(row[7])
    return None

In [5]:
im = Image.open(f'{epidural_image_repo}/{epidural_image_file}')
features = []
for x in range(im.size[0]):
    for y in range(20):
        features.append(im.getpixel((x, y))[0])
        features.append(im.getpixel((x, y))[1])
        features.append(im.getpixel((x, y))[2])
draw = ImageDraw.Draw(im)
draw.line(get_coords(im, epidural_csv_file, epidural_image_file), fill='red')
im.show()

In [6]:
# the line of code below loads the 'all labels' field in the csv file in json
# coords_lists = json.loads(row[4].replace("'[", "[").replace("]'", "],").replace(',]', ']'))

Reading an Image and Its Classification

In [7]:
num_features = 786432
subdural_num_features = 457776

In [8]:
all_labels = pd.read_csv('segmentations/hemorrhage-labels.csv')

In [9]:
all_labels

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_000012eaf,0,0,0,0,0,0
1,ID_000039fa0,0,0,0,0,0,0
2,ID_00005679d,0,0,0,0,0,0
3,ID_00008ce3c,0,0,0,0,0,0
4,ID_0000950d7,0,0,0,0,0,0
...,...,...,...,...,...,...,...
752798,ID_ffff82e46,0,0,0,0,0,0
752799,ID_ffff922b9,1,0,0,1,0,0
752800,ID_ffffb670a,1,0,0,0,1,0
752801,ID_ffffcbff8,0,0,0,0,0,0


In [10]:
labels = all_labels[all_labels['any'] == 1]

In [11]:
labels

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
14,ID_0002081b6,1,0,1,0,0,0
24,ID_0002a38ad,1,0,0,0,1,1
33,ID_000346ce2,1,0,0,0,0,1
36,ID_00042829c,1,0,0,1,0,0
43,ID_0004a5701,1,0,0,0,0,1
...,...,...,...,...,...,...,...
752755,ID_fffc60817,1,0,1,1,0,0
752769,ID_fffd00949,1,0,0,0,1,0
752783,ID_fffe2edb8,1,0,1,1,0,0
752799,ID_ffff922b9,1,0,0,1,0,0


In [12]:
columns = ['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']

In [13]:
def get_images(classification, max_size=1000000): 
    classification_labels = labels[(labels[classification] == 1) & (labels[columns].sum(axis=1) == 1)]
    data = np.zeros((classification_labels.shape[0], num_features))
    for index, value in enumerate(classification_labels['Image'].items()):
        data[index] = plt.imread(f'{classification}/max_contrast_window/{value[1]}.jpg').flatten()
        if index == max_size:
            break
    return data

In [14]:
def get_multi_class_images(max_size=1000000):
    classification_labels = labels[(labels[columns].sum(axis=1) > 1)]
    data = np.zeros((classification_labels.shape[0], num_features))
    for index, value in enumerate(classification_labels['Image'].items()):
        data[index] = plt.imread(f'multi/max_contrast_window/{value[1]}.jpg').flatten()
        if index == max_size:
            break
    return data

In [15]:
epidural = get_images('epidural', 1000)

In [16]:
intraparenchymal = get_images('intraparenchymal', 1000)

In [17]:
intraventricular = get_images('intraventricular', 1000)

In [18]:
subarachnoid = get_images('subarachnoid', 1000)

In [138]:
subdural = get_images('subdural', 1000)

ValueError: could not broadcast input array from shape (457776,) into shape (786432,)

In [19]:
multi = get_multi_class_images(1000)

In [135]:
epidural

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [20]:
intraparenchymal

array([[26., 26., 36., ...,  0.,  1.,  7.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [36., 36., 48., ..., 27., 27., 35.],
       ...,
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [21]:
multi

array([[ 31.,  31.,  41., ...,  73.,  72., 103.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [ 34.,  34.,  44., ...,   2.,   2.,  10.],
       ...,
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.]])

In [22]:
intraventricular.shape

(9878, 786432)

In [23]:
classification_labels = labels[(labels['subdural'] == 1) & (labels[columns].sum(axis=1) == 1)]
data = np.zeros((classification_labels.shape[0], num_features))
print(data.shape)
for index, value in enumerate(classification_labels['Image'].items()):
    print(plt.imread(f'subdural/max_contrast_window/{value[1]}.jpg').reshape(1,-1).shape)
    if index == 1000:
        break

(32200, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)


(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 786432)
(1, 