In [2]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw, ImageFilter
import json
import csv

Annotating a Sample Image

In [3]:
epidural_csv_file = 'segmentations/Results_Epidural Hemorrhage Detection_2020-11-16_21.31.26.148.csv'
epidural_image_repo = 'epidural/max_contrast_window'
epidural_image_file = 'ID_004c4b319.jpg'

In [4]:
def get_coords(image, image_csv_file, image_file_name):
    labels = get_labels_csv(image_csv_file, image_file_name)
    coord_list = []
    for label in labels:
        coord = (label['x'] * image.size[0], label['y'] * image.size[1])
        coord_list.append(coord)
    return coord_list
def get_labels_csv(image_csv_file, image_file_name):
    with open(image_csv_file) as csv_file:
        data_reader = csv.reader(csv_file, dialect='excel')
        for row in data_reader:
            if row[1] == image_file_name:
                return json.loads(row[7])
    return None

In [5]:
im = Image.open(f'{epidural_image_repo}/{epidural_image_file}')
features = []
for x in range(im.size[0]):
    for y in range(20):
        features.append(im.getpixel((x, y))[0])
        features.append(im.getpixel((x, y))[1])
        features.append(im.getpixel((x, y))[2])
draw = ImageDraw.Draw(im)
draw.line(get_coords(im, epidural_csv_file, epidural_image_file), fill='red')
im.show()

Reading an Image and Its Classification

In [6]:
num_examples = 2500
num_features = 786432
standard_height, standard_width = 512, 512

In [7]:
def get_malformed_images():
    images = []
    directories = ['subdural/max_contrast_window', 'multi/max_contrast_window']
    for directory in directories:
        for filename in os.listdir(directory):
            im = Image.open(f'{directory}/{filename}')
            width, height = im.size
            if width != standard_width and height != standard_height:
                images.append(filename[:-4])
    return images

In [8]:
malformed_images = get_malformed_images() 
print(len(malformed_images))

86


In [9]:
malformed_images

['ID_3e60e696d',
 'ID_0f8aa5749',
 'ID_85900eb84',
 'ID_7e870621c',
 'ID_b966185b8',
 'ID_69974dd3e',
 'ID_09aeb0bbd',
 'ID_75e3f7e5a',
 'ID_2fd4dda7c',
 'ID_23d0b13b7',
 'ID_c6f2d84be',
 'ID_1bb3b44c7',
 'ID_dfaa49f5c',
 'ID_cb970c6dc',
 'ID_830f46cad',
 'ID_898ff55b6',
 'ID_88b0d8b4f',
 'ID_f4c2157d8',
 'ID_9a36e4b0e',
 'ID_72dce7784',
 'ID_f03370d7c',
 'ID_dd083e12a',
 'ID_60a1f0e24',
 'ID_882cd57de',
 'ID_94463e98f',
 'ID_c037d5727',
 'ID_a2f9ba4bf',
 'ID_d3b76ef6e',
 'ID_c6bbec638',
 'ID_798d956d0',
 'ID_631f0b556',
 'ID_a9ab8569f',
 'ID_ae691dd29',
 'ID_cec3997fa',
 'ID_75d691728',
 'ID_ae7020fd1',
 'ID_191369dca',
 'ID_64b44f180',
 'ID_8dc299456',
 'ID_038f966b9',
 'ID_19f266244',
 'ID_0c4987103',
 'ID_f0d55b727',
 'ID_c07d2cb73',
 'ID_0e1861e6d',
 'ID_f188940f9',
 'ID_91b9ce430',
 'ID_ff9674e53',
 'ID_a23a8193f',
 'ID_a880e377e',
 'ID_c6463f07d',
 'ID_b4adf8739',
 'ID_f698edc00',
 'ID_bd4f3f06f',
 'ID_4e61fb0b2',
 'ID_445a92ac2',
 'ID_fd5c41761',
 'ID_985fb5e49',
 'ID_3c8b72361

In [10]:
all_labels = pd.read_csv('segmentations/hemorrhage-labels.csv')

In [11]:
labels = all_labels[all_labels['any'] == 1]

In [12]:
labels

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
14,ID_0002081b6,1,0,1,0,0,0
24,ID_0002a38ad,1,0,0,0,1,1
33,ID_000346ce2,1,0,0,0,0,1
36,ID_00042829c,1,0,0,1,0,0
43,ID_0004a5701,1,0,0,0,0,1
...,...,...,...,...,...,...,...
752755,ID_fffc60817,1,0,1,1,0,0
752769,ID_fffd00949,1,0,0,0,1,0
752783,ID_fffe2edb8,1,0,1,1,0,0
752799,ID_ffff922b9,1,0,0,1,0,0


In [13]:
for image_name in malformed_images:
    labels = labels[labels.Image != image_name]
labels

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
14,ID_0002081b6,1,0,1,0,0,0
24,ID_0002a38ad,1,0,0,0,1,1
33,ID_000346ce2,1,0,0,0,0,1
36,ID_00042829c,1,0,0,1,0,0
43,ID_0004a5701,1,0,0,0,0,1
...,...,...,...,...,...,...,...
752755,ID_fffc60817,1,0,1,1,0,0
752769,ID_fffd00949,1,0,0,0,1,0
752783,ID_fffe2edb8,1,0,1,1,0,0
752799,ID_ffff922b9,1,0,0,1,0,0


In [16]:
labels = labels.iloc[:num_examples].reset_index(drop=True)

In [17]:
labels

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural
0,ID_0002081b6,1,0,1,0,0,0
1,ID_0002a38ad,1,0,0,0,1,1
2,ID_000346ce2,1,0,0,0,0,1
3,ID_00042829c,1,0,0,1,0,0
4,ID_0004a5701,1,0,0,0,0,1
...,...,...,...,...,...,...,...
2495,ID_05c657224,1,0,0,0,0,1
2496,ID_05c676824,1,0,0,0,0,1
2497,ID_05c67efcc,1,0,0,0,0,1
2498,ID_05c6e5b00,1,0,0,0,0,1


In [18]:
def get_image_data():
    return labels.apply(lambda row: get_data(row), axis=1, result_type='expand')
def get_data(row):
    if row[2] + row[3] + row[4] + row[5] + row[6] > 1:
        return plt.imread(f'multi/max_contrast_window/{row[0]}.jpg').flatten()
    elif row[2] == 1:
        return plt.imread(f'epidural/max_contrast_window/{row[0]}.jpg').flatten()
    elif row[3] == 1:
        return plt.imread(f'intraparenchymal/max_contrast_window/{row[0]}.jpg').flatten()
    elif row[4] == 1:
        return plt.imread(f'intraventricular/max_contrast_window/{row[0]}.jpg').flatten()
    elif row[5] == 1:
        return plt.imread(f'subarachnoid/max_contrast_window/{row[0]}.jpg').flatten()
    elif row[6] == 1:
        return plt.imread(f'subdural/max_contrast_window/{row[0]}.jpg').flatten()
    return []

In [19]:
image_data = get_image_data()

In [20]:
image_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,786422,786423,786424,786425,786426,786427,786428,786429,786430,786431
0,26,26,36,27,27,37,26,26,36,25,...,7,0,1,7,0,1,7,0,1,7
1,31,31,41,27,27,37,22,22,32,21,...,111,78,77,108,75,74,105,73,72,103
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,38,38,50,36,36,48,32,32,44,29,...,42,19,18,32,19,18,32,25,24,38
2496,0,0,0,0,0,0,0,0,0,0,...,26,18,19,24,18,19,24,20,21,26
2497,38,38,50,33,33,45,29,29,41,31,...,102,76,76,104,77,77,105,78,78,106
2498,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
total_data = pd.concat([labels, image_data], axis=1)

In [22]:
total_data

Unnamed: 0,Image,any,epidural,intraparenchymal,intraventricular,subarachnoid,subdural,0,1,2,...,786422,786423,786424,786425,786426,786427,786428,786429,786430,786431
0,ID_0002081b6,1,0,1,0,0,0,26,26,36,...,7,0,1,7,0,1,7,0,1,7
1,ID_0002a38ad,1,0,0,0,1,1,31,31,41,...,111,78,77,108,75,74,105,73,72,103
2,ID_000346ce2,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,ID_00042829c,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,ID_0004a5701,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,ID_05c657224,1,0,0,0,0,1,38,38,50,...,42,19,18,32,19,18,32,25,24,38
2496,ID_05c676824,1,0,0,0,0,1,0,0,0,...,26,18,19,24,18,19,24,20,21,26
2497,ID_05c67efcc,1,0,0,0,0,1,38,38,50,...,102,76,76,104,77,77,105,78,78,106
2498,ID_05c6e5b00,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [281]:
# legacy function: loads data for a particular classification into a DataFrame
#def get_images(classification, max_size=1000000): 
#    classification_labels = labels[(labels[classification] == 1) & (labels[columns].sum(axis=1) == 1)]
#    data = np.zeros((classification_labels.shape[0], num_features))
#    for index, value in enumerate(classification_labels['Image'].items()):
#        data[index] = plt.imread(f'{classification}/max_contrast_window/{value[1]}.jpg').flatten()
#        if index == max_size:
#            break
#    return data
#epidural = get_images('epidural', 1000)
#intraparenchymal = get_images('intraparenchymal', 1000)
#intraventricular = get_images('intraventricular', 1000)
#subarachnoid = get_images('subarachnoid', 1000)
#subdural = get_images('subdural', 1000)

In [282]:
# legacy function: loads data for a the "multi" classification into a DataFrame
#def get_multi_class_images(max_size=1000000):
#    classification_labels = labels[(labels[columns].sum(axis=1) > 1)]
#    data = np.zeros((classification_labels.shape[0], num_features))
#    for index, value in enumerate(classification_labels['Image'].items()):
#        data[index] = plt.imread(f'multi/max_contrast_window/{value[1]}.jpg').flatten()
#        if index == max_size:
#            break
#    return data
#multi = get_multi_class_images(1000)

In [283]:
# the line of code below loads the 'all labels' field in the csv file in json
# coords_lists = json.loads(row[4].replace("'[", "[").replace("]'", "],").replace(',]', ']'))