In [5]:
import numpy as np
import pandas as pd
import pydicom
%matplotlib inline
import matplotlib.pyplot as plt
import keras
import time

In [2]:
from skimage import data, color
from skimage.transform import resize
from keras.models import model_from_json

In [3]:
def check_dicom(filename, verbose=True):
    """Read a DICOM file and return its pixel data if the header is acceptable.

    The file is accepted only when all of these header fields match
    (comparison is case-insensitive):

    * ``BodyPartExamined`` is ``CHEST``
    * ``Modality`` is ``DX``
    * ``PatientPosition`` is ``PA`` or ``AP``

    A header field that is absent from the file is treated as a mismatch, so
    the file is rejected instead of raising ``AttributeError``.

    NOTE(review): the DICOM standard usually carries the PA/AP projection in
    ``ViewPosition``; this notebook reads it from ``PatientPosition`` because
    that is where its test files store it -- confirm before using other data.

    Parameters
    ----------
    filename : str
        Path of the ``.dcm`` file to read.
    verbose : bool, default True
        When true, print the progress / rejection messages.

    Returns
    -------
    numpy.ndarray or None
        ``ds.pixel_array`` when every check passes, otherwise ``None``.
    """
    def _log(message):
        # Single place that honours the ``verbose`` switch.
        if verbose:
            print(message)

    _log('\nLoad file {} ...'.format(filename))
    ds = pydicom.dcmread(filename)
    img = ds.pixel_array

    # Image dimensions
    _log('Image size: {}'.format(img.shape))

    # (DICOM attribute, label used in the messages, accepted values),
    # checked in this order; the first failing check rejects the image.
    header_checks = (
        ('BodyPartExamined', 'Body part', ('CHEST',)),
        ('Modality', 'Modality', ('DX',)),
        ('PatientPosition', 'Patient position', ('PA', 'AP')),
    )
    for attribute, label, accepted in header_checks:
        raw_value = getattr(ds, attribute, None)
        value = '<MISSING>' if raw_value is None else str(raw_value).upper()
        if value not in accepted:
            expected = ' or '.join('"{}"'.format(option) for option in accepted)
            _log('Image rejected. {} should be {} but it is {}\n'.format(
                label, expected, value))
            return None
        _log('{}: {}'.format(label, value))

    # Findings: the notebook reports the StudyDescription field as the
    # finding label; it is informational only and never rejects a file.
    findings = getattr(ds, 'StudyDescription', 'N/A')
    _log('Findings: {}'.format(findings))

    return img
    
    
def preprocess_image(img, img_mean=None, img_std=None, img_size=(224, 224)):
    """Turn the array returned by ``check_dicom`` into a model input batch.

    Steps, in order:

    1. An ``(H, W, 3)`` RGB image is collapsed to grayscale.
    2. The image is resized to the target height and width. Per the
       scikit-image documentation, ``resize`` also converts integer input to
       floating point using the ``img_as_float`` convention, which is why no
       separate ``1/255`` rescale is applied here.
    3. If both ``img_mean`` and ``img_std`` are given, the resized image is
       standardized as ``(x - img_mean) / img_std``.
       NOTE(review): this assumes the statistics were computed on the
       rescaled image values -- confirm against the training pipeline.
    4. The single channel is repeated to fill the requested channel count.

    Parameters
    ----------
    img : numpy.ndarray
        2-D grayscale image or 3-D ``(H, W, 3)`` RGB image.
    img_mean, img_std : float or array-like, optional
        Standardization statistics; skipped unless both are provided.
    img_size : tuple
        ``(height, width)`` (3 channels assumed), ``(height, width,
        channels)`` or ``(batch, height, width, channels)``. The batch entry
        is ignored: the output always holds exactly one image.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, height, width, channels)``.

    Raises
    ------
    ValueError
        If ``img_size`` has an unsupported length or ``img`` is neither 2-D
        nor ``(H, W, 3)``.
    """
    # Accept both the documented default (H, W) and the 4-tuple the notebook
    # actually passes; the original indexed [1], [2], [3] unconditionally and
    # therefore crashed on its own default value.
    if len(img_size) == 4:
        _, height, width, channels = img_size
    elif len(img_size) == 3:
        height, width, channels = img_size
    elif len(img_size) == 2:
        height, width = img_size
        channels = 3
    else:
        raise ValueError(
            'img_size must have 2, 3 or 4 entries, got {}'.format(img_size))

    # `and`, not `&`: the bitwise operator binds tighter than `==`, which
    # made the original test accept any channel count with its low two bits set.
    if img.ndim == 3 and img.shape[-1] == 3:
        # The image output is (width, height)
        img = np.squeeze(color.rgb2gray(img))

    if img.ndim != 2:
        raise ValueError(
            'Expected a 2-D grayscale or (H, W, 3) RGB image, '
            'got shape {}'.format(img.shape))

    # Resize to output dim: (height, width)
    proc_img = resize(img, (height, width))

    # Standardize the resized image itself; the original computed this on
    # `img` and then overwrote the result with the resize output.
    if img_mean is not None and img_std is not None:
        proc_img = (proc_img - img_mean) / img_std

    # Reshape to (1, height, width, 1), then repeat along the channel axis.
    proc_img = proc_img.reshape(1, height, width, 1)
    return np.repeat(proc_img, channels, axis=3)


def load_model(model_path, weight_path):
    """Rebuild the trained network from its saved architecture and weights.

    The architecture is read as JSON text from ``model_path`` and passed to
    ``model_from_json``; the weights stored at ``weight_path`` are then loaded
    into the resulting model. The model is returned without being compiled.

    Parameters
    ----------
    model_path : str
        Path of the JSON file describing the model architecture.
    weight_path : str
        Path of the weight file handed to ``load_weights``.

    Returns
    -------
    The model object produced by ``model_from_json`` with weights loaded.
    """
    # Read the architecture description, then release the file handle.
    with open(model_path, 'r') as json_file:
        architecture = json_file.read()

    network = model_from_json(architecture)
    network.load_weights(weight_path)
    return network

def predict_image(model, img, thresh):
    """Classify one preprocessed image as pneumonia or not.

    Parameters
    ----------
    model : object exposing ``predict``
        The trained model.
    img : array-like
        Input passed straight to ``model.predict``.
    thresh : float
        Decision threshold; the result is positive only when the model's
        output is strictly greater than this value.

    Returns
    -------
    str
        ``'Pneumonia'`` or ``'Non pneumonia'``.
    """
    above_threshold = model.predict(img) > thresh
    if above_threshold:
        return 'Pneumonia'
    return 'Non pneumonia'

In [10]:
# DICOM files used to exercise the full check -> preprocess -> predict pipeline.
test_dicoms = ['test1.dcm', 'test2.dcm', 'test3.dcm', 'test4.dcm', 'test5.dcm', 'test6.dcm']

model_path = "my_model.json"  # path to saved model
weight_path = "xray_class_my_model.best.hdf5"  # path to saved best weights

# (batch, height, width, channels). This might be different if you did not use vgg16
IMG_SIZE = (1, 224, 224, 3)

# No mean/std normalization is applied: both statistics are left unset, so
# preprocess_image only resizes and replicates channels.
img_mean = None
img_std = None

my_model = load_model(model_path, weight_path)  # loads model
thresh = 0.374  # threshold chosen for model classification

# use the .dcm files to test your prediction
for i in test_dicoms:

    # check_dicom returns None for files whose header fails validation;
    # those are skipped without running the model.
    img = check_dicom(i)

    if img is None:
        continue

    img_proc = preprocess_image(img, img_mean, img_std, IMG_SIZE)
    pred = predict_image(my_model, img_proc, thresh)
    print('Prediction: {}'.format(pred))


Load file test1.dcm ...
Image size: (1024, 1024)
Body part: CHEST
Modality: DX
Patient position: PA
Findings: No Finding
Prediction: Non pneumonia

Load file test2.dcm ...
Image size: (1024, 1024)
Body part: CHEST
Modality: DX
Patient position: AP
Findings: Cardiomegaly
Prediction: Non pneumonia

Load file test3.dcm ...
Image size: (1024, 1024)
Body part: CHEST
Modality: DX
Patient position: AP
Findings: Effusion
Prediction: Pneumonia

Load file test4.dcm ...
Image size: (1024, 1024)
Image rejected. Body part should be "CHEST" but it is RIBCAGE


Load file test5.dcm ...
Image size: (1024, 1024)
Body part: CHEST
Image rejected. Modality should be "DX" but it is CT


Load file test6.dcm ...
Image size: (1024, 1024)
Body part: CHEST
Modality: DX
Image rejected. Patient position should be "PA" or "AP" but it is XX

