In [31]:
import numpy as np
import pandas as pd
import pydicom
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import keras 
from keras.models import model_from_json
from skimage.transform import resize

In [32]:
# This function reads in a .dcm file, checks the important fields for our device, and returns a numpy array
# of just the imaging data
def print_dicom(filename): 
    ds = pydicom.dcmread(filename)
    img = ds.pixel_array
    img_mean = np.mean(img)
    img_std = np.std(img)
    print('*********************************************')
    print('Patient Id:', ds.PatientID)
    print('Study Description:',ds.StudyDescription)
    print('Patient Gender:', ds.PatientSex)
    print('Patient Age:', ds.PatientAge)
    print('Patient Position:', ds.PatientPosition)
    print('Body Part Examined:', ds.BodyPartExamined)
    return img, img_mean, img_std

def check_dicom(filename): 
    print('Check file data {} ...'.format(filename))
    ds = pydicom.dcmread(filename)
    try:
        assert(ds.BodyPartExamined == 'CHEST')
        assert(ds.Modality == 'DX')
        assert(ds.PatientPosition in ['AP', 'PA'])
    except Exception as e:
        print('File {} contains invalid data - MODELS SHOULD NOT BE USED'.format(filename))
        print(e)
        return None,0,0
    
# This function takes the numpy array output by check_dicom and 
# runs the appropriate pre-processing needed for our model input
def preprocess_image(img,img_mean,img_std,img_size): 
    proc_img = img.copy()
    proc_img = (proc_img - img_mean)/img_std
    proc_img = resize(img, img_size)
    return proc_img

# This function loads in our trained model w/ weights and compiles it 
def load_model(model_path, weight_path):
    json_file = open(model_path, "r")
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json)
    model.load_weights(weight_path)
    return model

# This function uses our device's threshold parameters to predict whether or not
# the image shows the presence of pneumonia using our trained model
def predict_image(model, img, thresh): 
    pred_Y = model.predict(img, batch_size = 1, verbose = True)
    prediction = '------------------------------------------------'
    print(pred_Y)
    if pred_Y[0][0] > thresh:
        prediction = 'With Pneumonia'
    else:
        prediction = 'Without Pneumonia'
    return prediction

In [33]:
test_dicoms = ['test1.dcm','test2.dcm','test3.dcm','test4.dcm','test5.dcm','test6.dcm']

model_path = "my_model.json" #path to saved model
weight_path = "my_model.hdf5" #path to saved best weights

IMG_SIZE=(1,224,224,3) # This might be different if you did not use vgg16

my_model = load_model(model_path, weight_path)
thresh = 0.518612585 #loads the threshold they chose for model classification 

# use the .dcm files to test your prediction
for i in test_dicoms:
    img = np.array([])
    
    img, img_mean, img_std = print_dicom(i)
    check_dicom(i)
    if img is None:
        continue
        
    img_proc = preprocess_image(img,img_mean,img_std,IMG_SIZE)
    pred = predict_image(my_model,img_proc,thresh)
    print(pred)

*********************************************
Patient Id: 2
Study Description: No Finding
Patient Gender: M
Patient Age: 81
Patient Position: PA
Body Part Examined: CHEST
Check file data test1.dcm ...
[[0.53550315]]
With Pneumonia
*********************************************
Patient Id: 1
Study Description: Cardiomegaly
Patient Gender: M
Patient Age: 58
Patient Position: AP
Body Part Examined: CHEST
Check file data test2.dcm ...
[[0.5414694]]
With Pneumonia
*********************************************
Patient Id: 61
Study Description: Effusion
Patient Gender: M
Patient Age: 77
Patient Position: AP
Body Part Examined: CHEST
Check file data test3.dcm ...
[[0.54892963]]
With Pneumonia
*********************************************
Patient Id: 2
Study Description: No Finding
Patient Gender: M
Patient Age: 81
Patient Position: PA
Body Part Examined: RIBCAGE
Check file data test4.dcm ...
File test4.dcm contains invalid data - MODELS SHOULD NOT BE USED

[[0.53550315]]
With Pneumonia
********

### I'm going to read the images and check their labels

In [34]:
import glob
mydicoms = glob.glob("*.dcm")

In [35]:
dcm1 = pydicom.dcmread(mydicoms[0])
dcm1.StudyDescription

'Cardiomegaly'

In [36]:
all_data = []

for i in mydicoms: 
    dcm = pydicom.dcmread(i)
    fields = [dcm.PatientID, int(dcm.PatientAge), dcm.PatientSex, dcm.Modality, dcm.StudyDescription,
             dcm.Rows, dcm.Columns]
    all_data.append(fields)

In [37]:
mydata = pd.DataFrame(all_data, 
                      columns = ['PatientID','PatientAge','PatientSex','Modality','Findings','Rows','Columns'])
mydata.head(10)

Unnamed: 0,PatientID,PatientAge,PatientSex,Modality,Findings,Rows,Columns
0,1,58,M,DX,Cardiomegaly,1024,1024
1,2,81,M,CT,No Finding,1024,1024
2,2,81,M,DX,No Finding,1024,1024
3,2,81,M,DX,No Finding,1024,1024
4,2,81,M,DX,No Finding,1024,1024
5,61,77,M,DX,Effusion,1024,1024
