In [1]:
import os
import numpy as np
import pandas as pd
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from skimage import morphology, io, exposure, img_as_float, transform, img_as_ubyte
from matplotlib import pyplot as plt

Using TensorFlow backend.


In [16]:
def loadDataGeneral(df, path, im_shape):
    """
    reshaping images
    """
    X = []
    for i, item in df.iterrows():
        img = img_as_float(io.imread(path + '/' + item[0]))
        img = transform.resize(img, im_shape)
        img = exposure.equalize_hist(img)
        img = np.expand_dims(img, -1)
        X.append(img)

    X = np.array(X)
    X -= X.mean()
    X /= X.std()

    return X

In [3]:
def remove_small_regions(img, size):
    """
    Morphologically removes small (less than size)
    connected regions of 0s or 1s.
    """
    img = morphology.remove_small_objects(img, size)
    img = morphology.remove_small_holes(img, size)

    return img

In [8]:
def distribution(gray_img):
    """
    calculate the distribution of img,
    total will be used on calculating fraction
    """
    img_shape = gray_img.shape
    # changed gray scale value into 0-16
    gray_distribution = [0] * 16
    total = 0
    for i in range(0, img_shape[0]):
        for j in range(0, img_shape[1]):
            gray_distribution[int(gray_img[i][j])] += 1
            total += int(gray_img[i][j])

    return gray_distribution, total

In [5]:
def lung_density(pr, img):
    """calculate the density of two lungs."""
    density = 0
    size = 0
    img_shape = img.shape
    for i in range(0, img_shape[0]):
        for j in range(0, img_shape[1]):
            if pr[i][j] == 1:
                size += 1
                density += img[i][j]

    return density * 1.0 / (size + 1)

In [6]:
def size_of_lungs(img):
    # img = read_the_image_file()
    t = [[img[j][i] for j in range(len(img))] for i in range(len(img[0]))]
    r = 0  # right (in actual body) lung
    l = 0
    s = 0  # starting point
    m = 0  # middle_separation point
    for line in range(len(t)):
        if sum(t[line][:]) > 0:
            s = line
            break
        else:
            continue
    for line in range(s, len(t)):
        if sum(t[line][:]) == 0:
            m = line
            break
        else:
            s_r = sum(t[line][:])
            r = r + s_r
    for line in range(m, len(t)):
        s_l = sum(t[line][:])
        l = l + s_l

    fraction = round(l/r, 3)
    # print("Right lung: %s" % r, "\nLeft lung size: %s" % l, "\nFraction between right and left lung: %s" % fraction)

    return r, l, fraction

In [10]:
def extract_features(model, folder, df, savefile):
    """
    this function will combine features:
    gray scale value distribution,
    density of lungs,
    ...
    """
    features = []

    # Load test data
    im_shape = (256, 256)
    X = loadDataGeneral(df, folder, im_shape)

    # stop when it arrive the length of X
    n_test = X.shape[0]
    inp_shape = X[0].shape

    # Load model
    UNet = load_model(model)

    # For inference standard keras ImageGenerator can be used.
    test_gen = ImageDataGenerator(rescale=1.)

    i = 0
    for xx in test_gen.flow(X, batch_size=1):
        # feature = []
        img = exposure.rescale_intensity(np.squeeze(xx), out_range=(0, 1))
        # I'm still thinking about how to deal with the gray scale
        # img = img_as_ubyte(img)
        # convert image gray scale into 0-16
        img = img * 15
        img = img.astype(dtype=np.int8)
        pred = UNet.predict(xx)[..., 0].reshape(inp_shape[:2])
        pr = pred > 0.5
        pr = remove_small_regions(pr, 0.02 * np.prod(im_shape))
        pr_int = np.array(pr, dtype=np.int8)

        dist, total = distribution(img)
        dist.append(total)
        dist.append(lung_density(pr_int, img))
        r, l, fraction = size_of_lungs(pr_int)
        dist.append(r)
        dist.append(l)
        dist.append(fraction)

        features.append(dist)
        # np.savetxt('test.out', pr_int, delimiter='', fmt="%s")

        i += 1
        if i == n_test:
            break

    np_features = np.array(features)
    np.savetxt(savefile, np_features, delimiter=',', fmt="%s")
    # return features

In [11]:
# extract features from normal
model = '/Users/shengbo/shengbo/VU/ML/chest_xray/lung-segmentation-2d/trained_model.hdf5'
folder = '/Users/shengbo/shengbo/VU/ML/chest_xray/val/NORMAL'
files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
df = pd.DataFrame(data=files, columns={'img'})
df[df['img'] == '.DS_Store'] = None
df = df.dropna()
extract_features(model, folder, df, 'normal_val.csv')

In [21]:
# extract features from pneumonia
model = '/Users/shengbo/shengbo/VU/ML/chest_xray/lung-segmentation-2d/trained_model.hdf5'
folder = '/Users/shengbo/shengbo/VU/ML/chest_xray/val/PNEUMONIA'
files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
df = pd.DataFrame(data=files, columns={'img'})
df[df['img'] == '.DS_Store'] = None
df = df.dropna()
extract_features(model, folder, df, 'pneumonia_val.csv')

In [20]:
model = '/Users/shengbo/shengbo/VU/ML/chest_xray/lung-segmentation-2d/trained_model.hdf5'
folder = '/Users/shengbo/shengbo/VU/ML/chest_xray/test/NORMAL'
files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
df = pd.DataFrame(data=files, columns={'img'})
df[df['img'] == '.DS_Store'] = None
df = df.dropna()
extract_features(model, folder, df, 'normal_test.csv')

In [22]:
nor_df = pd.read_csv('normal_val.csv')

In [28]:
nor_df

Unnamed: 0,11784.0,3812.0,3813.0,3801.0,3837.0,3815.0,3819.0,3853.0,3783.0,3849.0,...,3799.0,3786.0,3903.0,4038.0,13.0,405051.0,3.4634116718285055,10035.0,6431.0,0.641
0,11032.0,3842.0,3847.0,3883.0,3847.0,3854.0,3834.0,3879.0,3829.0,3921.0,...,3831.0,3943.0,3774.0,4384.0,40.0,412376.0,4.778062,7816.0,5867.0,0.751
1,10313.0,3889.0,3917.0,3919.0,3942.0,3899.0,3927.0,3876.0,3965.0,3903.0,...,3821.0,4092.0,3992.0,4132.0,63.0,416968.0,4.522282,8764.0,6449.0,0.736
2,10724.0,3937.0,3903.0,3882.0,3905.0,3888.0,3901.0,3922.0,3876.0,3852.0,...,3931.0,3913.0,3972.0,4019.0,11.0,412249.0,5.565868,7196.0,4903.0,0.681
3,9273.0,3985.0,3951.0,4004.0,4019.0,3985.0,3987.0,4021.0,3968.0,3967.0,...,4029.0,3906.0,4038.0,4298.0,74.0,424693.0,4.839244,8599.0,6062.0,0.705
4,9871.0,3918.0,3927.0,3967.0,3921.0,3991.0,3930.0,3944.0,3934.0,3914.0,...,4078.0,4075.0,3975.0,4146.0,46.0,420355.0,4.429941,9545.0,6205.0,0.65
5,9425.0,3973.0,3943.0,4018.0,3963.0,4012.0,3998.0,3992.0,4084.0,3989.0,...,4017.0,4027.0,4011.0,4082.0,23.0,422267.0,3.899186,11070.0,8351.0,0.754
6,4078.0,4674.0,4311.0,4394.0,4346.0,4333.0,4350.0,4291.0,4336.0,4301.0,...,4293.0,4319.0,4392.0,4844.0,2.0,461774.0,6.848019,8272.0,4597.0,0.556
