In [10]:
import os
import pandas as pd
import numpy as np
from skimage.io import imread
from skimage import feature

%matplotlib inline


# Locations of the image directory and the label CSV.
# NOTE(review): these are machine-specific absolute paths; anyone else running
# the notebook has to edit them before this cell works.
TRAINING_IMAGES_PATH = '/Users/nmanna/workspace/SYDE522/boneage/rsna-bone-age/boneage-training-dataset'
TRAINING_DATA_CSV_PATH = '/Users/nmanna/workspace/SYDE522/boneage/rsna-bone-age/boneage-training-dataset.csv'

# Full path of every entry in the image directory.
# os.listdir() returns names in arbitrary order and also lists hidden files
# (e.g. .DS_Store), so dot-files are dropped and the result is sorted to make
# the processing order reproducible across runs and machines.
training_images = sorted(
    os.path.join(TRAINING_IMAGES_PATH, image_name)
    for image_name in os.listdir(TRAINING_IMAGES_PATH)
    if not image_name.startswith('.')
)

# Label table loaded from the CSV; later cells read its `id`, `boneage` and
# `male` columns.
training_data = pd.read_csv(TRAINING_DATA_CSV_PATH)


def load_image(image_path):
    """Read the image stored at ``image_path`` and return what ``imread`` yields."""
    pixels = imread(image_path)
    return pixels


    

In [11]:
def log_progress(sequence, every=None, size=None, name='Items'):
    """Yield the items of ``sequence`` while updating a notebook progress widget.

    This is a generator: the widget is created and displayed when the first
    item is requested, not when the function is called.

    Parameters
    ----------
    sequence : iterable
        Items to iterate over. If it has no ``len()`` and ``size`` is not
        given, it is treated as an iterator of unknown length.
    every : int, optional
        Refresh the widget on the first item and then on every ``every``-th
        item. When omitted and the length is known it defaults to 1 for up
        to 200 items and to ``int(size / 200)`` otherwise. Values below 1
        are treated as 1.
    size : int, optional
        Total number of items; defaults to ``len(sequence)`` when available.
    name : str
        Label shown in front of the counter.

    Yields
    ------
    The items of ``sequence``, unchanged and in order.

    Raises
    ------
    AssertionError
        If the length is unknown and ``every`` was not supplied.
    """
    # Imported lazily so the widget libraries are only needed when the
    # generator is actually consumed.
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            # No len(): fall back to the "unknown total" display mode.
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    # Guard the modulo below: every=0 (e.g. from len(x) // 100 on a short
    # input) would raise ZeroDivisionError on the second item.
    if every < 1:
        every = 1

    if is_iterator:
        # Unknown total: show a full bar in "info" style and count in the label.
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{name}: {index} / ?'.format(
                        name=name,
                        index=index
                    )
                else:
                    progress.value = index
                    label.value = u'{name}: {index} / {size}'.format(
                        name=name,
                        index=index,
                        size=size
                    )
            yield record
    except BaseException:
        # Mark the bar as failed for any interruption (including the consumer
        # stopping early), then let the exception propagate unchanged.
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        # An index of 0 means nothing was yielded; show '?' in that case.
        label.value = "{name}: {index}".format(
            name=name,
            index=str(index or '?')
        )


In [12]:
class LocalBinaryPatterns:
    """Turn an image into a normalized histogram of Local Binary Pattern codes."""

    def __init__(self, numPoints, radius):
        # Sampling configuration forwarded to local_binary_pattern in describe().
        self.numPoints, self.radius = numPoints, radius

    def describe(self, image, eps=1e-7):
        """Return the normalized LBP histogram of ``image``.

        The image is passed to ``feature.local_binary_pattern`` with the
        "uniform" method, the resulting codes are counted into
        ``numPoints + 2`` unit-wide bins, and the counts are divided by
        their sum plus ``eps`` (which keeps the division defined when the
        sum is zero).
        """
        n_points = self.numPoints
        pattern_map = feature.local_binary_pattern(
            image, n_points, self.radius, method="uniform")

        # Integer bin edges 0, 1, ..., n_points + 2.
        bin_edges = np.arange(0, n_points + 3)
        counts, _edges = np.histogram(
            pattern_map.ravel(),
            bins=bin_edges,
            range=(0, n_points + 2))

        # Convert the counts to floats and scale them by their total.
        normalized = counts.astype("float")
        normalized /= (normalized.sum() + eps)
        return normalized
    
# LBP extractor configured with 24 sampling points on a radius of 3.
desc = LocalBinaryPatterns(24, 3)

# Index the label table by image id once so each image needs a constant-time
# lookup instead of a full-table query. Only the first row per id is kept,
# which matches the previous use of `.values[0]`.
labels_by_id = training_data.drop_duplicates(subset='id').set_index('id')

# One row per labelled image: [id, boneage, is_male, <LBP histogram bins...>]
preprocessed_entries = []
for image_name in log_progress(training_images, every=1):
    # The file stem (name without directory or extension) is the image id.
    image_id = os.path.splitext(os.path.split(image_name)[-1])[0]

    # Skip stray files whose stem is not a number (hidden files, notes, ...).
    try:
        numeric_id = int(image_id)
    except ValueError:
        continue

    # Skip images without a label row. Membership is tested explicitly rather
    # than truth-testing a NumPy array, which is ambiguous for empty arrays
    # and wrongly rejects an id of 0.
    if numeric_id not in labels_by_id.index:
        continue
    boneage = labels_by_id.at[numeric_id, 'boneage']
    is_male = labels_by_id.at[numeric_id, 'male']

    img = load_image(image_name)
    lbp_descriptor = desc.describe(img)

    preprocessed_entries.append([numeric_id, boneage, int(is_male)] + list(lbp_descriptor))

# Cache the table on disk; np.save appends the .npy extension.
np.save('preprocessed_lbp', preprocessed_entries)
    

In [14]:
# Split the cached table into features and targets, a layout more in line
# with assignment 3.
# Each row was written as [id, boneage, is_male, <LBP histogram bins...>].
preprocessed_data = np.load('preprocessed_lbp.npy')

# Targets: the boneage value in column 1.
preprocessed_data_y = [row[1] for row in preprocessed_data]
# Features: everything from column 2 onward (is_male, then the LBP bins).
preprocessed_data_x = [row[2:] for row in preprocessed_data]

# Persist both parts; np.save appends the .npy extension.
np.save('preprocessed_x', preprocessed_data_x)
np.save('preprocessed_y', preprocessed_data_y)