In [1]:
from os import listdir
from os.path import isdir, isfile, join
import random

import numpy as np
from PIL import Image

In [10]:
# Root folder of the dataset; every sub-directory in it is treated as one subset
dataset_path = "data/"
# listdir returns entries in arbitrary order and also lists plain files, so:
#  - keep only directories (a stray file would make the per-subset listdir fail later)
#  - sort the names so the subset order is the same on every run and every machine
subsets = sorted(d for d in listdir(dataset_path) if isdir(join(dataset_path, d)))

<h1> Resizing </h1>

In [24]:
# Resolution as a (width, height) tuple, the form PIL's Image.size / Image.resize use.
# NOTE(review): the height is 1090, not the common 1080 - confirm this is intended.
TARGET_RES = (1920, 1090)

In [None]:
# Disabled one-off resize pass, kept for reference only. The code is wrapped in a
# string literal, so running this cell just evaluates the string and executes none
# of the statements inside it.
# The wrapped loop opens every file of every subset and, when the image size differs
# from TARGET_RES, saves a resized copy next to it under the original path with its
# last 4 characters replaced by "_resized.png"; it does not remove the original file.
'''
I ONLY RAN THIS ONCE TO RESIZE EVERYTHING TO THE SAME RES - DO NOT RUN AGAIN
LEFT IT HERE ONLY FOR REFERENCE

subset_index = 0

for dir_name in subsets:
    
    subset_path = dataset_path + dir_name + "/"
    subset_files = [f for f in listdir(subset_path) if isfile(join(subset_path, f))]
    subset_files.sort()
    
    for filename in subset_files:
        file_path = subset_path + filename
        image = Image.open(file_path)
        if not image.size == TARGET_RES:
            image = image.resize(TARGET_RES)
            image.save(file_path[:-4] + "_resized.png")

    print("Processed subset " + str(subset_index + 1) + "/" + str(len(subsets)) )
    subset_index += 1
'''

<h1> Processing </h1>

In [5]:
# Crop box handed to PIL's Image.crop in load_and_crop_images
CROP_RECTANGLE = (
    610,   # left
    190,   # upper
    1300,  # right
    900,   # lower
)
# Number of fatigue classes the images of each subset are divided into
NUM_CLASSES = 5

In [6]:
def load_and_crop_images():
    '''
    Loads every image of every subset, crops it, converts it to grayscale and
    labels it with a fatigue class.

    Reads the notebook-level names dataset_path, subsets, CROP_RECTANGLE and NUM_CLASSES.

    returns:
        images_unnormalized: List of lists "subset", one per entry of subsets, in the same order:
            subset: List of tuples "(image, label)" containing one set of the exercise,
                    ordered by file name
                image: Numpy array with the ultrasound image cropped to CROP_RECTANGLE
                       and converted to grayscale
                label: Integer from 0 to NUM_CLASSES - 1 indicating the fatigue class
                       where the fatigue class indicates the number of reps that the muscle
                       has been exposed to. The number of reps is divided into roughly equal
                       fatigue classes of approximately size n, the first n reps belong to
                       fatigue class 0, the next n reps to fatigue class 1 etc.
                       It will be easier for a neural net to predict a fatigue class
                       than a specific rep number.
        baseline_brightness: Mean, over all non-empty subsets, of the floored average grey
                             value of each subset's first image (0.0 if no image was loaded)
        num_images: total number of images
    '''
    images_unnormalized = []
    # Floored mean pixel brightness of the first image of every non-empty subset.
    # Empty subsets contribute nothing, so they cannot drag the baseline towards 0.
    first_image_brightness = []
    num_images = 0

    for subset_index, dir_name in enumerate(subsets):
        subset_path = join(dataset_path, dir_name)
        # Filenames correspond to the order in which the images were taken so
        # higher number in file name -> more reps done on the muscle.
        # We sort to keep them in the rep order.
        # NOTE(review): this is a plain string sort, so it only matches the rep order
        # if the numbers in the file names are zero-padded - confirm against the data.
        subset_files = sorted(f for f in listdir(subset_path) if isfile(join(subset_path, f)))
        # Reps per fatigue class (rounded up so that at most NUM_CLASSES classes are used)
        images_per_class = int(np.ceil(len(subset_files) / NUM_CLASSES))

        subset_images_unnormalized = []
        for file_in_subset_index, filename in enumerate(subset_files):
            # The context manager releases the file handle as soon as the pixels
            # have been copied into the numpy array
            with Image.open(join(subset_path, filename)) as image:
                # Crop the padding away, then convert to grayscale so that the
                # resulting numpy array doesn't have 3 RGB channels
                cropped = image.crop(CROP_RECTANGLE).convert("L")
                image_as_array = np.asarray(cropped)
            num_images += 1

            if file_in_subset_index == 0:
                # First file of the subset: remember its mean brightness
                first_image_brightness.append(np.floor(image_as_array.mean()))

            # Integer division keeps the label a plain int, as documented
            label = file_in_subset_index // images_per_class
            subset_images_unnormalized.append((image_as_array, label))

        images_unnormalized.append(subset_images_unnormalized)
        print("Processed subset " + str(subset_index + 1) + "/" + str(len(subsets)) )

    baseline_brightness = np.mean(first_image_brightness) if first_image_brightness else 0.0
    return images_unnormalized, baseline_brightness, num_images

<h1> Normalization </h1>

In [7]:
# Image dimensions used by normalize_images to size its flattened-image rows; that
# function only uses their product (IMAGE_WIDTH * IMAGE_HEIGHT).
# NOTE(review): if CROP_RECTANGLE = (610, 190, 1300, 900) is (left, upper, right, lower),
# the cropped image is 690 px wide and 710 px tall, so these two names look swapped.
# The product is the same either way; confirm before using them individually
# (e.g. in a reshape).
IMAGE_WIDTH = 710
IMAGE_HEIGHT = 690

In [8]:
def normalize_images(images, baseline_brightness, num_images):
    '''
    Brightness-normalizes and flattens all images and one-hot encodes their labels.

    images is a list of subsets, each a list of "(image, label)" tuples.
    Within a subset, the offset (baseline_brightness - mean of the subset's first image)
    is added to every image, so all first images end up with a mean grey value of
    baseline_brightness while brightness differences inside the subset are preserved.

    returns:
        X_data: Array of shape (num_images, IMAGE_WIDTH * IMAGE_HEIGHT), where row i
                holds the flattened, brightness-shifted pixels of the i-th image
                (subsets are concatenated in the order given).
        y_data: Array of shape (num_images, NUM_CLASSES), where y_data[i][j] == 1
                if the i-th image is labeled j and 0 otherwise.
    '''
    pixels_per_image = IMAGE_WIDTH * IMAGE_HEIGHT
    X_data = np.zeros((num_images, pixels_per_image))
    y_data = np.zeros((num_images, NUM_CLASSES))

    # Row of X_data / y_data that the next image is written to
    row = 0
    for subset in images:
        offset = 0
        for position, entry in enumerate(subset):
            pixels = entry[0]
            class_index = int(entry[1])

            # The first image of a subset defines the offset for the whole subset
            if position == 0:
                offset = baseline_brightness - pixels.mean()

            shifted = pixels + offset
            X_data[row] = shifted.flatten()
            y_data[row][class_index] = 1
            row += 1

    return X_data, y_data

<h1> Main </h1>

In [11]:
images, baseline_brightness, num_images = load_and_crop_images()
X,y = normalize_images(images, baseline_brightness, num_images)

Processed subset 1/20
Processed subset 2/20
Processed subset 3/20
Processed subset 4/20
Processed subset 5/20
Processed subset 6/20
Processed subset 7/20
Processed subset 8/20
Processed subset 9/20
Processed subset 10/20
Processed subset 11/20
Processed subset 12/20
Processed subset 13/20
Processed subset 14/20
Processed subset 15/20
Processed subset 16/20
Processed subset 17/20
Processed subset 18/20
Processed subset 19/20
Processed subset 20/20
