# Pain Data Preparation
This notebook prepares the pain dataset so that it can be used to successfully train a convolutional neural network. Preprocessing and data augmentation techniques such as greyscaling, histogram equalization, etc. are employed.

In [1]:
# Relevant imports
import os
import sys
import numpy as np
import cv2
import matplotlib.pyplot as plt
import math

# Put the parent directory on sys.path so the local `Scripts` package can be imported
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
from Scripts import Data_Loader_Functions as DL
from Scripts import Image_Processor as IP

In [2]:
# Define folder paths: one "Pain" directory per pipeline stage below <module_path>/Data
RAW_DATA, PREPROCESSED_DATA, AUGMENTED_DATA = (
    os.path.join(module_path, "Data", stage_dir, "Pain")
    for stage_dir in ("Raw Data", "Preprocessed Data", "Augmented Data")
)

## Create Folder Structure
First, we will duplicate the folder structure in "Raw Data" into "Preprocessed Data" and "Augmented Data".

In [3]:
# Duplicate the raw-data folder structure into both derived-data roots
for mirrored_root in (PREPROCESSED_DATA, AUGMENTED_DATA):
    DL.mirror_folder_structure(RAW_DATA, mirrored_root)

## Process Images
We will now process the images. Preprocessing includes conversion to greyscale and histogram equalization.

In [None]:
# Preprocess images: call the project's bulk processor with RAW_DATA,
# PREPROCESSED_DATA and the ".jpg" extension.
# NOTE(review): presumably RAW_DATA is the source, PREPROCESSED_DATA the destination,
# and the processing is the greyscale conversion + histogram equalization described
# in the section text -- confirm against Image_Processor.bulk_process_images.
IP.bulk_process_images(RAW_DATA, PREPROCESSED_DATA, ".jpg")

In [4]:
# Flip images and copy originals into augmented data folder.
# Both passes use identical arguments apart from the augmentation name.
for augmentation in ("flip", "original"):
    IP.bulk_augment_images(PREPROCESSED_DATA, AUGMENTED_DATA, ".jpg", augmentation, "pain", label_threshold=-1)

In [None]:
# Rotate the flipped images first, then the originals; source and target are both
# the augmented data folder.
for file_suffix in ("_flipped.jpg", "_original.jpg"):
    IP.bulk_augment_images(AUGMENTED_DATA, AUGMENTED_DATA, file_suffix, "rotate_crop", "pain", label_threshold=-1)

# Ensure that naming conventions stay consistent.
# NOTE(review): the exact effect of the "_rotated" / "_straight" arguments depends on
# Image_Processor.bulk_rename_files -- confirm there.
IP.bulk_rename_files(AUGMENTED_DATA, AUGMENTED_DATA, "_rotated", "_straight")

In [5]:
# Downsample augmented data, one split folder at a time
for split_name in ("training", "test"):
    DL.downsample_data(os.path.join(AUGMENTED_DATA, split_name))

## Verify Images
In this part we check that the image augmentation had the desired results.

In [6]:
# Load all images into numpy array
PAIN_TRAIN = os.path.join(AUGMENTED_DATA, "training")
PAIN_TEST = os.path.join(AUGMENTED_DATA, "test")
# NOTE(review): PAIN_TEST is defined but never passed -- the second argument is an
# empty string. Presumably this skips loading the test split; confirm against
# DL.load_pain_data, since later cells still read test_data / test_labels.
train_data, train_labels, test_data, test_labels = DL.load_pain_data(PAIN_TRAIN, '', None)

0 images processed
1000 images processed
2000 images processed
3000 images processed
4000 images processed
5000 images processed
6000 images processed
7000 images processed
8000 images processed
9000 images processed
10000 images processed
11000 images processed
12000 images processed


In [7]:
# Show Flipped/Original Distribution for Train and Test
# The last column of each label row holds the augmentation tag (the recorded output
# lists 'flipped' and 'original'). Only the training labels are counted; the test
# line below is disabled.
print(np.unique(train_labels[:,-1], return_counts=True))
# print(np.unique(test_labels[:,-1], return_counts=True))

(array(['flipped', 'original'], dtype='<U8'), array([6146, 6186]))


In [8]:
# Show exemplary label: the first row of train_labels. The recorded output shows six
# string fields, the last one being the augmentation tag.
train_labels[0]

array(['109', '5', '0', '12', '5', 'flipped'], dtype='<U8')

In [None]:
# Reduce Pain Labels down to 0/1
# NOTE(review): column 4 is presumably the pain level within each label row --
# confirm against the label layout produced by DL.load_pain_data.
pain_label = 4
max_pain_level = 1

# The builtin int replaces np.int, which was only an alias for it: the alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24 (AttributeError).
# NOTE(review): train_labels is overwritten with the reduced labels, so re-running
# this cell without re-loading the data will likely fail on the column indexing.
train_labels = DL.reduce_pain_label_categories(train_labels[:,pain_label].astype(int), max_pain=max_pain_level)
# test_labels = DL.reduce_pain_label_categories(test_labels[:,pain_label].astype(int), max_pain=max_pain_level)

In [None]:
# Show pain distribution for train and test
print(np.unique(train_labels, return_counts=True))
# NOTE(review): the test-label reduction in the previous cell is commented out, so
# test_labels is still whatever DL.load_pain_data returned -- confirm this is intended.
print(np.unique(test_labels, return_counts=True))

In [None]:
# Explore the results: report the shape of every data / label array
for caption, array in (
    ("Train Data: Shape", train_data),
    ("Train Labels: Shape", train_labels),
    ("Test Data: Shape", test_data),
    ("Test Labels: Shape", test_labels),
):
    print(caption, array.shape)

## Model Testing
Here we build a simple Keras model.

In [None]:
def build_cnn(input_shape):
    """
    Build and return a small, uncompiled CNN for two-class image classification.

    Configuration:
    Layer 1: Convolution Layer | Filters: 32  | Kernel Size: 5x5
    Layer 2: Max Pooling Layer | Filter: 2x2
    Layer 3: Convolution Layer | Filters: 64  | Kernel Size: 5x5
    Layer 4: Max Pooling Layer | Filter: 2x2
    Layer 5: Convolution Layer | Filters: 128 | Kernel Size: 5x5
    Layer 6: Max Pooling Layer | Filter: 2x2
    Layer 7: Flatten Layer
    Layer 8: Dense Layer       | Neurons: 128 | Activation: Relu
    Layer 9: Dense Layer       | Neurons: 2   | Activation: Softmax

    The convolution layers are created without an explicit activation argument.
    The model is NOT compiled here; optimizer, loss and metrics are chosen by the
    caller via model.compile().

    Relies on the module-level names `models` and `layers` being defined before
    this function is called.

    :param input_shape:     image input shape (tuple), e.g. (28, 28, 1)

    :return:
        model               uncompiled Sequential model
    """

    conv_kernel = (5, 5)
    pool_window = (2, 2)

    # Three conv/pool stages with growing filter counts, followed by the dense head
    return models.Sequential([
        layers.Conv2D(32, conv_kernel, input_shape=input_shape),
        layers.MaxPooling2D(pool_window),
        layers.Conv2D(64, conv_kernel),
        layers.MaxPooling2D(pool_window),
        layers.Conv2D(128, conv_kernel),
        layers.MaxPooling2D(pool_window),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(2, activation='softmax'),
    ])

In [None]:
# Imports
# `models` and `layers` are the names build_cnn() looks up when it is called, so this
# cell has to run before build_cnn() is invoked.
# NOTE(review): `optimizers` is not referenced in the visible cells (compile() is given
# the string 'sgd'); consider moving this import to the import cell at the top.
import tensorflow as tf
models = tf.keras.models  # like 'from tensorflow.keras import models' (PyCharm import issue workaround)
layers = tf.keras.layers  # like 'from tensorflow.keras import layers' (PyCharm import issue workaround)
optimizers = tf.keras.optimizers  # like 'from tensorflow.keras import optimizers' (PyCharm import issue workaround)

In [None]:
# Instantiate the CNN, using the shape of the first entry of train_data as input shape
model = build_cnn(train_data[0].shape)

In [None]:
# Compile the model with the 'sgd' optimizer, sparse categorical cross-entropy as the
# loss, and accuracy as the reported metric
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Quick smoke test of the model: a single epoch on the first 500 training samples,
# batches of 32, no validation split.
# use_multiprocessing is intentionally not passed: it only applies to generator /
# keras.utils.Sequence inputs (not in-memory arrays) and Model.fit() in Keras 3
# no longer accepts it.
model.fit(train_data[:500], train_labels[:500], epochs=1, batch_size=32, validation_split=0)