# Create Normalized Dataset Using Color Constancy

Import libraries

In [None]:
from __future__ import division
from fastai.vision import Path
import os
import numpy
import cv2

Initialize path to original dataset

In [None]:
# Root of the original dataset; generate_new_data reads images from
# data_path/<split>/<class>.
data_path = Path('data')

Specify directory names of the modified dataset

In [None]:
# Root directory under which generate_new_data writes the normalized images.
new_data_directory = 'xxx_data'
# Sub-directories of the dataset root, one per data split.
data_splits = ['train', 'valid', 'test']
# Sub-directories of every split, one per class label.
data_classes = ['melanoma', 'nevus', 'seborrheic_keratosis']

Create a function that normalizes the colors of the original images using Shades of Gray, a color constancy algorithm discussed in the paper [Improving Dermoscopy Image Classification Using Color Constancy](http://vislab.isr.ist.utl.pt/wp-content/uploads/2016/02/15_JBHI_2.pdf). The code is based on the implementation by [Nick Shawn](https://github.com/nickshawn/Shades_of_Gray-color_constancy_transformation).

In [None]:
def color_constancy(img, power=6, gamma=None):
    """
    Normalize the colors of an image with the Shades of Gray algorithm.

    Parameters
    ----------
    img: 3D numpy array
        The original image with format of (h, w, c)
    power: int
        The degree of norm, 6 is used in reference paper
    gamma: float
        The value of gamma correction, 2.2 is used in reference paper.
        When None, no gamma correction is applied.

    Returns
    -------
    numpy array
        The corrected image with the same shape and dtype as `img`. For
        integer dtypes the values are clipped to the dtype's range before
        the final cast.
    """
    img_dtype = img.dtype

    if gamma is not None:
        # The lookup table has 256 entries, so the image is cast to 8-bit
        # before it is mapped through the table.
        img = img.astype('uint8')
        look_up_table = numpy.zeros((256, 1), dtype='uint8')
        for i in range(256):
            look_up_table[i][0] = 255 * pow(i / 255, 1 / gamma)
        img = cv2.LUT(img, look_up_table)

    img = img.astype('float32')

    # Per-channel illuminant estimate: Minkowski norm of degree `power`
    # taken over the two spatial axes.
    img_power = numpy.power(img, power)
    rgb_vec = numpy.power(numpy.mean(img_power, (0, 1)), 1 / power)
    rgb_norm = numpy.sqrt(numpy.sum(numpy.power(rgb_vec, 2.0)))

    # An all-zero channel (or an all-black image) makes the divisions below
    # produce inf/nan; silence the warnings and fall back to a neutral gain.
    with numpy.errstate(divide='ignore', invalid='ignore'):
        rgb_vec = rgb_vec / rgb_norm
        gains = 1 / (rgb_vec * numpy.sqrt(3))
    gains = numpy.where(numpy.isfinite(gains), gains, 1)

    img = numpy.multiply(img, gains)

    # Scaling can push values past the range of an integer dtype; clip first
    # so the cast below saturates instead of wrapping around.
    if numpy.issubdtype(img_dtype, numpy.integer):
        limits = numpy.iinfo(img_dtype)
        img = numpy.clip(img, limits.min, limits.max)

    return img.astype(img_dtype)

Create a function to generate new data by applying the aforementioned algorithm

In [None]:
def generate_new_data(power, gamma):
    """
    Write a color-normalized copy of every image in the original dataset.

    For each split in `data_splits` and each class in `data_classes`, the
    images found in data_path/<split>/<class> are passed through
    `color_constancy` and saved under new_data_directory/<split>/<class>
    with the same file name. Missing output directories are created.

    Parameters
    ----------
    power: int
        The degree of norm forwarded to `color_constancy`
    gamma: float
        The gamma correction value forwarded to `color_constancy`
    """
    for data_split in data_splits:

        data_split_path = os.path.join(new_data_directory, data_split)
        if not os.path.isdir(data_split_path):
            print("creating ", data_split_path)
            # makedirs also creates new_data_directory itself when missing.
            os.makedirs(data_split_path, exist_ok=True)

        for data_class in data_classes:
            data_class_path = os.path.join(data_split_path, data_class)
            if not os.path.isdir(data_class_path):
                print("creating ", data_class_path)
                os.makedirs(data_class_path, exist_ok=True)

            image_file_paths = (data_path/data_split/data_class).ls()

            for image_file_path in image_file_paths:
                image = cv2.imread(str(image_file_path))
                if image is None:
                    # cv2.imread returns None when it cannot decode the file
                    # (e.g. a non-image entry); skip it instead of crashing.
                    print("skipping unreadable file ", image_file_path)
                    continue

                # Build the destination from the known split/class and the
                # file name, independent of how data_path is spelled.
                final_image_file_path = os.path.join(
                    data_class_path, os.path.basename(str(image_file_path))
                )
                final_image = color_constancy(image, power=power, gamma=gamma)
                cv2.imwrite(final_image_file_path, final_image)

Generate new data

In [None]:
# power=6 and gamma=2.2, the values the color_constancy docstring attributes
# to the reference paper.
generate_new_data(6, 2.2)