<a href="https://colab.research.google.com/github/ondraperny/BI-BPR-2019/blob/master/Data_preprocessor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data preprocessor
Module for preprocessing data. Its purpose is to simplify producing different types of preprocessed data.

Function:

**Copy the whole content (subdirectories recursively) to a new path while applying filters to the copied pictures.**

In [0]:
import sys
# Detect the runtime: the session is treated as Colab when the 'google.colab'
# module is already loaded in this interpreter, otherwise as a local run.
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
  # Mount Google Drive under /content/drive. The import is kept inside the
  # branch so it is only attempted when the Colab check above succeeded.
  from google.colab import drive
  drive.mount('/content/drive')

**PATH_ORIGIN**      is path to directory with input data

**PATH_DESTINATION** is the path to the directory where a copy of the transformed data will be saved (the file structure of the transformed directory stays the same)

If we don't want to create a copy and only want to transform the data in place, set PATH_DESTINATION to the same value as PATH_ORIGIN.

In [0]:
# Paths used when running in Colab (Drive is mounted under /content/drive).
_COLAB_ROOT = "/content/drive/My Drive/SKOLA/Bachelor_work/"
PATH_ORIGIN = _COLAB_ROOT + "XR_HUMERUS/"
PATH_DESTINATION = _COLAB_ROOT + "preprocessed_data/XR_HUMERUS/"

# Paths used for a local (non-Colab) run; the drive letter suggests Windows.
_LOCAL_ROOT = "G:\\My Drive\\SKOLA\\Bachelor_work\\"
PATH_ORIGIN_LOCAL = _LOCAL_ROOT + "XR_HUMERUS\\"
PATH_DESTINATION_LOCAL = _LOCAL_ROOT + "preprocessed_data\\XR_HUMERUS\\"

### Data transformation class
Currently the only class; it contains all required methods.

In [0]:
from PIL import Image
from pathlib import Path
import numpy as np
import os
import IPython.display

class DataTransformation():
  """Encapsulate the functions necessary for processing images and
  handling the file structure.

  Attributes
  ----------
  PATH_ORIGIN : str
    valid path to dir with input data
  PATH_DESTINATION : str
    path to dir with output data (path does not have to exist yet)

  Methods
  -------
  correct_dir_path(path)
    Append a path separator to the path if it does not end with one
  change_path(old_path, destination_path, file_path)
    Change path of file from old dir to a new one
  generate_subdirs(dir_path)
    Yield dir_path and every directory below it
  generate_image_paths(dir_path)
    Yield every png file directly inside dir_path
  open_image(image_path, return_in_PIL_format = False)
    Open an image as grayscale, return None if it cannot be read
  save_image(image_numpy_array, path_to_dir, image_name)
    Save an array as "<image_name>.png" in path_to_dir
  show_images(*images)
    Display arrays as images in the notebook
  copy_imgs_apply_filters(max_images_per_dir, *filters)
    Load images from PATH_ORIGIN, apply every filter function to each image
    and save the result in the same file structure under PATH_DESTINATION
  """

  def __init__(self, PATH_ORIGIN, PATH_DESTINATION):
    self.PATH_ORIGIN = PATH_ORIGIN
    self.PATH_DESTINATION = PATH_DESTINATION

  def correct_dir_path(self, path):
    """Make sure a directory path ends with a path separator.

    Parameters
    ----------
    path : str or path-like
      directory path; converted with str()

    Returns
    -------
    str
      the path unchanged when it is empty or already ends with '/' or a
      backslash, otherwise the path with os.sep appended
    """
    path = str(path)
    # An empty path is left alone: appending a separator would turn
    # "current directory" into the filesystem root.
    if not path or path.endswith(('/', '\\')):
      return path
    # os.sep picks the separator of the machine the code runs on instead of
    # relying on a notebook-level flag.
    return path + os.sep

  def change_path(self, old_path, destination_path, file_path):
    """Move file_path from below old_path to below destination_path.

    The old_path prefix is cut off by length, so file_path is expected to
    start with old_path (written the same way).

    Parameters
    ----------
    old_path : str
      directory that file_path currently lives in
    destination_path : str
      directory that should replace old_path
    file_path : str or Path
      path of a file or directory below old_path (or old_path itself)

    Returns
    -------
    str
      destination_path followed by the part of file_path after old_path
    """
    old_path = self.correct_dir_path(old_path)
    # When file_path is old_path itself without the trailing separator, the
    # slice is empty and the destination directory is returned.
    relative_part = str(file_path)[len(old_path):]
    return self.correct_dir_path(destination_path) + relative_part

  @staticmethod
  def generate_subdirs(dir_path):
    """Yield dir_path itself and every directory below it (recursive)."""
    yield from Path(dir_path).glob("**")

  @staticmethod
  def generate_image_paths(dir_path):
    """Yield the path of every png image directly inside dir_path."""
    yield from Path(dir_path).glob("*.png")

  @staticmethod
  def open_image(image_path, return_in_PIL_format = False):
    """Open an image and convert it to 8-bit grayscale (mode 'L').

    Parameters
    ----------
    image_path : str or Path
      path of the image file
    return_in_PIL_format : bool
      when True return the PIL image, otherwise a numpy array

    Returns
    -------
    PIL.Image.Image, numpy.ndarray or None
      the loaded image, or None when it could not be opened (a message is
      printed in that case)
    """
    try:
      image_PIL = Image.open(image_path).convert('L')
    except Exception:
      # Best effort: one unreadable file must not stop a whole run. Unlike a
      # bare except this lets KeyboardInterrupt/SystemExit through.
      print("Can't open image \"" + str(image_path) + "\"")
      return None
    if return_in_PIL_format:
      return image_PIL
    return np.array(image_PIL)

  def save_image(self, image_numpy_array, path_to_dir, image_name):
    """Save an array as "<image_name>.png" inside path_to_dir.

    Parameters
    ----------
    image_numpy_array : numpy.ndarray
      image data accepted by PIL.Image.fromarray
    path_to_dir : str
      existing target directory
    image_name : str
      file name without extension
    """
    path_to_dir = self.correct_dir_path(path_to_dir)
    image = Image.fromarray(image_numpy_array)
    image.save(path_to_dir+image_name+".png")
    print("Image " + image_name + ".png saved in " + path_to_dir)

  @staticmethod
  def show_images(*images):
    """Display every given numpy array as an image in the notebook."""
    for img in images:
      IPython.display.display(Image.fromarray(img))

  def copy_imgs_apply_filters(self, max_images_per_dir, *filters):
    """Recreate the directory tree of PATH_ORIGIN under PATH_DESTINATION and
    save a filtered copy of the png images of every directory.

    Parameters
    ----------
    max_images_per_dir : int
      maximum amount of processed images per directory, 0 means no limit
    *filters : callable
      functions taking an image array and returning an image array; they are
      applied in the given order
    """
    for directory in self.generate_subdirs(self.PATH_ORIGIN):
      # Create target directory & all intermediate directories if they
      # don't exist yet
      new_path = self.change_path(self.PATH_ORIGIN, self.PATH_DESTINATION, directory)
      try:
        os.makedirs(new_path)
        print("Directory Created: " , new_path)
      except FileExistsError:
        print("Directory Already exists: " , new_path)

      processed = 0
      for image_path in self.generate_image_paths(directory):
        # Check the limit before doing any work, so that exactly
        # max_images_per_dir images are written (not one more).
        if max_images_per_dir and processed >= max_images_per_dir:
          break

        image = self.open_image(image_path)
        if image is None:
          # open_image already reported the problem; skip the file and do
          # not count it towards the limit.
          continue

        for apply_filter in filters:
          image = apply_filter(image)
        self.save_image(image, new_path, Path(image_path).stem)
        processed += 1

### Specific filters that can be used on data
Encapsulated in class ImageFilters

In [0]:
import cv2

class ImageFilters:
  """Encapsulate all image filter functions.

  Every method takes an image array and returns an image array, so the
  methods can be passed as filters to
  DataTransformation.copy_imgs_apply_filters.

  Methods
  -------
  smoother(img)
  histogram_equalization(img)
  clahe(img)
  find_region_of_interest(img)
  """

  @staticmethod
  def smoother(img):
    """Smooth the image by averaging every pixel with its 5x5 neighbourhood."""
    # 25 equal weights summing to 1 -> plain mean over the 5x5 window
    kernel = np.ones((5,5),np.float32)/25
    # ddepth -1 keeps the output depth equal to the input depth
    return cv2.filter2D(img,-1,kernel)

  @staticmethod
  def histogram_equalization(img):
    """Spread most frequent intensity values, creating better contrast image.

    NOTE(review): cv2.equalizeHist is documented for 8-bit single-channel
    input - confirm callers only pass grayscale uint8 arrays.
    """
    return cv2.equalizeHist(img)

  @staticmethod
  def clahe(img):
    """Contrast limited adaptive histogram equalization - works with adaptive
    areas with small contrast amplification (clip limit 2.0, 8x8 tile grid)"""
    clahe =  cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    return clahe.apply(img)

  @staticmethod
  def find_region_of_interest(img):
    """TODO: function to find ROI.

    Not implemented yet. Until it is, the image is returned unchanged so the
    function can already be used in a filter chain without handing None to
    the next step.
    """
    return img

In [0]:
# Build the transformer with the path pair that matches the current runtime:
# the Colab paths when running in Colab, the local ones otherwise.
trans = (
  DataTransformation(PATH_ORIGIN, PATH_DESTINATION)
  if IN_COLAB
  else DataTransformation(PATH_ORIGIN_LOCAL, PATH_DESTINATION_LOCAL)
)

# Holder of the filter functions that can be handed to the transformer.
filters = ImageFilters()


In [0]:
# Run the copy over the configured origin directory tree, passing 5 as
# max_images_per_dir and find_region_of_interest as the only filter.
# NOTE(review): find_region_of_interest is marked TODO in ImageFilters, so this
# looks like a trial run of the pipeline rather than real preprocessing - confirm.
trans.copy_imgs_apply_filters(5, filters.find_region_of_interest)