<a href="https://colab.research.google.com/github/ondraperny/BI-BPR-2019/blob/master/Data_preprocessor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data preprocessor
Module for preprocessing data, intended to simplify producing different types of preprocessed data.

Function:

**Copy the whole content (subdirectories recursively) to a new path while applying filters to the copied pictures.**

In [46]:
# Mount Google Drive at /content/drive so that the dataset paths used below
# (which all live under "/content/drive/My Drive/...") can be accessed.
# This cell only works inside Google Colab, where google.colab is available.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**PATH_ORIGIN**      is path to directory with input data

**PATH_DESTINATION** is the path to the directory where the copy of the transformed data will be saved (the file structure of the transformed directory remains the same)

If we don't want to create a copy and only want to transform the data in place, set PATH_DESTINATION to the same value as PATH_ORIGIN.

In [0]:
# Directory tree that is read (searched recursively for *.png files).
PATH_ORIGIN = "/content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/"
# Directory tree that is written. It currently equals PATH_ORIGIN, so images
# are re-saved in place instead of being copied to a separate tree.
PATH_DESTINATION = "/content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/"

### Data transformation class
Currently this is the only class; it contains all required methods.

In [47]:
from PIL import Image
from pathlib import Path
import numpy as np
import os

class DataTransformation():
  '''Mirror a directory tree of PNG images from one location to another.

  Every directory found under PATH_ORIGIN is re-created under
  PATH_DESTINATION and the PNG images inside it are opened and saved again
  at the mirrored location. When both paths are equal the images are
  re-saved in place.
  '''

  def __init__(self, PATH_ORIGIN, PATH_DESTINATION):
    '''Remember the source and target roots.

    @param PATH_ORIGIN       directory that is searched for images
    @param PATH_DESTINATION  directory the mirrored tree is written to
    '''
    self.PATH_ORIGIN = PATH_ORIGIN
    self.PATH_DESTINATION = PATH_DESTINATION

  def correct_dir_path(self, path):
    '''Make sure a directory path ends with a slash.

    Accepts strings as well as pathlib.Path objects. An empty path stands
    for the current directory and is returned as './' (indexing the last
    character of an empty string used to raise IndexError).

    @param path  directory path (str or Path)
    @return path as a string that ends with '/'
    '''
    path = str(path)
    if not path:
      return './'
    if not path.endswith('/'):
      path = path + '/'
    return path

  def change_path(self, old_path, destination_path, file_path):
    '''Re-root file_path from old_path onto destination_path.

    The part of file_path below old_path is computed with pathlib instead
    of cutting a fixed number of characters, so redundant or missing
    slashes in old_path no longer shift the cut position.

    @param old_path          root that file_path currently lives under
    @param destination_path  root the result should live under
    @param file_path         path (str or Path) located in old_path
    @return destination_path (with trailing slash) followed by the part of
            file_path relative to old_path; when file_path is old_path
            itself the result is just destination_path with trailing slash
    @raise ValueError if file_path is not located inside old_path
    '''
    try:
      relative = Path(file_path).relative_to(Path(old_path))
    except ValueError as err:
      raise ValueError(
          "Path '" + str(file_path) + "' is not located inside '"
          + str(old_path) + "'") from err

    relative_part = str(relative)
    # pathlib spells "same directory" as '.'; the mirrored root is then the
    # destination directory itself.
    if relative_part == '.':
      relative_part = ''
    return self.correct_dir_path(destination_path) + relative_part

  @staticmethod
  def find_all_dirs(in_path):
    '''Yield in_path itself and every directory below it, recursively.

    Entries are filtered with is_dir() because, starting with Python 3.13,
    Path.glob("**") also returns regular files.
    '''
    for path in Path(in_path).glob("**"):
      if path.is_dir():
        yield path

  @staticmethod
  def open_image(image_path, return_in_PIL_format = False):
    '''Open an image file.

    @param image_path            path of the image to read
    @param return_in_PIL_format  when True return the PIL image object,
                                 otherwise return its pixels as numpy array
    @return PIL image (the caller is responsible for closing it) or
            numpy array
    '''
    if return_in_PIL_format:
      return Image.open(image_path)
    # The pixel data is copied into the array while the file is open; the
    # context manager then releases the file handle right away.
    with Image.open(image_path) as image_PIL:
      return np.array(image_PIL)

  def save_image(self, image_numpy_array, path_to_dir, image_name):
    '''Save a numpy array as "<image_name>.png" inside path_to_dir.

    @param image_numpy_array  pixel data accepted by PIL.Image.fromarray
    @param path_to_dir        target directory (trailing slash optional)
    @param image_name         file name without extension
    '''
    path_to_dir = self.correct_dir_path(path_to_dir)
    image = Image.fromarray(image_numpy_array)
    image.save(path_to_dir+image_name+".png")
    print("Image " + image_name + ".png saved in " + path_to_dir)

  @staticmethod
  def generate_image_paths_in_dir(dir_path):
    '''Yield the paths of all "*.png" files directly inside dir_path.'''
    for image in Path(dir_path).glob("*.png"):
      yield image

  def flow_control(self, max_images_per_dir=5):
    '''Re-create the directory tree of PATH_ORIGIN under PATH_DESTINATION
    and copy the PNG images of every directory to its mirrored directory.

    TODO: the filter function applied to each image will be passed as a
    parameter; at the moment images are saved unchanged.

    @param max_images_per_dir  upper bound of images processed in each
                               directory. The default of 5 keeps the limit
                               this method always had; pass None to process
                               every image.
    '''
    # Take a snapshot of the tree first so directories created below are
    # never picked up by the still-running directory walk.
    directories = list(self.find_all_dirs(self.PATH_ORIGIN))

    for directory in directories:
      # Create target directory & all intermediate directories if they don't exist
      new_path = self.change_path(self.PATH_ORIGIN, self.PATH_DESTINATION, directory)
      try:
        os.makedirs(new_path)
        print("Directory Created: " , new_path)
      except FileExistsError:
        print("Directory Already exists: " , new_path)

      for index, image_path in enumerate(self.generate_image_paths_in_dir(directory)):
        # Stop as soon as the limit is reached instead of walking over the
        # remaining files of the directory without doing anything.
        if max_images_per_dir is not None and index >= max_images_per_dir:
          break

        image = self.open_image(image_path)
        self.save_image(image, new_path, Path(image_path).stem)

  def normalize_brightness(self, image):
    '''Filter to normalize brightness of pictures.

    Not implemented yet: the method has no body and returns None.
    '''


# Build the transformer from the two paths configured in the cell above.
trans = DataTransformation(PATH_ORIGIN, PATH_DESTINATION)

# Walk PATH_ORIGIN, create the mirrored directories and re-save the images.
# flow_control prints one line per directory and per saved image, and it
# currently handles at most 5 images per directory.
trans.flow_control()

#### TMP tests below

# for path in trans.generate_image_paths_in_dir(PATH_ORIGIN+"train/train_negative"):
#   print(path)

# for path in trans.find_all_dirs(PATH_ORIGIN):
#   print(path)

# tmp_path = '/content/drive/My Drive/SKOLA/Bachelor_work/XR_HUMERUS/train/\
# train_negative'
# trans.change_path(PATH_ORIGIN, PATH_DESTINATION, tmp_path)

Directory Already exists:  /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/
Directory Already exists:  /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train
Directory Already exists:  /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train/train_negative
Image 0.png saved in /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train/train_negative/
Image 1.png saved in /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train/train_negative/
Image 2.png saved in /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train/train_negative/
Image 3.png saved in /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train/train_negative/
Image 4.png saved in /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_HUMERUS/train/train_negative/
Directory Already exists:  /content/drive/My Drive/SKOLA/Bachelor_work/preprocessed_data/XR_