In [1]:
# imports
from src.image_preprocess import ImageProcess
import os
import glob
import yaml
import random
import shutil


In [2]:
# Load the preprocessing configuration from the YAML settings file
with open("settings.yaml", 'r') as settings_file:
    sets = yaml.safe_load(settings_file)

# folder the source images are read from / folder the results are written to
path_input, path_output = sets['path_prepreprocessed_db'], sets['path_preprocessed_db']

# arguments forwarded to ImageProcess.resize_image
im_sz, im_resizing_policy, im_resizing_method = (
    sets['img_size'],
    sets['resizing_policy'],
    sets['resizing_method'],
)

# distortion settings; each one is later indexed with [0] and [1]
im_max_hz = sets['max_hz']
im_gaussian_mu, im_gaussian_std = sets['gaussian_mean'], sets['gaussian_std']
im_holes_amount, im_holes_radius = sets['hole_amount'], sets['hole_radius']

# train / test fractions, read as save_data_split[0] and save_data_split[1]
save_data_split = sets['data_split']


In [3]:
# Split the source images into 3 categories - train, test and val.

# glob returns files in a filesystem-dependent order; sorting gives a stable
# starting order so that a seeded shuffle yields the same split everywhere.
input_images_paths = sorted(glob.glob(os.path.join(path_input, '*.*')))
im_amount = len(input_images_paths)

train_samples_am = int(im_amount * save_data_split[0])
test_samples_am = int(im_amount * save_data_split[1])
# val receives whatever is left after train and test
val_samples_am = im_amount - train_samples_am - test_samples_am

# Slicing would silently truncate on inconsistent fractions, so fail loudly.
if min(train_samples_am, test_samples_am, val_samples_am) < 0:
    raise ValueError(
        "data_split fractions are inconsistent with the number of images: "
        f"train={train_samples_am}, test={test_samples_am}, total={im_amount}"
    )

# An optional 'random_seed' entry in settings.yaml makes the split reproducible.
# When the key is absent, Random(None) is seeded from the OS / current time,
# i.e. the split is random on every run.
split_rng = random.Random(sets.get('random_seed'))

# Shuffle a copy once and cut it into three consecutive slices. This replaces
# repeated list.remove() calls (quadratic) and leaves input_images_paths intact.
shuffled_paths = input_images_paths.copy()
split_rng.shuffle(shuffled_paths)

test_end = train_samples_am + test_samples_am
train_samples = shuffled_paths[:train_samples_am]
test_samples = shuffled_paths[train_samples_am:test_end]
val_samples = shuffled_paths[test_end:]

images_paths = [train_samples, test_samples, val_samples]
images_categories = ['train', 'test', 'val']




# Build, for every category, a record of where each source image is read from
# and where its two result files ('input' and 'output') will be written.
filelist_dict = dict()
for paths, category in zip(images_paths, images_categories):
    category_dir = os.path.join(path_output, category)

    image_dict = dict()
    for n, source in enumerate(paths):
        # Strip only the final extension. Cutting at the first dot would map
        # e.g. 'scan.01.png' and 'scan.02.png' to the same 'scan.bmp'.
        # NOTE(review): two sources that differ only by extension
        # ('a.png' / 'a.jpg') still map to the same target name.
        stem = os.path.splitext(os.path.basename(source))[0]
        bmp_name = stem + '.bmp'

        image_dict[n] = {
            'source': source,
            'output': os.path.join(category_dir, 'output', bmp_name),
            'input': os.path.join(category_dir, 'input', bmp_name),
        }

    filelist_dict[category] = {
        'folder_path': category_dir,
        'images': image_dict,
    }


# (Re)create the folder tree for the preprocessed images.
# An already existing category folder is removed together with its contents.
for category_entry in filelist_dict.values():
    category_root = category_entry['folder_path']
    if os.path.exists(category_root):
        shutil.rmtree(category_root)

    # category folder first, then its two sub-folders
    for new_dir in (
        category_root,
        os.path.join(category_root, 'input'),
        os.path.join(category_root, 'output'),
    ):
        os.makedirs(new_dir)




In [4]:
# PREPROCESSING (sequential version)
# NOTE: the multiprocessing cell further down runs the same per-image steps.

# walk over the image records of every category, one after another
every_image = (
    record
    for group in filelist_dict.values()
    for record in group['images'].values()
)

for record in every_image:
    clean_path, distorted_path, origin_path = (
        record['input'],
        record['output'],
        record['source'],
    )

    # resize the source image and store it under 'input'
    picture = ImageProcess(origin_path)
    picture.resize_image(im_sz, im_resizing_policy, im_resizing_method)
    picture.save_image(clean_path)

    # draw the random distortion properties for this image
    cutoff_hz = random.uniform(im_max_hz[0], im_max_hz[1])
    noise_mean = random.uniform(im_gaussian_mu[0], im_gaussian_mu[1])
    noise_std = random.uniform(im_gaussian_std[0], im_gaussian_std[1])
    holes_count = random.randint(im_holes_amount[0], im_holes_amount[1])

    # distort in order: lowpass filter, gaussian noise, holes
    picture.dist_lowpass(cutoff_hz)
    picture.dist_noise_gaussian(noise_mean, noise_std)
    picture.dist_blackholes(im_holes_radius[0], im_holes_radius[1], holes_count)

    # store the distorted image under 'output'
    picture.save_image(distorted_path)




KeyboardInterrupt: 

In [4]:
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import random

def process_image_task(image):
    """Preprocess a single image record and write both result files.

    The resized source image is saved to ``image['input']``; the same image
    after lowpass filtering, gaussian noise and holes is saved to
    ``image['output']``.

    Args:
        image: dict with the keys ``'source'``, ``'input'`` and ``'output'``,
            each holding a file path.

    Reads the module-level settings ``im_sz``, ``im_resizing_policy``,
    ``im_resizing_method``, ``im_max_hz``, ``im_gaussian_mu``,
    ``im_gaussian_std``, ``im_holes_amount`` and ``im_holes_radius``.

    NOTE(review): if ImageProcess draws from its own RNG (e.g. numpy's global
    one), forked pool workers may start from identical state - confirm.
    """
    clean_path, distorted_path, origin_path = (
        image['input'],
        image['output'],
        image['source'],
    )

    # resized, undistorted version
    picture = ImageProcess(origin_path)
    picture.resize_image(im_sz, im_resizing_policy, im_resizing_method)
    picture.save_image(clean_path)

    # random distortion properties for this image
    cutoff_hz = random.uniform(im_max_hz[0], im_max_hz[1])
    noise_mean = random.uniform(im_gaussian_mu[0], im_gaussian_mu[1])
    noise_std = random.uniform(im_gaussian_std[0], im_gaussian_std[1])
    holes_count = random.randint(im_holes_amount[0], im_holes_amount[1])

    # apply the distortions in a fixed order, then save the result
    picture.dist_lowpass(cutoff_hz)
    picture.dist_noise_gaussian(noise_mean, noise_std)
    picture.dist_blackholes(im_holes_radius[0], im_holes_radius[1], holes_count)
    picture.save_image(distorted_path)

# Gather the image records of all categories into one flat list
images_list = []
for category_entry in filelist_dict.values():
    images_list.extend(category_entry['images'].values())

# Process the images in a pool of cpu_count() workers, with a tqdm progress bar
if __name__ == '__main__':
    with Pool(processes=cpu_count()) as pool:
        finished_tasks = pool.imap_unordered(process_image_task, images_list)
        progress = tqdm(
            finished_tasks,
            total=len(images_list),
            desc="Preprocessing",
            unit="img",
        )
        # drain the iterator so that every task runs; results are not needed
        for _finished in progress:
            pass


Preprocessing: 100%|██████████| 7286/7286 [01:29<00:00, 81.72img/s]
