In [1]:
import os
import shutil
import PIL.Image

from collections import defaultdict
from sklearn.model_selection import train_test_split

In [2]:
# Dataset root, given relative to the working directory of the notebook.
base_dir = os.path.join('..', '..', 'data')

# Input folders, all direct children of the dataset root.
blurred, dirty_lens, gradient_shadows, rescaled = (
    os.path.join(base_dir, folder_name)
    for folder_name in (
        'images_blurred',
        'images_dirty_lens',
        'images_gradient_shadows',
        'images_original_inception_resnet_v2_200x150'))

# Output folders: one sub-folder per subset under a common parent.
target_training_folder, target_validation_folder = (
    os.path.join(
        base_dir,
        'images_original_inception_resnet_v2_200x150_splitted_with_augmentation',
        subset_name)
    for subset_name in ('training', 'validation'))
def get_paths(path: str) -> list[str]:
    """Return the paths of all files found anywhere below ``path``, sorted.

    Args:
        path: Directory to walk recursively.

    Returns:
        One entry per file, each built as ``os.path.join(root, file)`` from
        the ``os.walk`` results. The list is empty when ``path`` contains no
        files or does not exist (``os.walk`` yields nothing in that case).
    """
    # os.walk yields entries in a filesystem-dependent order; sorting makes
    # every consumer of this list (seeded split, per-name numbering of
    # augmented copies) reproducible across machines and runs.
    return sorted(
        os.path.join(root, file)
        for root, _dirs, files in os.walk(path)
        for file in files)

In [3]:
# Seeded split of the rescaled originals: 10% of the paths go to validation.
original_paths = get_paths(rescaled)
train_files, valid_files = train_test_split(
    original_paths, test_size=0.1, random_state=57)

# Turn each path into a (source, destination) pair; the destination keeps
# only the file name, placed directly inside the subset's target folder.
train_files = [
    (source, os.path.join(target_training_folder, os.path.basename(source)))
    for source in train_files]
valid_files = [
    (source, os.path.join(target_validation_folder, os.path.basename(source)))
    for source in valid_files]

In [4]:
# Create both target folders up front: on a fresh checkout they do not exist,
# and both os.listdir and shutil.copy would fail with FileNotFoundError.
for folder in (target_training_folder, target_validation_folder):
    os.makedirs(folder, exist_ok=True)

# Copy only when the training folder is still empty, so re-running the cell
# does not redo the copy.
if not os.listdir(target_training_folder):
    for source, destination in train_files + valid_files:
        shutil.copy(source, destination)

In [5]:
# Size used when resizing augmented images: 600x450 divided by three per axis.
SMALLER_WIDTH, SMALLER_HEIGHT = 600 // 3, 450 // 3


def match_and_copy_augmented(sources: list[str], all_files: list[str]) -> None:
    """Save a resized copy of every matching augmented image next to its source.

    A file in ``all_files`` matches a source when its stem starts with the
    source's stem, where the stem is the file name up to its first dot. Each
    match is resized to ``SMALLER_WIDTH`` x ``SMALLER_HEIGHT`` and saved in
    the source's directory as ``<source_stem>_<n>.jpg``; ``n`` starts at 0 and
    increases per source stem.

    Args:
        sources: Paths of the images that augmented files are matched against.
        all_files: Paths of the candidate augmented images.

    Note:
        Matching is by prefix, so a source stem that is a prefix of another
        stem also collects that other image's augmented files.
    """
    def get_name(path: str) -> str:
        return os.path.basename(path).split('.')[0]

    # Candidate stems do not depend on the source, so compute them only once
    # instead of once per (source, candidate) pair.
    candidates = [(get_name(file_path), file_path) for file_path in all_files]
    name_counters = defaultdict(int)

    for source in sources:
        source_name = get_name(source)

        # Dot-files such as '.DS_Store' have an empty stem; since every string
        # starts with '', they would otherwise match every augmented file.
        if not source_name:
            continue

        target_dir = os.path.dirname(source)

        for augmented_name, file_path in candidates:
            if not augmented_name.startswith(source_name):
                continue

            count = name_counters[source_name]
            name_counters[source_name] += 1
            destination = os.path.join(target_dir, f'{source_name}_{count}.jpg')

            # Resize straight from the augmented file: copying it first only
            # to overwrite the copy would write every file twice. The context
            # manager closes the source image handle deterministically.
            with PIL.Image.open(file_path) as image:
                image.resize((SMALLER_WIDTH, SMALLER_HEIGHT)).save(destination)

In [6]:
# Pool the files of the three augmentation folders, keeping the folder order
# blurred -> dirty lens -> gradient shadows.
augmentation_paths = [
    augmented_path
    for folder in (blurred, dirty_lens, gradient_shadows)
    for augmented_path in get_paths(folder)]

# Add the matching augmented images to the training subset, then to the
# validation subset.
for subset_folder in (target_training_folder, target_validation_folder):
    match_and_copy_augmented(get_paths(subset_folder), augmentation_paths)