In [1]:
import os
import shutil
import random
import config
from PIL import Image

In [2]:
# def create_subset(original_dir, target_dir, n=5000):
#     if not os.path.exists(target_dir):
#         os.makedirs(target_dir)

#     files = os.listdir(original_dir)
#     files = [os.path.join(original_dir, f) for f in files]
#     random.shuffle(files)

#     for f in files[:n]:
#         shutil.copy(f, target_dir)

In [3]:
def resize_and_crop_image(input_path, output_path, size):
    """Center-crop an image to the aspect ratio of ``size``, resize it, and save it.

    The image is cropped symmetrically around its center: horizontally when it
    is wider than the target aspect ratio, vertically otherwise. The cropped
    region is then resampled to exactly ``size``.

    Args:
        input_path: Path of the image file to read.
        output_path: Path the processed image is written to (passed to
            ``Image.save``).
        size: ``(width, height)`` of the output image in pixels.

    Raises:
        Any exception raised by Pillow while opening, resampling, or saving
        the image is propagated to the caller.
    """
    target_ratio = size[0] / size[1]

    with Image.open(input_path) as source:
        image_ratio = source.width / source.height

        if image_ratio > target_ratio:
            # Wider than the target: keep the full height, trim left and right.
            # Clamp to at least one pixel so extreme ratios never give an empty box.
            new_width = max(1, int(target_ratio * source.height))
            # Integer offsets keep the box exactly new_width wide; float offsets
            # are rounded per edge by Pillow and can be off by one pixel.
            left = (source.width - new_width) // 2
            box = (left, 0, left + new_width, source.height)
        else:
            # Narrower than (or equal to) the target: keep the full width,
            # trim top and bottom.
            new_height = max(1, int(source.width / target_ratio))
            top = (source.height - new_height) // 2
            box = (0, top, source.width, top + new_height)

        # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
        # filter and is available in both older and newer releases.
        resized = source.crop(box).resize(size, Image.LANCZOS)
        resized.save(output_path)

In [4]:
def create_subset_and_resize(original_dir, target_dir, size, n, seed=None):
    """Resize and crop a random sample of images from one directory into another.

    Up to ``n`` files with a ``.png``, ``.jpg`` or ``.jpeg`` extension
    (case-insensitive) are picked at random from ``original_dir``, skipping
    names that start with ``._``. Each one is passed to
    ``resize_and_crop_image`` and written to ``target_dir`` under the same
    file name. A file that fails to process is reported with ``print`` and
    skipped; it does not abort the run.

    Args:
        original_dir: Directory to sample images from.
        target_dir: Directory to write processed images to; created if missing.
        size: ``(width, height)`` forwarded to ``resize_and_crop_image``.
        n: Maximum number of images to process.
        seed: Optional seed for a private random generator, making the sample
            reproducible. When ``None`` (the default) the module-level
            ``random`` state is used, as before.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(target_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order, so sort before
    # shuffling: the same random state then always selects the same files.
    image_files = sorted(
        f for f in os.listdir(original_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg')) and not f.startswith('._')
    )
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(image_files)

    for f in image_files[:n]:
        input_path = os.path.join(original_dir, f)
        output_path = os.path.join(target_dir, f)
        try:
            resize_and_crop_image(input_path, output_path, size)
        except Exception as e:
            # Deliberately best-effort: one unreadable image must not stop the batch.
            print(f"Error processing {input_path}: {e}")

In [6]:
# Build the 600x600 working subsets (at most 9900 images each) for both the
# film and the digital photo collections.
size = (600, 600)
for source_dir, target_dir in (
    (config.subset_original_dir_film, config.subset_target_dir_film),
    (config.subset_original_dir_digital, config.subset_target_dir_digital),
):
    create_subset_and_resize(source_dir, target_dir, size, 9900)

In [None]:
# def resize_and_crop_directory(source_directory, target_directory, size):
#     if not os.path.exists(target_directory):
#         os.makedirs(target_directory)

#     for filename in os.listdir(source_directory):
#         if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
#             input_path = os.path.join(source_directory, filename)
#             output_path = os.path.join(target_directory, filename)
#             resize_and_crop_image(input_path, output_path, size)

In [None]:
# resize_and_crop_directory(config.subset_crop_original_dir_film, config.subset_crop_target_dir_film, (300, 300))
# resize_and_crop_directory(config.subset_crop_original_dir_digital, config.subset_crop_target_dir_digital, (300, 300))

In [None]:
def create_train_test_split(source_folder, train_test_base_folder, category, split_ratio=0.8, seed=None):
    """Copy the images of one category into ``train/`` and ``test/`` folders.

    Files in ``source_folder`` with a ``.png``, ``.jpg`` or ``.jpeg`` extension
    (case-insensitive) are shuffled and split: the first ``split_ratio`` share
    is copied to ``<train_test_base_folder>/train/<category>`` and the rest to
    ``<train_test_base_folder>/test/<category>``. Source files are never
    modified.

    Args:
        source_folder: Directory containing the images to split.
        train_test_base_folder: Directory under which the ``train`` and
            ``test`` folders are created.
        category: Sub-folder name used inside both ``train`` and ``test``.
        split_ratio: Fraction of images assigned to the training set; must lie
            in ``[0, 1]``.
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` (the default) the module-level
            ``random`` state is used, as before.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= split_ratio <= 1:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    train_folder = os.path.join(train_test_base_folder, 'train', category)
    test_folder = os.path.join(train_test_base_folder, 'test', category)

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    # Skip '._' files the same way create_subset_and_resize does: they carry an
    # image extension but are excluded there as non-image files. Sort first
    # because os.listdir() order is arbitrary, which would make even a seeded
    # shuffle irreproducible.
    filenames = sorted(
        f for f in os.listdir(source_folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg')) and not f.startswith('._')
    )
    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(filenames)  # Shuffle the list of files for random splitting

    split_index = int(len(filenames) * split_ratio)

    assignments = (
        (filenames[:split_index], train_folder, test_folder),
        (filenames[split_index:], test_folder, train_folder),
    )
    for names, destination, other_split in assignments:
        for filename in names:
            shutil.copy(os.path.join(source_folder, filename), destination)
            # A previous run with a different shuffle may have put this file in
            # the opposite split; remove that copy so train and test never overlap.
            stale_copy = os.path.join(other_split, filename)
            if os.path.isfile(stale_copy):
                os.remove(stale_copy)


In [None]:
create_train_test_split(config.tts_source_film, config.tts_base_folder, 'film')
create_train_test_split(config.tts_source_digital, config.tts_base_folder, 'digital')

In [None]:
def process_images(source_dir_film, target_dir