In [1]:
import os
import shutil
import random
import config
from PIL import Image
from image_preprocessing import crop_resize

In [2]:
def create_subset_resize_and_split(source_dir, train_test_base_folder, category, size, n, test_ratio=0.2, seed=None):
    """Sample up to ``n`` images from ``source_dir``, process them, and write a train/test split.

    Files in ``source_dir`` whose names end in .png/.jpg/.jpeg (case-insensitive)
    and do not start with ``._`` are shuffled; the first ``n`` are kept. Each
    kept image is opened with PIL, passed through ``crop_resize(image, size)``
    (project helper -- presumably crops and resizes to ``size``; confirm against
    ``image_preprocessing``), and saved under its original filename to
    ``<train_test_base_folder>/train/<category>`` or
    ``<train_test_base_folder>/test/<category>``.

    Args:
        source_dir: Directory containing the source images.
        train_test_base_folder: Root folder under which ``train/`` and ``test/``
            are created.
        category: Sub-folder name used inside both ``train/`` and ``test/``.
        size: Forwarded unchanged to ``crop_resize``.
        n: Maximum number of images to select after shuffling.
        test_ratio: Fraction of the selected images assigned to the test split.
            Must lie in [0, 1].
        seed: Optional seed for the shuffle. When given, a private
            ``random.Random(seed)`` is used so the same directory contents
            always yield the same subset and split. When ``None`` the
            module-level ``random`` generator is used, so a caller's prior
            ``random.seed(...)`` call is still honoured.

    Returns:
        dict with the number of images written to ``'train'`` and ``'test'``
        and the number that ``'failed'`` to process.

    Raises:
        ValueError: If ``test_ratio`` is outside [0, 1].
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError(f"test_ratio must be between 0 and 1, got {test_ratio}")

    # os.listdir returns entries in arbitrary order; sort first so the shuffle
    # result depends only on the RNG state, not on the filesystem.
    image_files = sorted(
        f for f in os.listdir(source_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg')) and not f.startswith('._')
    )
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)
    selected_files = image_files[:n]

    # The split is decided by position in the selection, before processing, so
    # a failed image reduces the size of its own split rather than shifting others.
    split_index = int(len(selected_files) * (1 - test_ratio))

    train_folder = os.path.join(train_test_base_folder, 'train', category)
    test_folder = os.path.join(train_test_base_folder, 'test', category)

    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    counts = {'train': 0, 'test': 0, 'failed': 0}
    file_counter = 0

    for i, filename in enumerate(selected_files):
        subset = 'train' if i < split_index else 'test'
        input_path = os.path.join(source_dir, filename)
        output_path = os.path.join(train_folder if subset == 'train' else test_folder, filename)

        try:
            with Image.open(input_path) as image:
                processed_image = crop_resize(image, size)
                processed_image.save(output_path)
        except Exception as e:
            # Best-effort: one unreadable image must not abort a long batch,
            # but it is counted so the caller can see how many were lost.
            counts['failed'] += 1
            print(f"Error processing {input_path}: {e}")
            continue

        counts[subset] += 1
        file_counter += 1

        if file_counter % 1000 == 0:
            print(f"Saved {file_counter} files")

    return counts

In [3]:
# Seed the global RNG so the shuffled subset and train/test assignment are
# repeatable on a Restart & Run All (the split function shuffles with `random`).
SEED = 42
random.seed(SEED)

size = (700, 700)   # passed straight through to crop_resize
n_film = 30000      # max images sampled from the film source directory
n_digital = 30000   # max images sampled from the digital source directory

# Same pipeline for both categories; only the source directory, label and cap differ.
for source_dir, category, n_images in (
    (config.tts_source_dir_film, 'film', n_film),
    (config.tts_source_dir_digital, 'digital', n_digital),
):
    create_subset_resize_and_split(source_dir, config.tts_base_folder, category, size, n_images)

Saved 1000 files
Saved 2000 files
Saved 3000 files
Saved 4000 files
Saved 5000 files
Saved 6000 files
Saved 7000 files
Saved 8000 files
Saved 9000 files
Saved 10000 files
Saved 11000 files
Saved 12000 files
Saved 13000 files
Saved 14000 files
Saved 15000 files
Saved 16000 files
Saved 17000 files
Saved 18000 files
Saved 19000 files
Saved 20000 files
Saved 21000 files
Saved 22000 files
Saved 23000 files
Saved 24000 files
Saved 25000 files
Saved 26000 files
Saved 27000 files
Saved 28000 files
Saved 29000 files
Saved 30000 files
Saved 1000 files
Saved 2000 files
Saved 3000 files
Saved 4000 files
Saved 5000 files
Saved 6000 files
Saved 7000 files
Saved 8000 files
Saved 9000 files
Saved 10000 files
Saved 11000 files
Saved 12000 files
Saved 13000 files
Saved 14000 files
Saved 15000 files
Saved 16000 files
Saved 17000 files
Saved 18000 files
Saved 19000 files
Saved 20000 files
Saved 21000 files
Saved 22000 files
Saved 23000 files
Saved 24000 files
Saved 25000 files
Saved 26000 files
Saved 2700