In [None]:
import os
from PIL import Image
from tqdm import tqdm

def resize_and_save_image(input_path, output_path, size=(64, 64)):
    """Resize a single image file and write the result to disk.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resized image is written to. The data is
            always encoded as PNG, whatever extension this path carries.
        size: Target size handed to ``Image.resize``.
    """
    with Image.open(input_path) as source_image:
        # resize() returns a new image; the source is closed when the
        # ``with`` block exits.
        resized_image = source_image.resize(size)
        resized_image.save(output_path, format="png")

def process_images(input_dir, output_dir, size=(64, 64)):
    """Resize every image under ``input_dir`` into a mirrored tree in ``output_dir``.

    The input is walked as ``input_dir/<split>/<class>/<image>`` and each image
    is written to ``output_dir/<split>/<class>/<image>`` under the same name.

    Args:
        input_dir: Root directory containing one sub-directory per split.
        output_dir: Root directory for the resized copies; class directories
            are created on demand.
        size: Target size forwarded to ``resize_and_save_image``.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # traversal (and the progress output) deterministic between runs.
    for split in sorted(os.listdir(input_dir)):
        split_path = os.path.join(input_dir, split)
        # A stray file beside the split folders (e.g. .DS_Store) would make the
        # nested os.listdir() raise NotADirectoryError, so skip it.
        if not os.path.isdir(split_path):
            continue

        for _class in sorted(os.listdir(split_path)):
            class_input_path = os.path.join(split_path, _class)
            if not os.path.isdir(class_input_path):
                continue

            class_output_path = os.path.join(output_dir, split, _class)
            os.makedirs(class_output_path, exist_ok=True)

            # Only regular files are candidates; nested folders are ignored so
            # the progress-bar total matches the work actually done.
            image_files = [
                entry
                for entry in sorted(os.listdir(class_input_path))
                if os.path.isfile(os.path.join(class_input_path, entry))
            ]
            for image_file in tqdm(image_files, desc=f"{split} - {_class}"):
                input_image_path = os.path.join(class_input_path, image_file)
                output_image_path = os.path.join(class_output_path, image_file)
                resize_and_save_image(input_image_path, output_image_path, size)

if __name__ == "__main__":
    # Both paths are relative, so they resolve against the kernel's current
    # working directory.
    image_size = (64, 64)
    input_directory, output_directory = '../data/og', '../data/resized'

    process_images(
        input_dir=input_directory,
        output_dir=output_directory,
        size=image_size,
    )


In [3]:
import os
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor


def resize_and_save_image(input_path, output_path, size=(64, 64)):
    """Resize one image and save it as PNG, reporting failures instead of raising.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resized image is written to. The data is
            always encoded as PNG, whatever extension this path carries.
        size: Target size handed to ``Image.resize``.

    Any exception raised while opening, resizing or saving is caught and
    printed, so the function always returns ``None``.
    """
    try:
        with Image.open(input_path) as source_image:
            source_image.resize(size).save(output_path, format="png")
    except Exception as exc:
        # Best-effort: log the offending file and carry on.
        print(f"Error processing {input_path}: {exc}")

def process_class_images(class_input_path, class_output_path, image_files, size, _class, max_workers=100):
    """Resize all images of one class concurrently on a thread pool.

    Args:
        class_input_path: Directory holding the class's source images.
        class_output_path: Directory the resized images are written to.
        image_files: File names (relative to ``class_input_path``) to process.
        size: Target size forwarded to ``resize_and_save_image``.
        _class: Class name; used only in the progress-bar label.
        max_workers: Upper bound on worker threads. Defaults to 100, the value
            that used to be hard-coded.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(
                resize_and_save_image,
                os.path.join(class_input_path, image_file),
                os.path.join(class_output_path, image_file),
                size,
            )
            for image_file in image_files
        ]

        # Futures are awaited in submission order, so the bar advances as the
        # oldest outstanding task finishes. result() also re-raises any
        # exception that escaped the worker.
        for future in tqdm(futures, desc=f"Processing {_class} images"):
            future.result()

def process_images(input_dir, output_dir, size=(64, 64)):
    """Resize every image under ``input_dir`` into a mirrored tree in ``output_dir``.

    The input is walked as ``input_dir/<split>/<class>/<image>``; each class
    folder is handed to ``process_class_images`` together with the matching
    ``output_dir/<split>/<class>`` destination.

    Args:
        input_dir: Root directory containing one sub-directory per split.
        output_dir: Root directory for the resized copies; class directories
            are created on demand.
        size: Target size forwarded to ``process_class_images``.
    """
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # traversal (and the progress output) deterministic between runs.
    for split in sorted(os.listdir(input_dir)):
        split_path = os.path.join(input_dir, split)
        # A stray file beside the split folders (e.g. .DS_Store) would make the
        # nested os.listdir() raise NotADirectoryError, so skip it.
        if not os.path.isdir(split_path):
            continue

        for _class in sorted(os.listdir(split_path)):
            class_input_path = os.path.join(split_path, _class)
            if not os.path.isdir(class_input_path):
                continue

            class_output_path = os.path.join(output_dir, split, _class)
            os.makedirs(class_output_path, exist_ok=True)

            # Only regular files are candidates; nested folders are ignored so
            # the progress-bar total matches the work actually done.
            image_files = [
                entry
                for entry in sorted(os.listdir(class_input_path))
                if os.path.isfile(os.path.join(class_input_path, entry))
            ]
            process_class_images(class_input_path, class_output_path, image_files, size, _class)

if __name__ == "__main__":
    # Both paths are relative, so they resolve against the kernel's current
    # working directory.
    image_size = (64, 64)
    input_directory, output_directory = 'data/og', 'data/resized'

    process_images(
        input_dir=input_directory,
        output_dir=output_directory,
        size=image_size,
    )


Processing dog images: 100%|██████████| 4678/4678 [00:25<00:00, 186.23it/s]
Processing wild images: 100%|██████████| 4593/4593 [00:13<00:00, 337.70it/s]  
Processing cat images: 100%|██████████| 5065/5065 [00:24<00:00, 209.27it/s]
Processing dog images: 100%|██████████| 491/491 [00:00<00:00, 1506.94it/s]
Processing wild images: 100%|██████████| 483/483 [00:00<00:00, 1086.38it/s]
Processing cat images: 100%|██████████| 493/493 [00:00<00:00, 1873.36it/s]


In [2]:
# Show the contents of the kernel's current working directory.
os.listdir(".")

['.git',
 'README.md',
 'download.sh',
 '.python-version',
 'src',
 'train-autoencoder.py',
 'notebooks',
 'uv.lock',
 '.gitignore',
 'pyproject.toml',
 'data']