# Resize images

In [None]:
import os
import random
from PIL import Image, ExifTags
import shutil
from tqdm import tqdm

In [None]:
def delete_folder(folder):
    """Remove ``folder`` and everything beneath it.

    Does nothing when ``folder`` does not exist.
    """
    if not os.path.exists(folder):
        return
    shutil.rmtree(folder)

# File extensions (lower-case, including the dot) that are treated as images.
_IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif')


def _is_image_file(filename):
    """Return True when ``filename`` carries a supported image extension.

    The check is case-insensitive and compares the real extension, so
    ``photo.JPG`` is accepted while a name such as ``fakepng`` is not.
    """
    return os.path.splitext(filename)[1].lower() in _IMAGE_EXTENSIONS


def count_total_images(input_folder):
    """Count the image files found anywhere below ``input_folder``.

    Args:
        input_folder: Root directory that is walked recursively.

    Returns:
        The number of files whose extension is one of ``_IMAGE_EXTENSIONS``.
        A folder that does not exist yields 0 because ``os.walk`` produces
        nothing for it.
    """
    return sum(
        1
        for _, _, files in os.walk(input_folder)
        for file in files
        if _is_image_file(file)
    )


def resize_and_split_images(input_folder, output_folder, test_size=0.2,
                            validation_size=0.1, image_size=(224, 224)):
    """Resize every image below ``input_folder`` and split it into data sets.

    ``output_folder`` is deleted first. Each image is then oriented according
    to its EXIF data, resized to ``image_size`` and written to
    ``output_folder/<split>/<relative source folder>`` where ``<split>`` is
    ``train``, ``test`` or ``validation``. The split of each image is drawn
    with ``random.random()``, so the proportions are approximate. Files are
    renamed to ``<prefix>_<source folder name>_<index in folder><ext>`` with
    ``<prefix>`` being ``train``, ``test`` or ``val``.

    Args:
        input_folder: Root directory containing the source images.
        output_folder: Destination root; removed and recreated on every call.
        test_size: Probability that an image goes to the test set.
        validation_size: Probability that an image goes to the validation set.
        image_size: ``(width, height)`` passed to ``Image.resize``.
    """
    # Local import: only ``Image`` and ``ExifTags`` are imported at file level.
    from PIL import ImageOps

    delete_folder(output_folder)

    total_images = count_total_images(input_folder)
    processed_images = 0

    # The context manager closes the progress bar even if an image fails.
    with tqdm(total=total_images, desc='Overall Progress', unit='images') as pbar:
        for subdir, _, files in os.walk(input_folder):
            # Mirror the source folder structure below each split directory.
            relative_path = os.path.relpath(subdir, input_folder)
            subfolder_name = os.path.basename(os.path.normpath(subdir))

            for idx, file in enumerate(files):
                if not _is_image_file(file):
                    continue

                input_path = os.path.join(subdir, file)

                # Close the source file as soon as the pixels are processed;
                # exif_transpose returns an independent, upright copy and
                # works for formats without EXIF support as well.
                with Image.open(input_path) as source:
                    img = ImageOps.exif_transpose(source)
                    img = img.resize(image_size, Image.LANCZOS)

                # Draw the split for this image.
                rand = random.random()
                if rand < test_size:
                    split_dir, prefix = 'test', 'test'
                elif rand < test_size + validation_size:
                    split_dir, prefix = 'validation', 'val'
                else:
                    split_dir, prefix = 'train', 'train'

                output_subfolder = os.path.join(
                    output_folder, split_dir, relative_path)
                extension = os.path.splitext(file)[1]
                new_filename = f"{prefix}_{subfolder_name}_{idx}{extension}"

                os.makedirs(output_subfolder, exist_ok=True)
                img.save(os.path.join(output_subfolder, new_filename))

                processed_images += 1
                pbar.update(1)

            # Guard against an input folder without any images.
            percent = processed_images / total_images * 100 if total_images else 0.0
            pbar.set_postfix(
                {'Progress': f'{processed_images}/{total_images} images processed ({percent:.2f}%)'})

In [None]:
# Source dataset root and destination root for the processed images
input_folder = "F:/datasets/dataset"
output_folder = "F:/thesis/data"

# Resize, rename and distribute the images over train, test and validation sets
resize_and_split_images(input_folder=input_folder,
                        output_folder=output_folder)