In [2]:
# Libraries
import os
from PIL import Image, ImageEnhance
import numpy as np
from collections import defaultdict

# Directories
RAW_IMG_DIR = "DATA/"
OUTPUT_DIR = "STANDARDIZED_AUGMENTED_DATA/"

# Define get_folders function before using it
def get_folders(directory):
    '''
    Param: directory - str
    Use: gets all subfolders in directory
    Returns: list of folder names
    '''
    folders = []
    for entry in os.scandir(directory):
        if entry.is_dir():
            folders.append(entry.name)
    return folders

# Get insect folders list
INSECT_FOLDERS_LIST = get_folders(RAW_IMG_DIR)

# make dir if doesn't exist already
os.makedirs(OUTPUT_DIR, exist_ok=True)

def get_image_file(img_path):
    '''
    Param: img_path - str
    Use: Splits the string from last occurrence of forward slash.
    Returns: filename as str
    '''

    filename = img_path.rsplit('/', 1)[-1]
    return filename

def preprocess_image(image_path, desired_square_dim = 224, verbose = True):
    '''
    Param: image_path - str, desired_square_dim - int
    Use: 
        # convert to RBG format
        # standardize to desired size
    
    Returns: standardized image object (none if NA), log as dict of image errors if applicable
    '''
    error_log = defaultdict(list)
    img_name = get_image_file(image_path)
    
    try:
        # Open image & convert to RGB
        image = Image.open(image_path).convert("RGB")
        
        # Standardize to fixed image size
        image = image.resize((desired_square_dim, desired_square_dim))
        
        return image, error_log  #successful preprocessing (error log will be empty)
    
    except Exception as e:
        
        if verbose:
            print(f"Error processing {image_path}: {e}")
            
        #update error log
        error_log[img_name] = e

        return None, error_log   #unsuccessful preprocessing

def normalize_image(img_object):
    '''
    Param: image - PIL Image object
    Returns: Normalized NumPy array with pixel values in the range [-1, 1] (can change this range if needed)
    '''
    image_array = np.array(img_object).astype(np.float32)
    normalized_array = (image_array / 127.5) - 1.0
    return normalized_array

def augment_image(img_object, output_subdir, base_filename, verbose=True):
    '''
    Param: img_object - Image object, output_subdir - str, base_filename - str
    Use:
        # create new images by flipping each horizontally, vertically, rotate, and brightness
        # saves augmented images in output folder
    
    Returns: boolean reflection image augmentation success, log as dict of image errors if applicable
    '''
    
    error_log = defaultdict(list)
    
    try:

        image = img_object

        # augmentation transformations to be applied
        augmentations = {
            "original": image,
            "flipped_horizontally": image.transpose(Image.FLIP_LEFT_RIGHT),
            "flipped_vertically": image.transpose(Image.FLIP_TOP_BOTTOM),
            "rotated_90": image.rotate(90),
            "rotated_180": image.rotate(180),
            "brightness_enhanced": ImageEnhance.Brightness(image).enhance(1.5)
        }
        
        # Apply each augmentation and save
        for aug_name, aug_image in augmentations.items():

            #save the 5 generated images AND the original (6 images total) 
            aug_save_path = os.path.join(output_subdir, f"{base_filename}_{aug_name}.jpg")
            aug_image.save(aug_save_path)
        
        return True, error_log
    
    except Exception as e:

        if verbose:
            print(f"Error processing {image_path}: {e}")

        #update error log
        error_log[base_filename] = e

        return False, error_log

def main():
    total_files = 0
    successfully_processed_and_standardized = 0


    # Process selected folders
    for folder in INSECT_FOLDERS_LIST:

        # get the path of the folder
        folder_path = os.path.join(RAW_IMG_DIR, folder)

        # get path of output folder
        output_folder = os.path.join(OUTPUT_DIR, folder)

        # make insect subfolders if DNE
        os.makedirs(output_folder, exist_ok=True)


        for root, subdirs, files in os.walk(folder_path):
            for file in files:  # for each image in RAW_IMG_DATA
                total_files += 1

                # get img files
                if file.lower().endswith(('.jpg', '.jpeg', '.png')):

                    # get complete path to raw image
                    input_path = os.path.join(root, file)

                    # saving relative path to this img
                    relative_path = os.path.relpath(root, folder_path)

                    # constuct path to save processed img to corresponding output folder
                    output_subdir = os.path.join(output_folder, relative_path)

                    # saving info needed for output file saving
                    base_filename, file_extension = os.path.splitext(file)

                    # 1) standardize this img
                    processed_img, process_error_log = preprocess_image(input_path)

                    # successfully processed img
                    if processed_img is not None:
                        
                        # 2) increase data set with augmentation on processed image
                        final_img_success, augment_error_log = augment_image(processed_img, output_subdir, base_filename)

                        if final_img_success == True:
                            successfully_processed_and_standardized += 6

                        else:
                            print(augment_error_log)
                            
                        # get the file extension from the original image
                        file_extension = os.path.splitext(file)[1]
                        # 3) normalize the standardized augmented image
                        for aug_name in ["original", "flipped_horizontally", "flipped_vertically", "rotated_90", "rotated_180", "brightness_enhanced"]:
                            aug_image_path = os.path.join(output_subdir, f"{base_filename}_{aug_name}{file_extension}")
                            aug_image = Image.open(aug_image_path)
                            normalized_array = normalize_image(aug_image)
                            

                    # unsuccessfully processed image
                    else:
                        print(process_error_log)
                        
                else:
                    print(f"Skipped non-image file: {file}")

    # summary
    print(
        f"\n\nStandardizing, data augmentation, and image normalization complete!\n"
        f"Successfully processed: {successfully_processed_and_standardized}, Total raw images: {total_files}"
    )

if __name__ == "__main__":
    main()


Skipped non-image file: .DS_Store


Standardizing, data augmentation, and image normalization complete!
Successfully processed: 360, Total raw images: 61
