In [1]:
import os
from PIL import Image, ImageEnhance
import numpy as np

# Input root and the location for generated images. The output folder is a
# sub-folder of the input root, so it is derived from it rather than repeated.
input_directory = "/Users/ju158/Graduate_NEU/DS5500 Capstone/"
output_directory = os.path.join(input_directory, "augmented_images/")
os.makedirs(output_directory, exist_ok=True)

def preprocess_image(image_path, save_path):
    """Standardize one image to 224x224 RGB and write it to ``save_path``.

    The pixels are scaled to [-1, 1] as float32 and then mapped back to
    8-bit values so the result can be stored as an ordinary image file.

    Args:
        image_path: Path of the image to read.
        save_path: Destination path for the standardized image.

    Returns:
        True if the image was written, False if any step raised. The error
        is printed instead of propagated so a batch run can keep going.
    """
    try:
        # Open image & convert to RGB. The context manager closes the file
        # handle as soon as the pixel data has been copied by convert().
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        # Standardize to fixed image size of 224x224
        image = image.resize((224, 224))

        # Convert to array & normalize pixel values [-1, 1]
        image_array = np.asarray(image, dtype=np.float32)
        image_array = (image_array / 127.5) - 1.0

        # Map back to [0, 255]. Round and clip before the uint8 cast: a bare
        # astype() truncates, so a value such as 0.99999 left by float32
        # rounding would be stored as 0 instead of 1.
        restored = np.clip(np.rint((image_array + 1.0) * 127.5), 0, 255)

        # Save preprocessed image
        preprocessed_image = Image.fromarray(restored.astype(np.uint8))
        preprocessed_image.save(save_path)
        return True
    except Exception as e:
        # Deliberately broad: one unreadable file must not stop the batch.
        print(f"Error processing {image_path}: {e}")
        return False

def augment_image(image_path, output_subdir, base_filename):
    """Write five augmented 224x224 variants of one image.

    The variants are a horizontal flip, a vertical flip, a 90-degree
    rotation, a 180-degree rotation and a brightness boost (factor 1.5).
    Each is saved as ``<base_filename>_<augmentation name>.jpg`` inside
    ``output_subdir``.

    Args:
        image_path: Path of the image to read.
        output_subdir: Existing directory that receives the variants.
        base_filename: Source file name without its extension.

    Returns:
        True if every variant was written, False if any step raised. The
        error is printed instead of propagated so a batch run can keep going.
    """
    try:
        # Open image & convert to RGB. The context manager closes the file
        # handle as soon as the pixel data has been copied by convert().
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        # Define augmentation transformations
        augmentations = {
            "flipped_horizontally": image.transpose(Image.FLIP_LEFT_RIGHT),
            "flipped_vertically": image.transpose(Image.FLIP_TOP_BOTTOM),
            # expand=True grows the canvas to hold the rotated picture;
            # without it a non-square image is cropped and padded with black.
            "rotated_90": image.rotate(90, expand=True),
            "rotated_180": image.rotate(180),
            "brightness_enhanced": ImageEnhance.Brightness(image).enhance(1.5),
        }

        # Apply each augmentation and save
        for aug_name, aug_image in augmentations.items():
            aug_image = aug_image.resize((224, 224))  # Standardize size
            aug_save_path = os.path.join(output_subdir, f"{base_filename}_{aug_name}.jpg")
            aug_image.save(aug_save_path)

        return True
    except Exception as e:
        # Deliberately broad: one unreadable file must not stop the batch.
        print(f"Error augmenting {image_path}: {e}")
        return False

# Folders to perform data augmentation
folders_to_augment = ["bedbug", "silverfish", "house centipede"]

# Process selected folders: every .jpg/.jpeg/.png found under each folder is
# standardized and augmented into the mirrored location under output_directory.
for folder in folders_to_augment:
    folder_path = os.path.join(input_directory, folder)

    # os.walk() yields nothing for a missing directory, so report it
    # explicitly instead of finishing silently with an empty output folder.
    if not os.path.isdir(folder_path):
        print(f"Skipped missing folder: {folder_path}")
        continue

    output_folder = os.path.join(output_directory, folder)
    os.makedirs(output_folder, exist_ok=True)

    total_files = 0
    successfully_processed = 0

    for root, subdirs, files in os.walk(folder_path):
        # Mirror the input sub-folder layout in the output directory.
        # normpath drops the trailing "/." produced for the top-level folder.
        relative_path = os.path.relpath(root, folder_path)
        output_subdir = os.path.normpath(os.path.join(output_folder, relative_path))

        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg', '.png')):
                print(f"Skipped non-image file: {file}")
                continue

            total_files += 1
            input_path = os.path.join(root, file)
            os.makedirs(output_subdir, exist_ok=True)

            # Save preprocessed image
            output_path = os.path.join(output_subdir, file)
            standardized = preprocess_image(input_path, output_path)

            # Perform data augmentation (attempted even if standardizing failed)
            base_filename, _ = os.path.splitext(file)
            augmented = augment_image(input_path, output_subdir, base_filename)

            # The summary reports "standardized and augmented", so an image
            # only counts when both steps succeeded.
            if standardized and augmented:
                successfully_processed += 1

    # Print folder summary
    if total_files > 0:
        print(f"Folder: {folder} - Successfully standardized and augmented {successfully_processed}/{total_files} images.")
    else:
        print(f"Folder: {folder} - No images found.")

print("Standardizing, normalization, and data augmentation on images complete.")


Skipped non-image file: bedbug_urls (from Google Images).xlsx
Skipped non-image file: bedbug_urls (from Reddit).xlsx
Skipped non-image file: bedbug_urls.xlsx
Skipped non-image file: image_4_bedbug.webp
Skipped non-image file: image_62_bedbug.webp
Folder: bedbug - Successfully standardized and augmented 233/233 images.
Skipped non-image file: silverfish_urls (iNaturalist).xlsx
Skipped non-image file: silverfish_urls.xlsx
Skipped non-image file: image_128_silverfish.webp
Folder: silverfish - Successfully standardized and augmented 208/208 images.
Skipped non-image file: house centipede_urls (reddit).xlsx
Skipped non-image file: house centipede_urls.xlsx
Folder: house centipede - Successfully standardized and augmented 208/208 images.
Standardizing, normalization, and data augmentation on images complete.
