#### Iteration #1 – Preprocessing

In [19]:
import os
from PIL import Image
import numpy as np

# ITERATION 1: PREPROCESSING THE DATA
# ------------------------------------
# 1) Reading Images: Code opens images from a specified input directory & converts them to RGB format
# 2) Standardization: Images are resized to a fixed size of 224x224 pixels
# 3) Normalization: Pixel values are normalized to a range between -1 and 1 in memory (the saved image files are converted back to 8-bit 0-255 values)
# 4) Saving: Processed images are saved in a corresponding output directory
# 5) Processing Directories: Code iterates through all subdirectories in the input directory, processes image files, then saves the preprocessed images in corresponding subfolders

# Directories
# NOTE: both paths are absolute and specific to one machine; adjust them
# before running this cell anywhere else.
# Root folder whose subfolders are scanned for image files.
input_directory = "/Users/shirleyfong/Desktop/DS 5500/Code/insect data"
# Root folder that receives the preprocessed copies. NOTE(review): this path
# is a subfolder of input_directory, so any recursive traversal of
# input_directory will also encounter it.
output_directory = "/Users/shirleyfong/Desktop/DS 5500/Code/insect data/preprocessed_images" 
# Create the output root up front; exist_ok=True keeps re-runs from failing
# when the folder already exists.
os.makedirs(output_directory, exist_ok=True)

def preprocess_image(image_path, save_path, size=(224, 224)):
    """Resize one image, normalize it, and write the result to disk.

    The image is converted to RGB, resized to ``size``, scaled to float32
    values in [-1, 1], then mapped back to 8-bit values so it can be stored
    in a regular image file.

    Args:
        image_path: Path of the image file to read.
        save_path: Path the processed image is written to; the output format
            is chosen by PIL from this path's extension.
        size: Target ``(width, height)`` in pixels. Defaults to 224x224.

    Returns:
        True if the image was processed and saved, False if any step failed
        (the error is printed rather than raised so a batch run continues).
    """
    try:
        # Open image & convert to RGB. The context manager closes the source
        # file handle as soon as the pixel data has been copied by convert(),
        # instead of leaving one open handle per image during a batch run.
        with Image.open(image_path) as source:
            image = source.convert("RGB").resize(size)

        # Convert to array & normalize pixel values to [-1, 1]
        image_array = np.asarray(image, dtype=np.float32)
        image_array = (image_array / 127.5) - 1.0

        # Checking range to ensure images were normalized between -1 and 1
        #print(f"Processing: {image_path}")
        #print(f"  Min pixel value after normalization: {image_array.min()}")
        #print(f"  Max pixel value after normalization: {image_array.max()}")

        # Map back to [0, 255] for saving. Round before casting: astype()
        # truncates, so float32 round-off (e.g. 0.99999 instead of 1.0) would
        # otherwise darken pixels by one level. Clip guards the uint8 range.
        restored = np.rint((image_array + 1.0) * 127.5)
        restored = np.clip(restored, 0, 255).astype(np.uint8)

        # Save preprocessed image
        Image.fromarray(restored).save(save_path)
        return True
    except Exception as e:
        # Deliberately broad: unreadable or corrupt files can fail in several
        # ways, and one bad file must not abort the whole batch.
        print(f"Error processing {image_path}: {e}")
        return False

# Process all images in subfolders
# File extensions (lower-case) that are treated as images.
image_extensions = ('.jpg', '.jpeg', '.png')
# Absolute form of the output root, used to recognise it during the walk.
output_root = os.path.abspath(output_directory)

for root, subdirs, files in os.walk(input_directory):
    # The output directory lives inside the input directory. Prune it in place
    # so os.walk does not descend into it and re-process earlier results into
    # nested "preprocessed_images/preprocessed_images/..." folders.
    subdirs[:] = [
        name for name in subdirs
        if os.path.abspath(os.path.join(root, name)) != output_root
    ]

    # Per-folder counters for the summary line below.
    total_files = 0
    successfully_processed = 0

    # Mirror this folder's position relative to the input root in the output.
    relative_path = os.path.relpath(root, input_directory)
    output_subdir = os.path.join(output_directory, relative_path)

    for file in files:
        if not file.lower().endswith(image_extensions):
            print(f"Skipped non-image file: {file}")
            continue

        total_files += 1
        input_path = os.path.join(root, file)

        # Create corresponding subfolder in output directory (only once an
        # image is actually found, so empty folders are not mirrored).
        os.makedirs(output_subdir, exist_ok=True)

        # Save preprocessed image
        output_path = os.path.join(output_subdir, file)
        if preprocess_image(input_path, output_path):
            successfully_processed += 1

    # Print folder summary
    if total_files > 0:
        print(f"Folder: {os.path.basename(root)} - Successfully standardized and normalized {successfully_processed}/{total_files} images.")

print("Standardizing and normalization on images complete.")


Skipped non-image file: .DS_Store
Skipped non-image file: .DS_Store
Error processing /Users/shirleyfong/Desktop/DS 5500/Code/insect data/flea/image_94_flea_google.jpg: cannot identify image file '/Users/shirleyfong/Desktop/DS 5500/Code/insect data/flea/image_94_flea_google.jpg'
Folder: flea - Successfully standardized and normalized 159/160 images.
Skipped non-image file: .DS_Store
Skipped non-image file: image_83_tick.gif
Error processing /Users/shirleyfong/Desktop/DS 5500/Code/insect data/tick/image_31_tick_google.jpg: cannot identify image file '/Users/shirleyfong/Desktop/DS 5500/Code/insect data/tick/image_31_tick_google.jpg'
Error processing /Users/shirleyfong/Desktop/DS 5500/Code/insect data/tick/image_44_tick_google.jpg: cannot identify image file '/Users/shirleyfong/Desktop/DS 5500/Code/insect data/tick/image_44_tick_google.jpg'
Error processing /Users/shirleyfong/Desktop/DS 5500/Code/insect data/tick/image_45_tick_google.jpg: cannot identify image file '/Users/shirleyfong/Desk