# In [2]:
import os
import cv2
import numpy as np

# In [7]:
def normalize_image(image, method="min-max", mean=None, std=None):
    """
    Normalize an image.

    Args:
        image: Input image as a NumPy array (any array-like is accepted and
            converted to float32).
        method: "min-max" or "standardization".
            "min-max" divides by 255.0, i.e. it maps the fixed 8-bit range
            [0, 255] onto [0, 1]; it does not use the image's own min/max.
            "standardization" computes (image - mean) / std on the unscaled
            pixel values, so mean and std must be on the same scale as image.
        mean: Mean value for standardization (required if method="standardization").
            A scalar or anything that broadcasts against image.
        std: Standard deviation for standardization (required if method="standardization").
            A scalar or anything that broadcasts against image.
    Returns:
        Normalized image as a NumPy array.
    Raises:
        ValueError: If image is None, if method is unknown, if mean/std are
            missing for standardization, or if std contains a zero.
    """
    if image is None:
        # Fail with a clear message instead of an AttributeError on .astype().
        raise ValueError("Image is None; expected a NumPy array.")

    pixels = np.asarray(image, dtype=np.float32)

    if method == "min-max":
        return pixels / 255.0

    if method == "standardization":
        if mean is None or std is None:
            raise ValueError("Mean and std are required for standardization.")
        # A zero std would silently fill the result with inf/NaN.
        if np.any(np.asarray(std) == 0):
            raise ValueError("std must be non-zero for standardization.")
        return (pixels - mean) / std

    raise ValueError("Invalid method. Choose 'min-max' or 'standardization'.")

def process_and_save_images(input_dir, output_dir, method="min-max", mean=None, std=None):
    """
    Normalize images in a directory and save them to a new directory.

    The directory tree under input_dir is mirrored under output_dir. Each
    normalized image is multiplied by 255 and stored as 8-bit, so values
    outside [0, 1] (e.g. negative standardized values) are saturated to the
    0-255 range on save.

    Args:
        input_dir: Path to the input directory containing images.
        output_dir: Path to the output directory to save normalized images.
        method: "min-max" or "standardization".
        mean: Mean value for standardization.
        std: Standard deviation for standardization.
    """
    import warnings  # local import so the function does not rely on a new file-level import

    image_extensions = ('.png', '.jpg', '.jpeg')  # Add more extensions if needed

    os.makedirs(output_dir, exist_ok=True)

    for root, _, files in os.walk(input_dir):
        # Create corresponding output subdirectory
        relative_path = os.path.relpath(root, input_dir)
        output_subdir = os.path.join(output_dir, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        for file in files:
            # Compare case-insensitively so files such as "IMG_001.JPG" are not skipped.
            if not file.lower().endswith(image_extensions):
                continue

            img_path = os.path.join(root, file)
            image = cv2.imread(img_path)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None.
                warnings.warn(f"Skipping unreadable image: {img_path}")
                continue

            normalized_image = normalize_image(image, method, mean, std)

            # Round instead of truncating (float32 round-trips can land just
            # below an integer), and clip so out-of-range values saturate
            # rather than wrap around in the uint8 cast.
            pixels_8bit = np.clip(np.rint(normalized_image * 255), 0, 255).astype('uint8')

            # Save the normalized image
            output_path = os.path.join(output_subdir, file)
            if not cv2.imwrite(output_path, pixels_8bit):
                warnings.warn(f"Failed to write image: {output_path}")
# Paths to your data

# In [9]:
# Paths to your data
train_dir = "data/split_dataset/train"
val_dir = "data/split_dataset/val"
test_dir = "data/split_dataset/test"
output_dir = "data_normalized"

# Calculate mean and std from training images for standardization.
# Only the training split is used; the same statistics are then applied to
# the validation and test splits.
#
# The statistics are accumulated as running per-channel sums rather than by
# stacking every image into one array: memory use stays flat and images of
# different sizes are handled (each pixel carries equal weight).
channel_sum = None
channel_sq_sum = None
pixel_count = 0
for root, _, files in os.walk(train_dir):
    for file in files:
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        img_path = os.path.join(root, file)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode; skip them.
            continue
        # Flatten to (num_pixels, channels) on the 0-1 scale; float64 keeps
        # the sum-of-squares accumulation accurate.
        pixels = image.reshape(-1, image.shape[-1]).astype('float64') / 255.0
        if channel_sum is None:
            channel_sum = np.zeros(pixels.shape[1], dtype='float64')
            channel_sq_sum = np.zeros(pixels.shape[1], dtype='float64')
        channel_sum += pixels.sum(axis=0)
        channel_sq_sum += np.square(pixels).sum(axis=0)
        pixel_count += pixels.shape[0]

if pixel_count == 0:
    raise ValueError(f"No readable training images found in {train_dir!r}.")

mean = channel_sum / pixel_count  # Mean per channel (0-1 scale)
# Population std via E[x^2] - E[x]^2; clamp tiny negative rounding errors to 0.
std = np.sqrt(np.maximum(channel_sq_sum / pixel_count - np.square(mean), 0.0))  # Std per channel (0-1 scale)

# normalize_image() standardizes the unscaled 0-255 pixel values, so the 0-1
# statistics are rescaled to that range: (x/255 - mean) / std is identical to
# (x - 255*mean) / (255*std).
pixel_mean = mean * 255.0
pixel_std = std * 255.0

# Process and save images
# NOTE(review): standardized values are signed and not confined to [0, 1],
# while process_and_save_images() stores 8-bit images, so the saved files are
# a lossy view of the standardized data - confirm this is intended.
process_and_save_images(train_dir, os.path.join(output_dir, "train"), method="standardization", mean=pixel_mean, std=pixel_std)
process_and_save_images(val_dir, os.path.join(output_dir, "val"), method="standardization", mean=pixel_mean, std=pixel_std)
process_and_save_images(test_dir, os.path.join(output_dir, "test"), method="standardization", mean=pixel_mean, std=pixel_std)