In [None]:
import os
from PIL import Image
import numpy as np

def load_image_dataset(folder_path):
    """
    Opens every image file found directly inside a folder.

    Files are selected by extension (case-insensitive): .png, .jpg, .jpeg,
    .tiff, .bmp. Sub-folders are not searched, and entries that are not
    regular files are skipped.

    Args:
        folder_path (str): Path to the folder containing the images.

    Returns:
        list: A list of ``(filename, image)`` tuples, sorted by filename,
        where ``filename`` is the bare file name (no directory part) and
        ``image`` is the object returned by ``Image.open`` for that file.
        The caller is responsible for closing the images.
    """
    extensions = ('.png', '.jpg', '.jpeg', '.tiff', '.bmp')
    images = []
    # os.listdir returns entries in arbitrary order; sort so that processing
    # order (and any printed report) is reproducible between runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(extensions):
            continue
        image_path = os.path.join(folder_path, filename)
        # A directory named e.g. "scans.png" would make Image.open fail.
        if not os.path.isfile(image_path):
            continue
        images.append((filename, Image.open(image_path)))
    return images

def compress_image(image, output_path, compression_ratio=75):
    """
    Saves an image using JPEG 2000 compression.

    Args:
        image (PIL.Image): The image to compress.
        output_path (str): Path to save the compressed image.
        compression_ratio (int): Target rate passed to the encoder as a
            single quality layer in "rates" mode, i.e. an approximate
            size reduction factor (75 means roughly 75:1). Higher values
            mean stronger compression and smaller files. It is NOT a
            0-100 quality percentage.
    """
    # "rates" is the encoder's default mode; it is spelled out here so the
    # meaning of `compression_ratio` is visible at the call site.
    image.save(
        output_path,
        "JPEG2000",
        quality_mode="rates",
        quality_layers=[compression_ratio],
    )

def calculate_compression_ratio(original_size, compressed_size):
    """
    Calculates the compression ratio as original size divided by compressed size.

    Args:
        original_size (int): Size of the original image in bytes.
        compressed_size (int): Size of the compressed image in bytes.

    Returns:
        float: ``original_size / compressed_size``. Values above 1 mean the
        compressed file is smaller than the original.

    Raises:
        ZeroDivisionError: If ``compressed_size`` is 0 (for example an empty
            output file), because the ratio is undefined.
    """
    if compressed_size == 0:
        # Same exception type as the bare division would raise, but with a
        # message that points at the actual cause.
        raise ZeroDivisionError(
            "compressed_size is 0 bytes; the compression ratio is undefined"
        )
    return original_size / compressed_size

def compress_dataset(input_folder, output_folder, compression_ratio=75):
    """
    Compresses all images in a dataset folder and reports the compression ratios.

    Each image returned by ``load_image_dataset`` is written to
    ``output_folder`` under its original file name via ``compress_image``.
    One line per image and a final average are printed; nothing is returned.

    Args:
        input_folder (str): Path to the folder containing the input images.
        output_folder (str): Path to save the compressed images. Created if
            it does not exist.
        compression_ratio (int): Forwarded unchanged to ``compress_image``,
            which passes it to the JPEG 2000 encoder as a quality layer.
            With the encoder's default "rates" mode this is an approximate
            size reduction factor, so higher values mean stronger
            compression.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    # Load the dataset
    images = load_image_dataset(input_folder)
    if not images:
        # Averaging an empty list would only produce a warning and "nan".
        print(f"No images found in {input_folder}")
        return

    compression_ratios = []

    # Iterate through all images
    for filename, img in images:
        input_path = os.path.join(input_folder, filename)
        # NOTE(review): the output keeps the source extension (e.g. ".jpg")
        # although compress_image writes JPEG 2000 data. The name is kept for
        # compatibility with existing outputs; consider switching to ".jp2".
        output_path = os.path.join(output_folder, filename)

        # Get the original file size
        original_size = os.path.getsize(input_path)

        # Compress the image, then release its file handle and pixel data
        try:
            compress_image(img, output_path, compression_ratio)
        finally:
            img.close()

        # Get the compressed file size
        compressed_size = os.path.getsize(output_path)

        # Calculate the compression ratio
        ratio = calculate_compression_ratio(original_size, compressed_size)
        compression_ratios.append(ratio)

        print(f"Compressed {filename}: Original Size = {original_size} bytes, Compressed Size = {compressed_size} bytes, Compression Ratio = {ratio:.2f}")

    # Print average compression ratio
    avg_ratio = np.mean(compression_ratios)
    print(f"\nAverage Compression Ratio: {avg_ratio:.2f}")
# Example usage: compress every image found in one folder into another.
input_folder = "dataset"  # source directory; point this at your dataset
output_folder = "compressii_dataset2"  # destination directory for the compressed files
compression_ratio = 75  # forwarded unchanged to compress_dataset

compress_dataset(
    input_folder=input_folder,
    output_folder=output_folder,
    compression_ratio=compression_ratio,
)

Compressed A01_1.jpg: Original Size = 2623657 bytes, Compressed Size = 339154 bytes, Compression Ratio = 7.74
Compressed A01_2.jpg: Original Size = 2564656 bytes, Compressed Size = 339073 bytes, Compression Ratio = 7.56
Compressed A02_1.jpg: Original Size = 2265900 bytes, Compressed Size = 338924 bytes, Compression Ratio = 6.69
Compressed A02_2.jpg: Original Size = 2542533 bytes, Compressed Size = 339090 bytes, Compression Ratio = 7.50
Compressed A03_1.jpg: Original Size = 2265900 bytes, Compressed Size = 338924 bytes, Compression Ratio = 6.69
Compressed A03_2.jpg: Original Size = 2529543 bytes, Compressed Size = 338721 bytes, Compression Ratio = 7.47
Compressed A04_1.jpg: Original Size = 2542533 bytes, Compressed Size = 339090 bytes, Compression Ratio = 7.50
Compressed A04_2.jpg: Original Size = 2529543 bytes, Compressed Size = 338721 bytes, Compression Ratio = 7.47
Compressed A05_1.jpg: Original Size = 2370849 bytes, Compressed Size = 338582 bytes, Compression Ratio = 7.00
Compressed