In [2]:
import cv2
import os


def convert_to_jpg(input_image_path, output_image_path, quality=95):
    """Re-encode an image file as JPEG and write the result to disk.

    Args:
        input_image_path: Path of the image to load with ``cv2.imread``.
        output_image_path: Path the encoded JPEG bytes are written to.
        quality: Value passed as ``cv2.IMWRITE_JPEG_QUALITY`` (1-100; higher
            is better quality but larger file size).

    Raises:
        ValueError: If the input could not be read as an image.
        Exception: If OpenCV reports that JPEG encoding failed.
    """
    # Read the image
    img = cv2.imread(input_image_path)

    # cv2.imread signals a missing/undecodable file by returning None instead
    # of raising; fail here with the offending path rather than letting
    # cv2.imencode choke on a None input.
    if img is None:
        raise ValueError(f'Could not read image: {input_image_path}')

    # Define JPEG quality (1-100) (higher is better quality but larger file size)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]

    # Encode the image in JPEG format with the specified quality
    result, encimg = cv2.imencode('.jpg', img, encode_param)

    if not result:
        raise Exception('Could not encode image to JPG')

    # Save the encoded image to file
    with open(output_image_path, mode='wb') as f:
        encimg.tofile(f)
    

def batch_convert_to_jpg(input_folder, output_folder, quality=85, silent=False):
    """Convert every regular file directly inside ``input_folder`` to JPEG.

    Each output is written to ``output_folder`` under the input's base name
    with a ``.jpg`` extension, so inputs that differ only by extension map to
    the same output file. Sub-directories are skipped (no recursion).

    Args:
        input_folder: Directory whose entries are converted.
        output_folder: Directory for the results; created if missing.
        quality: JPEG quality forwarded to ``convert_to_jpg``.
        silent: When True, suppress the per-file success message. Failure
            messages are printed regardless.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Process each file in the input folder
    for filename in os.listdir(input_folder):
        input_path = os.path.join(input_folder, filename)

        # Directories are not images; skip them instead of reporting a
        # spurious conversion failure.
        if not os.path.isfile(input_path):
            continue

        stem = os.path.splitext(filename)[0]
        output_path = os.path.join(output_folder, stem + '.jpg')

        try:
            convert_to_jpg(input_path, output_path, quality)
            if not silent:
                print(f'Converted {input_path} to {output_path}')
        except Exception as e:
            # Best-effort batch: report the failure and continue with the rest
            print(f'Failed to convert {input_path}: {e}')

In [9]:
import shutil
import os

def copy_image(src_path, dest_path, silent=False):
    """Copy a file to ``dest_path``, creating its parent directory if needed.

    Errors are reported with ``print`` rather than raised: a missing source
    prints a message and returns, and any exception during the copy is caught
    and printed.

    Args:
        src_path: Path of the file to copy; must be an existing regular file.
        dest_path: Destination passed to ``shutil.copy`` (a file path, or an
            existing directory to copy into).
        silent: When True, suppress the success message.
    """
    try:
        # Check if the source file exists
        if not os.path.isfile(src_path):
            print(f"The source file {src_path} does not exist.")
            return

        # Create the destination directory if it does not exist. dirname is ''
        # for a bare filename, and os.makedirs('') raises, so only create a
        # directory when the destination actually has a parent component.
        dest_dir = os.path.dirname(dest_path)
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)

        # Copy the file
        shutil.copy(src_path, dest_path)
        if not silent:
            print(f"Image copied successfully from {src_path} to {dest_path}")

    except Exception as e:
        print(f"An error occurred: {e}")

In [11]:
# Example usage
# Walks <input_path>/<generator>/<dataset>/<category>/<file> and mirrors the
# 'nature' categories into <output_main_path>/jpg_quality_<quality>/...
input_path = '../data/genimage/resized512x512'
output_main_path = '../data/genimage_512_jpg'

for quality in [100]:
    output_path = os.path.join(output_main_path, f'jpg_quality_{quality}')
    os.makedirs(output_path, exist_ok=True)
    # sorted() keeps the progress output in a deterministic order
    for generator in sorted(os.listdir(input_path)):
        generator_path = os.path.join(input_path, generator)
        # Stray files (e.g. OS metadata) would make os.listdir raise below
        if not os.path.isdir(generator_path):
            continue
        for dataset in sorted(os.listdir(generator_path)):
            dataset_path = os.path.join(generator_path, dataset)
            if not os.path.isdir(dataset_path):
                continue
            for category in sorted(os.listdir(dataset_path)):
                category_path = os.path.join(dataset_path, category)
                if category == 'nature':
                    if not os.path.isdir(category_path):
                        continue
                    # just copy the nature files since they are already jpg
                    target = os.path.join(output_path, generator, dataset, category)
                    os.makedirs(target, exist_ok=True)
                    for filename in os.listdir(category_path):
                        source = os.path.join(category_path, filename)
                        # target is a directory, so the file keeps its name
                        copy_image(source, target, silent=True)
                # NOTE: conversion of the non-'nature' categories is currently disabled.
                # else:
                #     # convert the other files to jpg
                #     batch_convert_to_jpg(category_path, os.path.join(output_path, generator, dataset, category), quality=quality, silent=True)
        print(f'Converted {generator} to jpgs')

Converted imagenet_ai_0419_biggan to jpgs
Converted imagenet_ai_0419_vqdm to jpgs
Converted imagenet_ai_0424_sdv5 to jpgs
Converted imagenet_ai_0424_wukong to jpgs
Converted imagenet_ai_0508_adm to jpgs
Converted imagenet_glide to jpgs
Converted imagenet_midjourney to jpgs
