In [7]:
import os
import glob


def find_images(directory):
    """Recursively collect the paths of all ``*.jpeg`` files under ``directory``.

    Args:
        directory: Root folder to search; every sub-folder is visited too.

    Returns:
        list[str]: One path per matching file (walked folder joined with the
        file name). Empty when nothing matches. As with any glob pattern,
        matching is by file name only and hidden files (leading ".") are
        not included.
    """
    # Supported image extensions, expressed as glob patterns
    extensions = ["*.jpeg"]
    # List to store paths of images
    image_list = []

    # Only the folder path is needed from os.walk; glob does the matching.
    for root, _dirs, _files in os.walk(directory):
        # Escape the folder path so characters such as "[" or "*" inside a
        # folder name are matched literally instead of being interpreted as
        # wildcards (which would silently drop every file in that folder).
        safe_root = glob.escape(root)
        for extension in extensions:
            image_list.extend(glob.glob(os.path.join(safe_root, extension)))

    return image_list


# Root folder of the dataset to scan (hard-coded absolute path).
directory_path = "/Volumes/ThirdHeart/Github-Y/gameplay-videos/grouped_dataset"
# Collect every matching image path under the root, then print how many were found.
images = find_images(directory_path)
print(len(images))

17088


In [8]:
# import os
# import glob
# from PIL import Image


# def find_images(directory):
#     # List to store paths of images
#     image_list = []
#     # Supported image extensions
#     extensions = [
#         "*.jpeg",
#         "*.jpg",
#         "*.png",
#         "*.bmp",
#     ]  # Include other common formats to check

#     # Walk through directory
#     for root, dirs, files in os.walk(directory):
#         for extension in extensions:
#             # Use glob to find files with the right extension
#             image_list.extend(glob.glob(os.path.join(root, extension)))

#     return image_list


# def convert_to_jpeg(image_list):
#     for image_path in image_list:
#         with Image.open(image_path) as img:
#             # Convert image to RGB to avoid issues with RGBA or other modes when saving as JPEG
#             rgb_img = img.convert("RGB")
#             # Save the image as JPEG at quality=100 (JPEG is still lossy; this only minimizes compression loss)
#             rgb_img.save(image_path, "JPEG", quality=100)


# # Path to the directory
# directory_path = "/Volumes/ThirdHeart/Github-Y/gameplay-videos/grouped_dataset"
# images = find_images(directory_path)
# convert_to_jpeg(images)
# print(f"Processed {len(images)} images.")

In [None]:
import os
import glob
from PIL import Image
from tqdm import tqdm
from multiprocessing import Pool

def find_images(directory):
    """Recursively collect image file paths under ``directory``.

    Files are matched by name against ``*.jpeg``, ``*.jpg``, ``*.png`` and
    ``*.bmp``; file contents are not inspected.

    Args:
        directory: Root folder to search; every sub-folder is visited too.

    Returns:
        list[str]: One path per matching file (walked folder joined with the
        file name). Empty when nothing matches.
    """
    # Supported image extensions, expressed as glob patterns
    extensions = ["*.jpeg", "*.jpg", "*.png", "*.bmp"]
    # List to store paths of images
    image_list = []

    # Only the folder path is needed from os.walk; glob does the matching.
    for root, _dirs, _files in os.walk(directory):
        # Escape the folder path so characters such as "[" or "*" inside a
        # folder name are matched literally instead of being interpreted as
        # wildcards (which would silently drop every file in that folder).
        safe_root = glob.escape(root)
        for extension in extensions:
            image_list.extend(glob.glob(os.path.join(safe_root, extension)))

    return image_list

def convert_to_jpeg(image_path):
    """Re-encode the image at ``image_path`` as an RGB JPEG, replacing it in place.

    The file keeps its original name (including a non-JPEG extension such as
    ``.png``); only its contents become JPEG data.

    Args:
        image_path: Path of the image file to convert.

    Raises:
        Any exception raised by Pillow or the filesystem propagates to the
        caller. The original file is left untouched in that case.
    """
    with Image.open(image_path) as img:
        # Convert image to RGB to avoid issues with RGBA or other modes when saving as JPEG.
        # convert() returns a new, fully loaded image, so it can be used
        # after the source file has been closed.
        rgb_img = img.convert('RGB')

    # Encode into a sibling temp file and swap it in afterwards. Saving
    # straight onto image_path truncates the original before encoding starts,
    # so a failed or interrupted save would destroy the source image.
    tmp_path = f"{image_path}.tmp"
    try:
        # quality=100 minimizes compression loss (JPEG is still a lossy format).
        rgb_img.save(tmp_path, 'JPEG', quality=100)
        os.replace(tmp_path, image_path)
    except BaseException:
        # Do not leave a half-written temp file behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

def process_images(image_list):
    """Run ``convert_to_jpeg`` on every path in ``image_list`` in parallel.

    A tqdm progress bar advances as results are collected, in input order.

    Args:
        image_list: Sized collection of image paths; each one is passed to
            ``convert_to_jpeg``.

    Raises:
        Any exception raised by ``convert_to_jpeg`` is re-raised here when
        its result is collected.
    """
    # Imported locally so this function does not depend on another cell.
    from concurrent.futures import ThreadPoolExecutor

    # Threads rather than multiprocessing.Pool: under the "spawn" start method
    # (the default on macOS and Windows) pool workers must be able to import
    # the target function from __main__, which does not work for functions
    # defined interactively in a notebook cell. The later cells of this
    # notebook use a thread pool for the same kind of work.
    with ThreadPoolExecutor() as executor:
        # list() drains the lazy result iterator so the progress bar advances
        # and worker exceptions surface.
        list(tqdm(executor.map(convert_to_jpeg, image_list), total=len(image_list)))

# Root folder of the dataset to convert (hard-coded absolute path).
directory_path = "/Volumes/ThirdHeart/Github-Y/gameplay-videos/grouped_dataset"
# Gather the image paths, convert each one in place, then report the count.
# NOTE: the count printed is the number of files found by find_images.
images = find_images(directory_path)
process_images(images)
print(f"Processed {len(images)} images.")

In [3]:
import os
import glob
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor


def find_images(directory):
    """Recursively collect image file paths under ``directory``.

    Files are matched by name against ``*.jpeg``, ``*.jpg``, ``*.png`` and
    ``*.bmp``; file contents are not inspected.

    Args:
        directory: Root folder to search; every sub-folder is visited too.

    Returns:
        list[str]: One path per matching file. Empty when nothing matches.
    """
    image_list = []
    extensions = ["*.jpeg", "*.jpg", "*.png", "*.bmp"]
    for root, _dirs, _files in os.walk(directory):
        # Escape the folder path so "[", "*" or "?" in a folder name is taken
        # literally; unescaped, glob treats it as a wildcard and silently
        # returns nothing for that folder.
        safe_root = glob.escape(root)
        for extension in extensions:
            image_list.extend(glob.glob(os.path.join(safe_root, extension)))
    return image_list


def verify_jpeg(image_path):
    """Report whether the file at ``image_path`` is identified as a JPEG.

    Nothing is returned; findings are printed:
    - a "Non-JPEG file found" line when Pillow identifies another format,
    - a "Cannot open/read file" line when opening raises an I/O error.

    Args:
        image_path: Path of the image file to inspect.
    """
    try:
        with Image.open(image_path) as candidate:
            detected_format = candidate.format
            if not detected_format == "JPEG":
                print(f"Non-JPEG file found: {image_path}")
    except OSError:  # IOError is an alias of OSError in Python 3
        print(f"Cannot open/read file: {image_path}")


def process_images(image_list):
    """Check every path in ``image_list`` with ``verify_jpeg`` on a thread pool.

    Args:
        image_list: Sized collection of image paths; its length sets the
            progress-bar total.
    """
    with ThreadPoolExecutor() as pool:
        outcomes = pool.map(verify_jpeg, image_list)
        progress = tqdm(outcomes, total=len(image_list))
        # Consuming the iterator is what advances the bar and surfaces any
        # exception raised inside a worker.
        for _ in progress:
            pass


# Root folder of the dataset to verify (hard-coded absolute path).
directory_path = "/Volumes/ThirdHeart/Github-Y/gameplay-videos/grouped_dataset"
# Gather the image paths and check each one; problems are printed by verify_jpeg.
images = find_images(directory_path)
process_images(images)
# The count below is the number of files examined, not the number that passed.
print(f"Verified {len(images)} images.")

100%|██████████| 17088/17088 [00:01<00:00, 13906.71it/s]

Verified 17088 images.





In [7]:
import os
import glob
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor


def find_images(directory):
    """Recursively collect image file paths under ``directory``.

    Files are matched by name against ``*.jpeg``, ``*.jpg``, ``*.png`` and
    ``*.bmp``; file contents are not inspected.

    Args:
        directory: Root folder to search; every sub-folder is visited too.

    Returns:
        list[str]: One path per matching file. Empty when nothing matches.
    """
    image_list = []
    extensions = ["*.jpeg", "*.jpg", "*.png", "*.bmp"]
    for root, _dirs, _files in os.walk(directory):
        # Escape the folder path so "[", "*" or "?" in a folder name is taken
        # literally; unescaped, glob treats it as a wildcard and silently
        # returns nothing for that folder.
        safe_root = glob.escape(root)
        for extension in extensions:
            image_list.extend(glob.glob(os.path.join(safe_root, extension)))
    return image_list


def reencode_jpeg(image_path):
    """Re-encode the image at ``image_path`` in place as an RGB JPEG (quality 85).

    Failures are reported with a printed message instead of an exception, so
    one bad file does not stop a batch. The original file is only replaced
    after the new encoding has been written completely.

    Args:
        image_path: Path of the image file to re-encode.
    """
    try:
        with Image.open(image_path) as img:
            # Convert image to RGB to avoid issues with RGBA or other modes.
            # convert() returns a new, fully loaded image, so it can be used
            # after the source file has been closed.
            rgb_img = img.convert("RGB")
    except IOError:
        print(f"Cannot open/read file: {image_path}")
        return

    # Encode into a sibling temp file and swap it in afterwards. Saving
    # straight onto image_path truncates the original before encoding starts,
    # so a failed save would destroy the source image.
    tmp_path = f"{image_path}.tmp"
    try:
        # quality=85 is lossy: each run re-compresses the image again.
        rgb_img.save(tmp_path, "JPEG", quality=85)
        os.replace(tmp_path, image_path)
    except IOError:
        print(f"Cannot write file: {image_path}")
    finally:
        # After a successful os.replace the temp name no longer exists; this
        # only removes a leftover from a failed save.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def process_images(image_list):
    """Apply ``reencode_jpeg`` to every path in ``image_list`` on a thread pool.

    Args:
        image_list: Sized collection of image paths; its length sets the
            progress-bar total.
    """
    with ThreadPoolExecutor() as pool:
        pending = pool.map(reencode_jpeg, image_list)
        bar = tqdm(pending, total=len(image_list))
        # Draining the iterator advances the bar and re-raises any exception
        # that escaped a worker.
        for _ in bar:
            pass


# Root folder of the dataset to re-encode (hard-coded absolute path).
directory_path = "/Volumes/ThirdHeart/Github-Y/gameplay-videos/grouped_dataset"
# Gather the image paths and re-encode each one in place.
images = find_images(directory_path)
process_images(images)
# NOTE: this reports the number of files found; files for which reencode_jpeg
# printed an error are still included in the count.
print(f"Re-encoded {len(images)} images.")

100%|██████████| 17088/17088 [01:37<00:00, 175.54it/s]

Re-encoded 17088 images.





In [10]:
import os
import glob
from PIL import Image
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor


def find_images(directory):
    """Recursively collect image file paths under ``directory``.

    Files are matched by name against ``*.jpeg``, ``*.jpg``, ``*.png`` and
    ``*.bmp``; file contents are not inspected.

    Args:
        directory: Root folder to search; every sub-folder is visited too.

    Returns:
        list[str]: One path per matching file. Empty when nothing matches.
    """
    image_list = []
    extensions = ["*.jpeg", "*.jpg", "*.png", "*.bmp"]
    for root, _dirs, _files in os.walk(directory):
        # Escape the folder path so "[", "*" or "?" in a folder name is taken
        # literally; unescaped, glob treats it as a wildcard and silently
        # returns nothing for that folder.
        safe_root = glob.escape(root)
        for extension in extensions:
            image_list.extend(glob.glob(os.path.join(safe_root, extension)))
    return image_list


def reencode_jpeg(image_path, max_width=2560, max_height=1440, quality=85):
    """Downscale an oversized image in place, saving it as an RGB JPEG.

    Images that already fit within ``max_width`` x ``max_height`` are left
    untouched and are not decoded or re-encoded. Larger images are scaled
    down with the aspect ratio preserved, converted to RGB and written back
    to the same path. Failures are reported with a printed message instead
    of an exception, so one bad file does not stop a batch.

    Args:
        image_path: Path of the image file to process.
        max_width: Largest allowed width in pixels.
        max_height: Largest allowed height in pixels.
        quality: JPEG quality used when a resized image is saved.
    """
    try:
        with Image.open(image_path) as img:
            width, height = img.size
            # Nothing to do for images that already fit. Returning here avoids
            # decoding and converting pixel data that would never be saved.
            if width <= max_width and height <= max_height:
                return

            # Calculate the ratio to scale down (the tighter limit wins).
            ratio = min(max_width / width, max_height / height)
            # Clamp to 1 px so an extreme aspect ratio cannot round a side
            # down to 0, which is not a valid target size.
            new_size = (
                max(1, int(width * ratio)),
                max(1, int(height * ratio)),
            )
            # Resize, then convert to RGB to avoid issues with RGBA or other
            # modes when saving as JPEG.
            rgb_img = img.resize(new_size, Image.LANCZOS).convert("RGB")
    except IOError:
        print(f"Cannot open/read file: {image_path}")
        return

    # Encode into a sibling temp file and swap it in afterwards. Saving
    # straight onto image_path truncates the original before encoding starts,
    # so a failed save would destroy the source image.
    tmp_path = f"{image_path}.tmp"
    try:
        rgb_img.save(tmp_path, "JPEG", quality=quality)
        os.replace(tmp_path, image_path)
    except IOError:
        print(f"Cannot write file: {image_path}")
    finally:
        # After a successful os.replace the temp name no longer exists; this
        # only removes a leftover from a failed save.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def process_images(image_list):
    """Apply ``reencode_jpeg`` to every path in ``image_list`` on a thread pool.

    Args:
        image_list: Sized collection of image paths; its length sets the
            progress-bar total.
    """
    with ThreadPoolExecutor() as workers:
        results = workers.map(reencode_jpeg, image_list)
        tracked = tqdm(results, total=len(image_list))
        # Exhaust the iterator so the bar advances and any exception that
        # escaped a worker is re-raised here.
        for _ in tracked:
            pass


# Root folder of the dataset to process (hard-coded absolute path).
directory_path = "/Volumes/ThirdHeart/Github-Y/gameplay-videos/grouped_dataset"
# Gather the image paths and hand each one to reencode_jpeg.
images = find_images(directory_path)
process_images(images)
# NOTE: this reports the number of files examined, which includes files that
# reencode_jpeg did not need to resize and therefore did not rewrite.
print(f"Processed {len(images)} images.")

100%|██████████| 17088/17088 [01:44<00:00, 163.11it/s]

Processed 17088 images.



