In [1]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image


In [4]:
import os
import numpy as np
from PIL import Image

# Define paths
# The defaults are the original workstation locations. To run on another
# machine, set FRAMES_INPUT_ROOT / FRAMES_OUTPUT_ROOT in the environment
# before executing this cell instead of editing the notebook.
input_root = os.environ.get("FRAMES_INPUT_ROOT", r"P:\Project\Frames")  # Raw frames
output_root = os.environ.get("FRAMES_OUTPUT_ROOT", r"P:\Project\frames_preprocessing")  # Processed frames
log_file = "failed_frames.log"  # File to log failures (relative path, so it lands in the kernel's working directory)

# Target size handed to PIL's Image.resize, which expects (width, height)
IMAGE_SIZE = (224, 224)

# Function to preprocess a single image
def preprocess_image(image_path, size=None):
    """Load one frame, force it to RGB and resize it.

    Args:
        image_path: Path of the image file to read.
        size: Optional target size passed to ``Image.resize``. When omitted,
            the module-level ``IMAGE_SIZE`` is used.

    Returns:
        A 2-tuple. On success ``(image, None)`` where ``image`` is the resized
        RGB PIL image. On any failure ``(None, message)`` where ``message`` is
        ``str()`` of the exception that was raised. The function itself never
        raises an ``Exception`` subclass.
    """
    try:
        target_size = IMAGE_SIZE if size is None else size
        # Open inside a context manager so the file handle is released as soon
        # as the pixels have been read; convert() returns a separate in-memory
        # image that stays valid after the source file is closed.
        with Image.open(image_path) as source:
            image = source.convert("RGB")  # Ensure RGB format
        return image.resize(target_size), None  # Processed image, no error
    except Exception as e:
        return None, str(e)  # Return error message

# Function to process a dataset type (Training, Validation, Testing)
def preprocess_dataset(dataset_type, log_failed):
    """Preprocess every frame of one dataset split, mirroring the folder tree.

    Walks ``input_root/<dataset_type>/<person>/<video>/<frame>``, runs each
    frame through ``preprocess_image`` and saves the result as JPEG at the
    same relative location under ``output_root``. Persons, videos and frames
    are visited in sorted order.

    Args:
        dataset_type: Name of the split sub-folder (this notebook calls it
            with "Train", "Validation" and "Test").
        log_failed: Path of the text file that failure records are appended
            to. Nothing is written when no frame failed.

    Returns:
        None. If the split folder does not exist, a message is printed and the
        function returns without creating anything.

    Notes:
        * Entries that are not directories at the person or video level are
          skipped instead of aborting the run.
        * A frame whose name does not end in .jpg/.jpeg is written with a
          ".jpg" extension so the file name matches its JPEG content.
        * Failures collected so far are flushed to ``log_failed`` even when
          the run is interrupted or crashes part-way.
    """
    input_path = os.path.join(input_root, dataset_type)
    output_path = os.path.join(output_root, dataset_type)

    if not os.path.exists(input_path):
        print(f"❌ {dataset_type} dataset path does not exist: {input_path}")
        return

    os.makedirs(output_path, exist_ok=True)
    print(f"🚀 Processing {dataset_type} dataset...")

    failed_frames = []
    jpeg_extensions = (".jpg", ".jpeg")

    try:
        for person in sorted(os.listdir(input_path)):
            person_path = os.path.join(input_path, person)
            if not os.path.isdir(person_path):
                continue  # Stray file next to the person folders
            person_out_path = os.path.join(output_path, person)
            os.makedirs(person_out_path, exist_ok=True)

            for video in sorted(os.listdir(person_path)):
                video_path = os.path.join(person_path, video)
                if not os.path.isdir(video_path):
                    continue  # Stray file next to the video folders
                video_out_path = os.path.join(person_out_path, video)
                os.makedirs(video_out_path, exist_ok=True)

                for frame in sorted(os.listdir(video_path)):  # Maintain order
                    frame_path = os.path.join(video_path, frame)

                    # Keep the name when it already has a JPEG extension,
                    # otherwise swap the extension so it matches the content.
                    stem, extension = os.path.splitext(frame)
                    if extension.lower() in jpeg_extensions:
                        save_name = frame
                    else:
                        save_name = stem + ".jpg"
                    save_path = os.path.join(video_out_path, save_name)

                    # Preprocess and save
                    image, error = preprocess_image(frame_path)
                    if image is None:
                        failed_frames.append(f"{frame_path} - Error: {error}")
                        continue

                    try:
                        image.save(save_path, "JPEG")  # Save as JPG
                    except (OSError, ValueError) as save_error:
                        # A write problem on one frame should be logged like a
                        # read problem, not abort the whole split.
                        failed_frames.append(f"{frame_path} - Error: {save_error}")

                print(f"✅ {dataset_type} -> {person}/{video} processed!")
    finally:
        # Runs on normal completion and on interruption alike, so failures
        # recorded before a crash or Ctrl-C are not lost.
        if failed_frames:
            with open(log_failed, "a", encoding="utf-8") as log:
                log.write(f"=== {dataset_type} Failed Frames ===\n")
                log.write("\n".join(failed_frames) + "\n\n")

    print(f"🎯 {dataset_type} Preprocessing Completed!")

# Start from an empty failure log: opening in write mode creates the file
# if needed and truncates it otherwise; nothing is written.
with open(log_file, "w"):
    pass


In [5]:
# Each split is processed in its own cell; this one handles "Train".
preprocess_dataset(dataset_type="Train", log_failed=log_file)


🚀 Processing Train dataset...
✅ Train -> 110001/1100011002 processed!
✅ Train -> 110001/1100011003 processed!
✅ Train -> 110001/1100011004 processed!
✅ Train -> 110001/1100011005 processed!
✅ Train -> 110001/1100011006 processed!
✅ Train -> 110001/1100011007 processed!
✅ Train -> 110001/1100011008 processed!
✅ Train -> 110001/1100011009 processed!
✅ Train -> 110001/1100011010 processed!
✅ Train -> 110001/1100011011 processed!
✅ Train -> 110001/1100011012 processed!
✅ Train -> 110001/1100011013 processed!
✅ Train -> 110001/1100011014 processed!
✅ Train -> 110001/1100011015 processed!
✅ Train -> 110001/1100011016 processed!
✅ Train -> 110001/1100011017 processed!
✅ Train -> 110001/1100011018 processed!
✅ Train -> 110001/1100011019 processed!
✅ Train -> 110001/1100011020 processed!
✅ Train -> 110001/1100011021 processed!
✅ Train -> 110001/1100011022 processed!
✅ Train -> 110001/1100011023 processed!
✅ Train -> 110001/1100011025 processed!
✅ Train -> 110001/1100011026 processed!
✅ Train ->

In [None]:
preprocess_dataset(dataset_type="Validation", log_failed=log_file)

🚀 Processing Validation dataset...
✅ Validation -> 400022/4000221001 processed!
✅ Validation -> 400022/4000221002 processed!
✅ Validation -> 400022/4000221006 processed!
✅ Validation -> 400022/4000221008 processed!
✅ Validation -> 400022/4000221009 processed!
✅ Validation -> 400022/4000221010 processed!
✅ Validation -> 400022/4000221011 processed!
✅ Validation -> 400022/4000221013 processed!
✅ Validation -> 400022/4000221014 processed!
✅ Validation -> 400022/4000221015 processed!
✅ Validation -> 400022/4000221016 processed!
✅ Validation -> 400022/4000221017 processed!
✅ Validation -> 400022/4000221018 processed!
✅ Validation -> 400022/4000221024 processed!
✅ Validation -> 400022/4000221033 processed!
✅ Validation -> 400022/4000221034 processed!
✅ Validation -> 400022/4000221035 processed!
✅ Validation -> 400022/4000221036 processed!
✅ Validation -> 400022/4000221040 processed!
✅ Validation -> 400022/4000221041 processed!
✅ Validation -> 400022/4000221042 processed!
✅ Validation -> 4000

In [7]:
preprocess_dataset(dataset_type="Test", log_failed=log_file)

🚀 Processing Test dataset...
✅ Test -> 500044/5000441001 processed!
✅ Test -> 500044/5000441002 processed!
✅ Test -> 500044/5000441003 processed!
✅ Test -> 500044/5000441005 processed!
✅ Test -> 500044/5000441006 processed!
✅ Test -> 500044/5000441007 processed!
✅ Test -> 500044/5000441008 processed!
✅ Test -> 500044/5000441009 processed!
✅ Test -> 500044/5000441010 processed!
✅ Test -> 500044/5000441012 processed!
✅ Test -> 500044/5000441013 processed!
✅ Test -> 500044/5000441014 processed!
✅ Test -> 500044/5000441015 processed!
✅ Test -> 500044/5000441016 processed!
✅ Test -> 500044/5000441017 processed!
✅ Test -> 500044/5000441018 processed!
✅ Test -> 500044/5000441021 processed!
✅ Test -> 500044/5000441022 processed!
✅ Test -> 500044/5000441023 processed!
✅ Test -> 500044/5000441024 processed!
✅ Test -> 500044/5000441027 processed!
✅ Test -> 500044/5000441030 processed!
✅ Test -> 500044/5000441031 processed!
✅ Test -> 500044/5000441032 processed!
✅ Test -> 500044/5000441033 process