In [2]:
import os
from PIL import Image
import pandas as pd
import shutil

In [3]:
from tqdm import tqdm

In [4]:
METADATA_FILE = "download_metadata.csv"
DOWNLOAD_DIR = "downloaded_images"
NEW_OUTPUT_DIR = "successful_images"
RESIZE_SIZE = (640, 640)
IMAGE_FORMAT = "JPEG"
QUALITY = 90

os.makedirs(NEW_OUTPUT_DIR, exist_ok=True)

df = pd.read_csv(METADATA_FILE)
successful_images = df[df["status"] == "success"]

total_images = len(successful_images)
processed_count = 0
failed_count = 0

with tqdm(total=total_images, desc="Processing images", unit="image") as pbar:
    for _, row in successful_images.iterrows():
        src_path = row["path"]
        if pd.notna(src_path) and os.path.exists(src_path):
            try:
                filename = os.path.basename(src_path)
                base_name = os.path.splitext(filename)[0]
                dest_path = os.path.join(NEW_OUTPUT_DIR, f"{base_name}.{IMAGE_FORMAT.lower()}")
                
                with Image.open(src_path) as img:
                    
                    img.thumbnail(RESIZE_SIZE, Image.Resampling.LANCZOS)
                    
                    if img.mode in ("RGBA", "P"):
                        img = img.convert("RGB")
                    
                    img.save(dest_path, IMAGE_FORMAT, quality=QUALITY)
                    
                processed_count += 1
                pbar.set_postfix({"Processed": processed_count, "Failed": failed_count})
                
            except Exception as e:
                failed_count += 1
                pbar.set_postfix({"Processed": processed_count, "Failed": failed_count})
                pbar.write(f"Failed to process {src_path}: {str(e)}") 
        else:
            failed_count += 1
            pbar.set_postfix({"Processed": processed_count, "Failed": failed_count})
        
        pbar.update(1)  

print(f"\nDone! Resized images saved in '{NEW_OUTPUT_DIR}/'")
print(f"Total images: {total_images}")
print(f"Successfully processed: {processed_count}")
print(f"Failed to process: {failed_count}")

Processing images:  41%|████████████████████████████▍                                        | 203/492 [00:09<00:11, 25.90image/s, Processed=203, Failed=1]

Failed to process downloaded_images\image_0232.png: cannot write mode LA as JPEG


Processing images: 100%|█████████████████████████████████████████████████████████████████████| 492/492 [00:24<00:00, 20.35image/s, Processed=491, Failed=1]


Done! Resized images saved in 'successful_images/'
Total images: 492
Successfully processed: 491
Failed to process: 1



