In [2]:
import os
import math
from pathlib import Path
from PIL import Image

# ✨ 여기만 수정 ✨
src_root = Path("./MSMT17_V1/train")        # train 폴더 경로
dst_root = Path("./MSMT17_V1/train_processed")     # 저장할 폴더 경로
model = "qwen2"                           # "llava" 또는 "qwen2" 중 선택

def target_shape_llava(h, w, short_max=672, long_max=1344):
    scale = min(short_max / min(h, w), long_max / max(h, w), 1.0)
    return (int(round(h * scale)), int(round(w * scale)))

def target_shape_qwen2(h, w, max_pixels=1_000_000):
    cur_pixels = h * w
    if cur_pixels <= max_pixels:
        return (h, w)
    scale = math.sqrt(max_pixels / cur_pixels)
    return (int(round(h * scale)), int(round(w * scale)))

def process_image(src_path: Path, dst_path: Path, model: str):
    img = Image.open(src_path).convert("RGB")
    h, w = img.height, img.width
    if model == "llava":
        nh, nw = target_shape_llava(h, w)
    elif model == "qwen2":
        nh, nw = target_shape_qwen2(h, w)
    else:
        raise ValueError("model must be 'llava' or 'qwen2'")

    if (nh, nw) != (h, w):
        img = img.resize((nw, nh), Image.BICUBIC)

    dst_path.parent.mkdir(parents=True, exist_ok=True)
    img.save(dst_path, quality=95)

def main():
    exts = {".jpg", ".jpeg", ".png", ".webp"}
    images = [p for p in src_root.rglob("*") if p.suffix.lower() in exts]

    print(f"[INFO] {len(images)}개 이미지 처리 시작...")
    for idx, src_path in enumerate(images):
        rel_path = src_path.relative_to(src_root)    # src_root 이후부터 상대경로
        dst_path = dst_root / rel_path

        try:
            process_image(src_path, dst_path, model)
        except Exception as e:
            print(f"[WARNING] {src_path} 처리 실패: {e}")

        if idx % 100 == 0:
            print(f"  >> {idx}/{len(images)} 완료 ({rel_path})")
    
    print(f"[DONE] 전체 완료! 저장 위치: {dst_root}")

if __name__ == "__main__":
    main()

[INFO] 32621개 이미지 처리 시작...
  >> 0/32621 완료 (0000/0000_000_01_0303morning_0008_0.jpg)
  >> 100/32621 완료 (0001/0001_048_01_0303morning_0756_0_ex.jpg)
  >> 200/32621 완료 (0006/0006_000_01_0303morning_0052_9.jpg)
  >> 300/32621 완료 (0010/0010_014_05_0303morning_0132_1.jpg)
  >> 400/32621 완료 (0014/0014_023_14_0303morning_0416_1.jpg)
  >> 500/32621 완료 (0019/0019_005_01_0303morning_0244_1_ex.jpg)
  >> 600/32621 완료 (0024/0024_010_05_0303morning_0366_0.jpg)
  >> 700/32621 완료 (0027/0027_012_07_0303morning_0395_4_ex.jpg)
  >> 800/32621 완료 (0029/0029_021_15_0303morning_0655_0.jpg)
  >> 900/32621 완료 (0032/0032_050_15_0302morning_0555_3_ex.jpg)
  >> 1000/32621 완료 (0037/0037_002_01_0303morning_0536_0.jpg)
  >> 1100/32621 완료 (0040/0040_004_01_0303morning_0596_3.jpg)
  >> 1200/32621 완료 (0043/0043_007_01_0303morning_0681_2_ex.jpg)
  >> 1300/32621 완료 (0047/0047_000_01_0303morning_0717_2.jpg)
  >> 1400/32621 완료 (0050/0050_015_05_0303morning_0780_4_ex.jpg)
  >> 1500/32621 완료 (0055/0055_001_01_0303morning_087