In [1]:
import sys

from google.colab import drive

# Mount Google Drive at /content/drive so files stored there are reachable
# from the Colab filesystem.
drive.mount('/content/drive')

# Put the project checkout on the import path; presumably this is where the
# `face_detector` module imported by a later cell lives (confirm).
project_root = '/content/drive/MyDrive/commit_test_folder/EECE491-01-Capstone-Design'
sys.path.append(project_root)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import cv2
import glob
from tqdm import tqdm
from google.colab import files

from face_detector import FaceDetector

In [None]:
# --- 1. 경로 설정 ---
INPUT_FOLDER = '/content/img_align_celeba'
OUTPUT_FOLDER = '/content/cropped_celeba' # 로컬 결과 폴더

# --- 2. Kaggle API 설정 ---
if not os.path.exists("/root/.kaggle/kaggle.json"):
    files.upload() # kaggle.json 업로드
    !mkdir -p ~/.kaggle
    !cp kaggle.json ~/.kaggle/
    !chmod 600 ~/.kaggle/kaggle.json

In [None]:
# --- 3. 데이터셋 다운로드 및 압축 해제 ---
if not os.path.exists(INPUT_FOLDER):
    print("Kaggle에서 CelebA 데이터셋 다운로드 중...")
    !kaggle datasets download -d jessicali9530/celeba-dataset --path /content/ --quiet
    print("압축 해제 중...")
    !unzip -q /content/celeba-dataset.zip -d /content/
    # !rm /content/celeba-dataset.zip /content/*.csv # 불필요 파일 삭제
    print("다운로드 및 압축 해제 완료.")

Kaggle에서 CelebA 데이터셋 다운로드 중...
Dataset URL: https://www.kaggle.com/datasets/jessicali9530/celeba-dataset
License(s): other
압축 해제 중...
다운로드 및 압축 해제 완료.


In [None]:
# --- 4. Detect faces and save the crops ---
face_detector = FaceDetector()
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Sorted so the processing order is deterministic; the recursive pattern also
# picks up .jpg files in nested sub-folders of INPUT_FOLDER.
image_files = sorted(glob.glob(f'{INPUT_FOLDER}/**/*.jpg', recursive=True))
# To process only a subset, slice here (e.g. image_files[:10000]).
image_files_to_process = image_files
print(f"총 {len(image_files_to_process)}개의 이미지를 처리합니다...")

total_faces_cropped = 0  # crops that were actually written to disk
unreadable_images = 0    # inputs that cv2.imread could not load
failed_writes = 0        # crops that were empty or that cv2.imwrite rejected

for img_path in tqdm(image_files_to_process, desc="Processing images"):
    original_image = cv2.imread(img_path)
    if original_image is None:
        # cv2.imread signals a missing/corrupt file by returning None rather
        # than raising; count it so the skip shows up in the final report.
        unreadable_images += 1
        continue

    # Save every detected face; the second return value is not used here.
    cropped_faces, _ = face_detector.run(original_image, target_size=640, score_threshold=0.9)

    base_name = os.path.basename(img_path)
    for i, face_img in enumerate(cropped_faces):
        output_path = os.path.join(OUTPUT_FOLDER, f"face_{i}_{base_name}")

        # An empty crop makes cv2.imwrite raise, which would abort the whole
        # run; skip it instead. Assumes crops are numpy arrays exposing
        # `.size` -- objects without that attribute are passed through.
        is_empty = face_img is None or getattr(face_img, "size", 1) == 0

        # cv2.imwrite returns False on failure instead of raising, so only
        # count the crop once the write is confirmed.
        if is_empty or not cv2.imwrite(output_path, face_img):
            failed_writes += 1
            continue
        total_faces_cropped += 1

print(f"\n전처리 완료: 총 {total_faces_cropped}개의 얼굴 이미지를 '{OUTPUT_FOLDER}' 폴더에 저장했습니다.")
if unreadable_images or failed_writes:
    # Surface anything that was skipped so the saved count can be trusted.
    print(f"경고: 읽을 수 없는 이미지 {unreadable_images}개, 저장에 실패한 얼굴 {failed_writes}개를 건너뛰었습니다.")

총 202599개의 이미지를 처리합니다...


Processing images:  86%|████████▋ | 174933/202599 [2:40:52<26:04, 17.68it/s]

In [None]:
# 잘라낸 얼굴 압축
!tar cf /content/cropped_celeba.tar /content/cropped_celeba

tar: Removing leading `/' from member names


In [None]:
os.makedirs('/content/drive/MyDrive/datasets/cropped_celeba', exist_ok=True)
print(f"Google Drive로 복사 중...")
!cp '/content/cropped_celeba.tar' '/content/drive/MyDrive/datasets/cropped_celeba'/
print("Google Drive 복사 완료.")

Google Drive로 복사 중...
Google Drive 복사 완료.


In [None]:
!tar xf "/content/drive/MyDrive/datasets/cropped_celeba/cropped_celeba.tar" -C "/content/drive/MyDrive/datasets/" --strip-components=1

In [None]:
# 제거
!rm -rf /content/img_align_celeba /content/cropped_celeba_all /content/celeba-dataset.zip