In [10]:
import cv2
import os
import shutil
import matplotlib.pyplot as plt
from tqdm import tqdm
# from deepface import DeepFace

In [11]:
# Root folder that is scanned for input images, and root folder that
# receives the cropped faces.
src_dir = './archive/'
dst_dir = './cropped/'

# Paths of the images in which no face was detected (filled by process_file).
no_face_files = list()

In [12]:
# Frontal-face Haar cascade, loaded from the data directory bundled with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# CascadeClassifier does not raise when the XML file is missing or unreadable;
# it just stays empty. Fail here with a clear message instead of hitting an
# obscure error on the first detectMultiScale call.
if face_cascade.empty():
    raise RuntimeError(
        "Failed to load 'haarcascade_frontalface_default.xml' from "
        f"{cv2.data.haarcascades}"
    )

In [13]:
def process_subfolders(folder_path):
    """Run ``process_file`` on every ``.png`` file found under ``folder_path``.

    The tree is walked recursively with ``os.walk``. A progress bar tracks the
    ``.png`` files as they are processed, and a two-line summary is printed at
    the end: the total number of files (of any type) found under
    ``folder_path`` and the number of ``.png`` files handed to
    ``process_file``.

    Args:
        folder_path: Root directory to scan.
    """
    # Walk the tree once so that the totals and the list of files to process
    # come from the same snapshot of the directory.
    num_files = 0
    png_files = []
    for dirpath, _, filenames in os.walk(folder_path):
        num_files += len(filenames)
        png_files.extend(
            os.path.join(dirpath, filename)
            for filename in filenames
            if filename.endswith('.png')
        )

    processed_files = 0

    # Size the bar by the files that are actually processed; sizing it by
    # every file would leave it stuck below 100% whenever non-png files exist.
    with tqdm(total=len(png_files), desc='Processing files') as pbar:
        for src_file in png_files:
            process_file(src_file)
            processed_files += 1
            pbar.update(1)

    # Report the folder that was actually scanned, not the global default.
    print(f"Number of files in {folder_path}: {num_files}")
    print(f"Number of files processed: {processed_files}")

In [14]:
def process_file(file_path):
    """Detect faces in one image and save each one as a 128x128 grayscale crop.

    The image is read in grayscale and passed to the module-level
    ``face_cascade``. Crops are written under ``dst_dir`` at the same relative
    path that ``file_path`` has under ``src_dir``. The first detected face
    keeps the source file name; any further faces get ``_1``, ``_2``, ...
    appended to the file stem so they no longer overwrite each other.

    Files that cannot be read, or in which no face is detected, are appended
    to the module-level ``no_face_files`` list.

    Args:
        file_path: Path of the image to process, located under ``src_dir``.
    """
    # Load the image in grayscale format
    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals an unreadable or corrupt file by returning None
    # rather than raising; record the file and move on instead of crashing
    # inside detectMultiScale.
    if image is None:
        print(f"Could not read image, skipping: {file_path}")
        no_face_files.append(file_path)
        return

    # Detect faces in the grayscale image
    faces = face_cascade.detectMultiScale(image, scaleFactor=1.5, minNeighbors=3, minSize=(100, 100))

    if len(faces) == 0:
        # No face detected, add file path to the list
        no_face_files.append(file_path)
        return

    # Mirror the source layout below dst_dir; create the folder once per file.
    dst_file = os.path.join(dst_dir, os.path.relpath(file_path, src_dir))
    os.makedirs(os.path.dirname(dst_file), exist_ok=True)
    stem, ext = os.path.splitext(dst_file)

    for i, (x, y, w, h) in enumerate(faces):
        face_img = image[y:y+h, x:x+w]
        face_resized = cv2.resize(face_img, (128, 128))
        # Give every face after the first its own file name.
        out_file = dst_file if i == 0 else f"{stem}_{i}{ext}"
        cv2.imwrite(out_file, face_resized)

In [15]:
# Run the face-cropping pass over every .png file under src_dir; crops are
# written under dst_dir and files without a detected face are collected in
# no_face_files.
process_subfolders(src_dir)

Processing files: 100%|██████████| 6/6 [00:00<00:00,  9.04it/s]

Number of files in ./archive/: 6
Number of files processed: 6





In [16]:
# Report how many processed images ended up in no_face_files.
print("Files without detected faces with haarcascade:", len(no_face_files))

Files without detected faces with haarcascade: 2
