In [16]:
import cv2
import os

In [17]:
# Load the frontal-face Haar cascade; the relative path is resolved against the
# kernel's current working directory.
CASCADE_PATH = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(CASCADE_PATH)

# The constructor does not raise when the XML file is missing or cannot be parsed;
# it hands back an empty classifier instead. Check here so the problem is reported
# immediately with a readable message, rather than as an OpenCV assertion raised
# from detectMultiScale part-way through the dataset scan.
if face_cascade.empty():
    raise IOError(f"Could not load Haar cascade from {CASCADE_PATH!r}")


In [35]:
# Function to detect faces in a single image
def has_faces(image_path: str, face_cascade, *, scale_factor: float = 1.1,
              min_neighbors: int = 5, min_size: tuple = (30, 30)) -> bool:
    """Report whether the cascade finds at least one face in an image file.

    Parameters
    ----------
    image_path
        Path of the image file; it is read with ``cv2.imread``.
    face_cascade
        Classifier object; only its ``detectMultiScale`` method is used.
    scale_factor
        Forwarded to ``detectMultiScale`` as ``scaleFactor``.
    min_neighbors
        Forwarded to ``detectMultiScale`` as ``minNeighbors``.
    min_size
        Forwarded to ``detectMultiScale`` as ``minSize``.

    Returns
    -------
    bool
        ``True`` when ``detectMultiScale`` returns one or more detections.
        ``False`` when it returns none, or when ``cv2.imread`` returns ``None``
        for ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # imread signals an unreadable file by returning None instead of raising.
        return False

    # Detection runs on a single-channel image.
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(
        gray_image,
        scaleFactor=scale_factor,
        minNeighbors=min_neighbors,
        minSize=min_size,
    )

    # len() rather than truthiness: the detections may come back as an array,
    # whose truth value is ambiguous.
    return len(faces) > 0

# Function to recursively traverse the dataset directory and count images with faces
def count_artworks_with_faces_recursive(root_dir, face_cascade):
    """Tally image files under ``root_dir`` and how many of them contain faces.

    Every directory below ``root_dir`` is visited with ``os.walk``. A file is
    counted when its lower-cased name ends in .jpg, .jpeg, .png or .bmp, and each
    counted file is handed to ``has_faces`` together with ``face_cascade``.

    Returns a ``(artworks_with_faces, total_artworks)`` tuple; both values are 0
    when the walk yields no matching file.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')

    # Lazily produce the full path of every file whose extension matches.
    candidate_paths = (
        os.path.join(folder, name)
        for folder, _, names in os.walk(root_dir)
        for name in names
        if name.lower().endswith(image_suffixes)
    )

    with_faces = 0
    total = 0  # stays 0 when the generator is empty
    for total, path in enumerate(candidate_paths, start=1):
        if has_faces(path, face_cascade):
            with_faces += 1

    return with_faces, total

In [39]:
# Directory tree that will be scanned for artwork images.
dataset_path = '/home/oem/eliza/DL/project/data/deepfakeart/original_all'

# Run the recursive scan over the whole tree with the cascade loaded earlier.
artworks_with_faces, total_artworks = count_artworks_with_faces_recursive(
    dataset_path, face_cascade
)

# Report the tally. The empty case is handled first, so the percentage below is
# only computed when there is at least one processed artwork to divide by.
if total_artworks <= 0:
    print("No valid artworks found in the dataset.")
else:
    print(f"Number of artworks with faces: {artworks_with_faces}")
    print(f"Total number of artworks processed: {total_artworks}")
    print(f"Percentage of artworks with faces: {artworks_with_faces / total_artworks * 100:.2f}%")



Number of artworks with faces: 5078
Total number of artworks processed: 11047
Percentage of artworks with faces: 45.97%
