In [11]:
import cv2
import mediapipe as mp
import os

# Crop the largest detected face (plus padding) out of every image under
# input_dir/<person>/ and save it, at its native resolution, under
# output_dir/<person>/ using the same file name.

# Mediapipe face detection solution module
mp_face_detection = mp.solutions.face_detection

# Input (raw dataset) and output (cropped faces) directories
input_dir = r"D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\data\raw"
output_dir = r"D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\data\faces"

os.makedirs(output_dir, exist_ok=True)

# Padding factor (e.g., 0.25 = 25% extra on all sides)
padding_factor = 0.25


def _relative_area(detection):
    """Return the area of a detection's relative bounding box (width * height)."""
    rel_box = detection.location_data.relative_bounding_box
    return rel_box.width * rel_box.height


def _padded_crop_box(rel_box, img_w, img_h, padding):
    """Turn a relative bounding box into padded pixel bounds clamped to the image.

    Args:
        rel_box: object exposing ``xmin``, ``ymin``, ``width`` and ``height``
            as fractions of the image size.
        img_w: image width in pixels.
        img_h: image height in pixels.
        padding: fraction of the box size added on every side.

    Returns:
        ``(x1, y1, x2, y2)`` suitable for ``img[y1:y2, x1:x2]``.
    """
    x, y = int(rel_box.xmin * img_w), int(rel_box.ymin * img_h)
    bw, bh = int(rel_box.width * img_w), int(rel_box.height * img_h)
    pad_w = int(bw * padding)
    pad_h = int(bh * padding)
    # Clamp so that padding never pushes the crop outside the image.
    x1 = max(0, x - pad_w)
    y1 = max(0, y - pad_h)
    x2 = min(img_w, x + bw + pad_w)
    y2 = min(img_h, y + bh + pad_h)
    return x1, y1, x2, y2


# Per-outcome tallies so that silently skipped files are visible in the summary
saved_count = 0
unreadable_count = 0
no_face_count = 0
empty_crop_count = 0
write_failed_count = 0

# The context manager closes the detector when the loop finishes, so the
# `detector` name must not be reused by later cells; create a new one instead.
with mp_face_detection.FaceDetection(
    model_selection=1, min_detection_confidence=0.5
) as detector:
    # Loop through each person's folder
    for person_name in os.listdir(input_dir):
        person_path = os.path.join(input_dir, person_name)
        if not os.path.isdir(person_path):
            continue  # skip if not a folder

        # Create corresponding output folder
        person_output = os.path.join(output_dir, person_name)
        os.makedirs(person_output, exist_ok=True)

        # Loop through each image in the folder
        for img_name in os.listdir(person_path):
            img_path = os.path.join(person_path, img_name)
            img = cv2.imread(img_path)

            if img is None:
                unreadable_count += 1
                continue

            # Convert to RGB for Mediapipe
            rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            results = detector.process(rgb)

            if not results.detections:
                no_face_count += 1
                continue  # skip if no face found

            # Keep only the largest face in the image
            largest = max(results.detections, key=_relative_area)

            # Extract the padded bounding box in pixel coordinates
            h, w, _ = img.shape
            x1, y1, x2, y2 = _padded_crop_box(
                largest.location_data.relative_bounding_box, w, h, padding_factor
            )

            face = img[y1:y2, x1:x2]

            if face.size == 0:
                empty_crop_count += 1
                continue

            # Save cropped face (no resizing); imwrite reports failure via its
            # boolean return value, so check it rather than assume success.
            out_path = os.path.join(person_output, img_name)
            if cv2.imwrite(out_path, face):
                saved_count += 1
            else:
                write_failed_count += 1
                print(f"Could not write {out_path}")

print(
    f"Saved: {saved_count} | unreadable: {unreadable_count} | "
    f"no face: {no_face_count} | empty crop: {empty_crop_count} | "
    f"write failed: {write_failed_count}"
)
print("✅ Cropping completed with padding! Faces saved in:", output_dir)



✅ Cropping completed with padding! Faces saved in: D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\data\faces


**Code to delete images that are smaller than 224 × 224 pixels**

In [None]:
import cv2
import os

# Folder of cropped faces, organised as one sub-folder per person
faces_dir = r"D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\data\faces"

# Both the width and the height must reach this many pixels
min_size = 224

# Visit every person folder; plain files at the top level are ignored
for person in os.listdir(faces_dir):
    person_dir = os.path.join(faces_dir, person)
    if os.path.isdir(person_dir):
        for file_name in os.listdir(person_dir):
            file_path = os.path.join(person_dir, file_name)
            image = cv2.imread(file_path)

            # Files for which imread gave back None are left untouched
            if image is not None:
                height, width = image.shape[:2]

                # Remove the file when either dimension falls short of min_size
                if min(height, width) < min_size:
                    os.remove(file_path)
                    print(f"Deleted {file_path} ({width}x{height})")

print("✅ Small images deleted!")


In [14]:
import os

# Root folder holding one sub-folder of face images per person
faces_dir = r"D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\data\faces"

min_images = 65  # folders with fewer images than this are tallied below
folders_below_threshold = 0

# File-name suffixes (compared case-insensitively) that count as images
image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

print("📂 Image Count per Person")
print("=" * 40)

# Report the image count of every person folder; non-directories are ignored
for person_name in os.listdir(faces_dir):
    person_path = os.path.join(faces_dir, person_name)
    if os.path.isdir(person_path):
        image_files = [
            entry for entry in os.listdir(person_path)
            if entry.lower().endswith(image_suffixes)
        ]
        image_count = len(image_files)

        print(f"{person_name:<20} : {image_count} images")

        # Tally folders that fall short of the minimum
        if image_count < min_images:
            folders_below_threshold += 1

print("=" * 40)
print(f"Number of folders with less than {min_images} images: {folders_below_threshold}")


📂 Image Count per Person
Aishwarya_Rai        : 82 images
Akshay_Kumar         : 70 images
Alia_Bhatt           : 70 images
Allu_Arjun           : 70 images
Amitabh_Bachchan     : 70 images
Angelina_Jolie       : 75 images
Ariana_Grande        : 70 images
Barack_Obama         : 70 images
Bhuwan_K.C           : 70 images
Billie_Eilish        : 79 images
Bill_Gates           : 70 images
Brad_Pitt            : 75 images
Chris_Evans          : 77 images
Cristiano_Ronaldo    : 70 images
Deepika_Padukone     : 77 images
Drake                : 70 images
Dwayne_Johnson       : 86 images
Ed_Sheeran           : 81 images
Emma_Watson          : 96 images
Hrithik_Roshan       : 70 images
Jackie_Chan          : 70 images
Jeff_Bezos           : 75 images
Jennifer_Lawrence    : 98 images
Justin_Bieber        : 88 images
Kamal_Haasan         : 70 images
Kareena_Kapoor       : 70 images
Kevin_Hart           : 73 images
Kim_Kardashian       : 95 images
Kylie_Jenner         : 80 images
Leonardo_DiCaprio 