#### Using the MTCNN model for face detection and cropping

After reading documentation on a study of gender detection for masked faces, I came across the MTCNN model, which is supposed to have fairly high accuracy. I implemented the model and it worked, but it was too slow, cropping images at a rate of 0.4ms/image. After waiting for approximately 5 hours, I had 18k cropped images saved in a separate folder called `haarcascade`, which I have uploaded to the repo as well.


In [None]:
import os
import threading
from concurrent.futures import ThreadPoolExecutor

import cv2
import numpy as np
from mtcnn import MTCNN
from sklearn.model_selection import train_test_split

# Per-thread storage for the MTCNN detector. Building the detector is far more
# expensive than running it, so each worker thread builds one and reuses it
# instead of constructing a new detector for every single image.
_THREAD_STATE = threading.local()


def _get_detector():
    """Return the calling thread's MTCNN detector, creating it on first use."""
    detector = getattr(_THREAD_STATE, "detector", None)
    if detector is None:
        detector = MTCNN()
        _THREAD_STATE.detector = detector
    return detector


# Function to process each image
def process_image(filename, dataset_path, output_path, image_size=(96, 96)):
    """Detect the first face in one image, save the crop, and return the sample.

    The image is read from ``dataset_path/filename``, resized to
    ``image_size`` and passed to MTCNN. The first detected face is cropped
    from the resized image and written to
    ``output_path/<label>_<filename stem>.jpg``.

    Args:
        filename: Image file name; the label is its second ``_``-separated
            field (the original comment assumes ``age_gender_race_date.jpg``).
        dataset_path: Directory containing ``filename``.
        output_path: Existing directory that receives the cropped face.
        image_size: ``(width, height)`` handed to ``cv2.resize`` before
            detection.

    Returns:
        ``(image, label)`` where ``image`` is the resized full frame in
        OpenCV's BGR order (not the crop) and ``label`` is a string, or
        ``(None, None)`` when the file is skipped: unreadable image, no
        label in the name, no face found, or an empty crop.
    """
    image_path = os.path.join(dataset_path, filename)

    # Validate the filename first so a malformed name is skipped cheaply
    # instead of raising IndexError after the expensive detection step.
    name_parts = filename.split('_')
    if len(name_parts) < 2:
        print(f'Cannot extract a label from filename {image_path}')
        return None, None
    label = name_parts[1]  # Adjust based on the required label (age/gender/race)

    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        print(f'Could not read image {image_path}')
        return None, None

    # Resize the image
    image = cv2.resize(image, image_size)

    # OpenCV loads images as BGR, while MTCNN is documented to take RGB.
    # Convert only the copy used for detection; cropping and saving keep BGR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = _get_detector().detect_faces(rgb_image)
    if not result:
        print(f'No face detected on {image_path}')
        return None, None

    # Only the first detected face is used.
    x, y, width, height = result[0]['box']

    # MTCNN may report boxes that start outside the frame. Negative indices
    # would wrap around in NumPy slicing and yield a wrong or empty crop, so
    # clamp every corner to the image origin.
    x2, y2 = max(x + width, 0), max(y + height, 0)
    x, y = max(x, 0), max(y, 0)

    # Crop the face from the image
    cropped_face = image[y:y2, x:x2]
    if cropped_face.size == 0:
        print(f'Empty face crop on {image_path}')
        return None, None

    # Ensure output_path has a valid image file extension and save the cropped face
    output_file = os.path.join(output_path, f"{label}_{os.path.splitext(filename)[0]}.jpg")
    if not cv2.imwrite(output_file, cropped_face):
        # imwrite reports failure through its return value, not an exception.
        print(f'Failed to write {output_file}')

    return image, label

# Function to handle parallel processing
def process_images_in_parallel(dataset_path, output_path, max_workers=4):
    """Run ``process_image`` over every JPEG in a folder using a thread pool.

    Args:
        dataset_path: Directory whose ``.jpg`` files (any letter case) are
            processed.
        output_path: Directory passed through to ``process_image`` for the
            cropped faces.
        max_workers: Number of worker threads in the pool.

    Returns:
        ``(data, labels)``: two parallel lists holding, for every image that
        ``process_image`` did not skip, the returned image and its label.
        Entries follow the sorted filename order.

    Raises:
        Any exception raised inside ``process_image`` is re-raised here by
        ``future.result()``.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore any later seeded train/test split) reproducible.
    # The extension check is case-insensitive so "*.JPG" files are not missed.
    filenames = sorted(
        f for f in os.listdir(dataset_path) if f.lower().endswith(".jpg")
    )

    data = []
    labels = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_image, filename, dataset_path, output_path)
            for filename in filenames
        ]

        # Collect in submission order so data and labels stay aligned with
        # the sorted filenames regardless of which thread finishes first.
        for future in futures:
            image, label = future.result()
            if image is not None:
                data.append(image)
                labels.append(label)

    return data, labels

# Source folder with the raw images and destination folder for the crops
dataset_path = r'D:\ESC4\pclub_task3\dataset'
output_path = r'D:\ESC4\pclub_task3\haarcascade'

# The destination must exist before any cropped face is written to it
os.makedirs(output_path, exist_ok=True)

# Detect, crop and collect every image together with its label
data, labels = process_images_in_parallel(dataset_path, output_path)

# Stack the samples into float32 arrays; pixel values are divided by 255
data = np.asarray(data, dtype=np.float32) / 255.0
labels = np.asarray(labels, dtype=np.float32)

# Hold out 20% of the samples for testing, with a fixed seed for the shuffle
trainX, testX, trainY, testY = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

# Report the resulting array shapes
print(f"Training data shape: {trainX.shape}")
print(f"Testing data shape: {testX.shape}")
print(f"Training labels shape: {trainY.shape}")
print(f"Testing labels shape: {testY.shape}")


#### Double-checking the faces detected in the cropped images

I had this folder of cropped images, and on skimming through it I found weirdly cropped images in which the face wasn't fully present. So I decided to use another model, i.e. a Haar cascade, to check the detected images. I got a probability of the face being detected, and if the probability was less than 0.5, I did os.remove(file), which reduced the dataset of cropped images from 18k to 8k in 10 seconds.

In [None]:
def detect_and_filter_faces(input_folder, haar_cascade_path):
    """Keep only images in which a Haar cascade finds a face; delete the rest.

    WARNING: this function is destructive. Every readable image in
    ``input_folder`` for which the cascade reports no face is removed from
    disk with ``os.remove``.

    For every detected face, the top half of its bounding box is cropped,
    resized to 96x96 and collected together with the image's label. An image
    with several detections therefore contributes several samples.

    Args:
        input_folder: Directory of images named so that the second
            ``_``-separated field of the filename is an integer label.
        haar_cascade_path: Path to the Haar cascade XML file.

    Returns:
        ``(images, labels, c)``: a NumPy array of the 96x96 crops, a NumPy
        array of the matching integer labels, and the number of images that
        were deleted because no face was detected.
    """
    face_cascade = cv2.CascadeClassifier(haar_cascade_path)
    images = []
    labels = []
    c = 0

    # Sorted so the returned samples have a reproducible order; os.listdir
    # gives no ordering guarantee.
    for img_name in sorted(os.listdir(input_folder)):
        img_path = os.path.join(input_folder, img_name)

        # Files whose name carries no integer label are left untouched
        # instead of aborting the whole run.
        try:
            label = int(img_name.split('_')[1])
        except (IndexError, ValueError):
            print(f"Skipping {img_path}: cannot parse a label from the filename.")
            continue

        # cv2.imread returns None for anything it cannot decode
        # (sub-directories, non-image files). Such entries are skipped and
        # NOT deleted, since "unreadable" is not the same as "no face".
        image = cv2.imread(img_path)
        if image is None:
            print(f"Skipping {img_path}: not a readable image.")
            continue

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )
        if len(faces) == 0:
            os.remove(img_path)  # Delete the image if no face is detected
            print(f"No face detected in {img_path}, deleted.")
            c += 1
            continue

        for (x, y, w, h) in faces:
            # Crop the top 50% of the detected face
            # (presumably because the lower half is hidden by a mask - TODO confirm)
            top_half_face = image[y:y + h // 2, x:x + w]
            top_half_face = cv2.resize(top_half_face, (96, 96))
            images.append(top_half_face)
            labels.append(label)

    images = np.array(images)
    labels = np.array(labels)
    return images, labels, c

# Run the Haar-cascade filter and report how many images it deleted.
# NOTE(review): detect_and_filter_faces removes files from the folder it is
# given. input_folder points at the `dataset` directory, while the notes for
# this cell describe filtering the folder of cropped images - confirm the path
# before running.
input_folder = r"D:\ESC4\pclub_task3\dataset"
# NOTE(review): output_folder is not used by any of the lines shown here.
output_folder = r"D:\ESC4\pclub_task3\haarcascade"
# Frontal-face cascade file from the data directory exposed by cv2.data.haarcascades
haar_cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
images, labels, c=detect_and_filter_faces(input_folder, haar_cascade_path)
# c is the count of images deleted because no face was detected
print(c)
