Process on folder

In [2]:
import os
import cv2
from PIL import Image
from retinaface import RetinaFace
from transformers import pipeline
from tqdm import tqdm
import time

def detect_image(input_folder, output_folder):
    # Create the output folder if it doesn't exist
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Initialize tqdm to show progress bar
    image_files = [f for f in os.listdir(input_folder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    progress_bar = tqdm(total=len(image_files), desc="Processing")

    # Iterate over the images in the input folder
    for image_file in image_files:
        start_time = time.time()
        # Path to input image
        image_path = os.path.join(input_folder, image_file)

        # Detect faces using RetinaFace
        resp = RetinaFace.detect_faces(image_path)

        # Load the image
        img = cv2.imread(image_path)

        # Iterate over detected faces
        for face_id, face_data in resp.items():
            # Extract facial area coordinates
            x1, y1, x2, y2 = face_data['facial_area']
            
            # Extract the face region
            face_img = img[y1:y2, x1:x2]

            # Convert the face image to PIL image format
            pil_image = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
        
            # Draw the bounding box on the image
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            

        # Save the processed image to the output folder
        output_path = os.path.join(output_folder, f"processed_{image_file}")
        cv2.imwrite(output_path, img)

        # Update progress bar
        progress_bar.update(1)

        end_time = time.time()
        process_time = end_time - start_time
        print(f"Processing time for {image_file}: {process_time:.2f} seconds")

    # Close the progress bar
    progress_bar.close()

# Specify the input and output folders
input_folder = "input_folder"
output_folder = "output_frames"

# Detect image and save processed images
detect_image(input_folder, output_folder)


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Processing:  25%|██▌       | 1/4 [00:40<02:00, 40.33s/it]

Processing time for multiple_pople1.jpg: 40.33 seconds


Processing:  50%|█████     | 2/4 [01:25<01:26, 43.39s/it]

Processing time for multiple_pople3.jpg: 45.53 seconds


Processing:  75%|███████▌  | 3/4 [02:31<00:53, 53.57s/it]

Processing time for mutiple_pople2.jpg: 65.68 seconds


Processing: 100%|██████████| 4/4 [03:28<00:00, 52.07s/it]

Processing time for prs.jpg: 56.72 seconds





Process on Video

In [3]:
import cv2
import os
from PIL import Image
from retinaface import RetinaFace
from transformers import pipeline
from tqdm import tqdm

def detect_video(input_video_path, output_video_path):
    # Load the emotion detection pipeline
    emotion_pipe = pipeline('image-classification', model='Saravanan290702/facial_emotions_image_detection')

    # Create a VideoCapture object
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print("Error: Could not open the video file.")
        return

    # Get the video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    # Initialize tqdm to show progress bar
    progress_bar = tqdm(total=frame_count, desc="Processing")

    # Process each frame of the video
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # Detect faces using RetinaFace
        resp = RetinaFace.detect_faces(frame)

        # Iterate over detected faces
        for face_id, face_data in resp.items():
            # Extract facial area coordinates
            x1, y1, x2, y2 = face_data['facial_area']
            
            # Extract the face region
            face_img = frame[y1:y2, x1:x2]

            # Convert the face image to PIL image format
            pil_image = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
            
            # Draw the bounding box on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            
        # Write the frame to the output video
        out.write(frame)

        # Update progress bar
        progress_bar.update(1)

    # Release VideoCapture and VideoWriter objects
    cap.release()
    out.release()

    # Close the progress bar
    progress_bar.close()

    print("Processing complete. Output video saved successfully.")

# Specify the input and output video paths
input_video_path = "1.mp4"
output_video_path = "output_video.avi"

# Detect emotions and draw bounding boxes on the input video frames, and output the processed video
detect_video(input_video_path, output_video_path)


All PyTorch model weights were used when initializing TFViTForImageClassification.

All the weights of TFViTForImageClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.
Processing:   1%|          | 8/1145 [07:10<16:45:32, 53.06s/it]

Run on Webcam

In [1]:
import cv2
from PIL import Image
from retinaface import RetinaFace
from transformers import pipeline

def detect_webcam():
    
    # Create a VideoCapture object for webcam
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open the webcam.")
        return

    # Get the webcam properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = 15  # Assuming a standard webcam captures at 30 frames per second

    # Create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output_video.avi', fourcc, fps, (frame_width, frame_height))

    # Process each frame from the webcam
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Detect faces using RetinaFace
        resp = RetinaFace.detect_faces(frame)

        # Iterate over detected faces
        for face_id, face_data in resp.items():
            # Extract facial area coordinates
            x1, y1, x2, y2 = face_data['facial_area']

            # Extract the face region
            face_img = frame[y1:y2, x1:x2]

            # Convert the face image to PIL image format
            pil_image = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))

            # Draw the bounding box on the frame
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Write the frame to the output video
        out.write(frame)

        # Display the frame
        cv2.imshow('Webcam Detection', frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release VideoCapture and VideoWriter objects
    cap.release()
    out.release()
    cv2.destroyAllWindows()

    print("Processing complete. Output video saved successfully.")

# Call the function to start webcam detection
detect_webcam()





Single Image input

In [1]:
import cv2
from PIL import Image
from retinaface import RetinaFace
from transformers import pipeline
from tqdm import tqdm

def detect_image(image_path):
    
    # Detect faces using RetinaFace
    resp = RetinaFace.detect_faces(image_path)

    # Load the image
    img = cv2.imread(image_path)

    # Initialize tqdm to show progress bar
    progress_bar = tqdm(total=len(resp), desc="Processing")

    # Iterate over detected faces
    for face_id, face_data in resp.items():
        # Extract facial area coordinates
        x1, y1, x2, y2 = face_data['facial_area']
        
        # Extract the face region
        face_img = img[y1:y2, x1:x2]

        # Convert the face image to PIL image format
        pil_image = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
        
        # Draw the bounding box on the image
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        
        
        # Update progress bar
        progress_bar.update(1)

    # Close the progress bar
    progress_bar.close()

    return img

# Specify the path to the input image
image_path = "masterimage.jpg"

# Detect emotions and draw bounding boxes on the image
result_image = detect_image(image_path)

# Save the image with the bounding boxes and predicted emotions drawn to disk
cv2.imwrite('5-out.jpg', result_image)





Processing: 100%|██████████| 49/49 [00:00<?, ?it/s]


True