#  Video to image 

In [10]:
import cv2
import os
import time
from datetime import datetime

def extract_frames_from_camera(output_folder, frame_rate=1, x=0, y=0, w=100, h=100, camera_source=0):
    
    # Ensure the output folder exists
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)



    while True:
        cap = cv2.VideoCapture(camera_source)
        ret, frame = cap.read()
        # Break the loop if no frames are returned from the camera
        if not ret or frame is None:
            print("Error: Could not retrieve frame from source.")
            break

        # Ensure ROI is within the frame bounds
        height, width, _ = frame.shape  # Get frame dimensions
        if x + w > width or y + h > height:
            print(f"Invalid ROI: {x=}, {y=}, {w=}, {h=}. Frame size: {width}x{height}")
            return

        # Crop the frame to the specified ROI
        cropped_frame = frame[y:y + h, x:x + w]

        # Ensure cropped frame is not empty before saving
        if cropped_frame.size == 0:
            print("Error: Cropped frame is empty.")
            return

        # Get current date and time for the filename
        current_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
        output_filename = os.path.join(output_folder, f"frame_{current_time}.jpeg")

        # Save the frame
        cv2.imwrite(output_filename, cropped_frame)
        print(f"Saved {output_filename}")

        # Wait for the specified interval before capturing the next frame
        cap.release()

    print("Finished extracting frames.")

# Example usage for an IP camera:
output_folder = 'camera_frames'
camera_url = 'rtsp://admin:inel378379@demo.smartvisor.tn:554/cam/realmonitor?channel=1&subtype=0&unicast=true&proto=Onvif'
# Coordinates for the ROI (x, y, w, h)
extract_frames_from_camera(output_folder, frame_rate=1, x=100, y=0, w=1100, h=1080, camera_source=camera_url)

Saved camera_frames\frame_2024-05-28_15-54-31.jpeg
Saved camera_frames\frame_2024-05-28_15-55-00.jpeg
Saved camera_frames\frame_2024-05-28_15-55-23.jpeg
Saved camera_frames\frame_2024-05-28_15-55-38.jpeg
Saved camera_frames\frame_2024-05-28_15-56-08.jpeg
Saved camera_frames\frame_2024-05-28_15-56-33.jpeg
Saved camera_frames\frame_2024-05-28_15-56-47.jpeg
Saved camera_frames\frame_2024-05-28_15-57-17.jpeg
Saved camera_frames\frame_2024-05-28_15-57-33.jpeg
Saved camera_frames\frame_2024-05-28_15-58-01.jpeg
Saved camera_frames\frame_2024-05-28_15-58-31.jpeg
Saved camera_frames\frame_2024-05-28_15-58-54.jpeg
Saved camera_frames\frame_2024-05-28_15-59-10.jpeg
Saved camera_frames\frame_2024-05-28_15-59-24.jpeg
Saved camera_frames\frame_2024-05-28_15-59-51.jpeg
Saved camera_frames\frame_2024-05-28_16-00-22.jpeg
Saved camera_frames\frame_2024-05-28_16-00-48.jpeg
Saved camera_frames\frame_2024-05-28_16-01-10.jpeg
Saved camera_frames\frame_2024-05-28_16-01-22.jpeg
Saved camera_frames\frame_2024-

KeyboardInterrupt: 

# Face detection , cropping and emotion

In [21]:
import os
import cv2
from ultralytics import YOLO
from facenet_pytorch import MTCNN
from deepface import DeepFace

# Function to detect faces and analyze emotions
def face_emotion_detection(image, filename, output_folder, face_confidence_threshold=0.4):
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Initialize MTCNN for face detection
    mtcnn = MTCNN(keep_all=True, device="cpu")

    # Detect faces and extract them with their confidences
    boxes, confidences = mtcnn.detect(image)
    if boxes is not None and confidences is not None:
        for i, (box, confidence) in enumerate(zip(boxes, confidences)):
            # Only consider faces with confidence greater than the threshold
            if confidence > face_confidence_threshold:
                # Ensure valid bounding box coordinates
                x1, y1, x2, y2 = box.astype(int)

                # Extract the face from the main image
                face = image[y1:y2, x1:x2]

                # Check if the face is valid (non-empty)
                if face is not None and face.size > 0:
                    # Detect emotion using DeepFace
                    result = DeepFace.analyze(face, actions=["emotion"], enforce_detection=False)

                    # Ensure result is a dictionary
                    if isinstance(result, list):
                        result = result[0]  # Get the first element if it's a list

                    # Get the detected emotion
                    emotion = result.get("dominant_emotion", "unknown")  # Fallback to "unknown" if not found

                    # Save the face with the detected emotion in the filename
                    output_filename = f"{filename.split('.')[0]}_face_{i}_{emotion}.jpg"
                    output_path = os.path.join(output_folder, output_filename)

                    cv2.imwrite(output_path, face)  # Save the cropped face
                    print(f"Face saved with emotion: {output_path}")

    else:
        print(f"No faces detected in {filename}.")

# Define input and output folders
input_folder = "outputcameratest"  # Folder containing images to process
output_folder = "faces5"  # Folder to save detected faces with emotion

# Load the YOLO model for person detection
model = YOLO("yolov8n.pt")

# Confidence thresholds
yolo_confidence_threshold = 0.7  # YOLO detection threshold

# Get all image files in the input folder with valid extensions
image_files = [f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

# Loop through all files in the input folder
for image_file in image_files:
    # Get the full path of the image file
    image_path = os.path.join(input_folder, image_file)
    
    # Perform inference to detect persons using YOLO
    results = model(image_path)
    
    # Load the image with OpenCV
    image = cv2.imread(image_path)
    
    # Check if any persons are detected with high confidence
    if results:
        for result in results:
            # Loop through all detected boxes
            for detection in result.boxes:
                if detection.cls == 0 and detection.conf >= yolo_confidence_threshold:
                    # Get bounding box coordinates
                    xmin, ymin, xmax, ymax = detection.xyxy[0]

                    # Crop the person from the image
                    person_crop = image[int(ymin):int(ymax), int(xmin):int(xmax)]

                    # Detect faces and analyze emotions using the face_emotion_detection function
                    face_emotion_detection(person_crop, image_file, output_folder, face_confidence_threshold=0)



image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-10-45.jpeg: 640x640 3 persons, 9 cars, 219.0ms
Speed: 9.0ms preprocess, 219.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-10-45.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-10-49.jpeg: 640x640 3 persons, 24 cars, 118.0ms
Speed: 6.0ms preprocess, 118.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-10-56.jpeg: 640x640 1 person, 14 cars, 1 refrigerator, 98.0ms
Speed: 4.0ms preprocess, 98.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-10-58.jpeg: 640x640 1 person, 12 cars, 116.0ms
Speed: 7.0ms preprocess, 116.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-11-00.jpeg: 640x6

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-12-16.jpeg: 640x640 2 persons, 120.0ms
Speed: 6.0ms preprocess, 120.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-12-16.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-12-18.jpeg: 640x640 1 person, 127.7ms
Speed: 5.0ms preprocess, 127.7ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-12-20.jpeg: 640x640 1 person, 141.0ms
Speed: 6.0ms preprocess, 141.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-12-20.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-12-22.jpeg: 640x640 1 person, 144.0ms
Speed: 6.0ms preprocess, 144.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-12-24.jpeg: 640


image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-14-11.jpeg: 640x640 15 cars, 143.0ms
Speed: 8.0ms preprocess, 143.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-14-13.jpeg: 640x640 (no detections), 124.0ms
Speed: 7.0ms preprocess, 124.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-14-19.jpeg: 640x640 1 parking meter, 130.0ms
Speed: 7.0ms preprocess, 130.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-14-24.jpeg: 640x640 5 cars, 133.0ms
Speed: 6.0ms preprocess, 133.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-14-35.jpeg: 640x640 3 persons, 17 cars, 119.0ms
Speed: 5.0ms preprocess, 119.0ms inference, 3.0ms postprocess 

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-17-07.jpeg: 640x640 3 persons, 1 car, 1 bus, 118.0ms
Speed: 7.0ms preprocess, 118.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-17-07.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-17-11.jpeg: 640x640 3 persons, 1 car, 1 skateboard, 113.0ms
Speed: 7.0ms preprocess, 113.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-17-11.jpeg.
No faces detected in frame_2024-05-28_16-17-11.jpeg.
No faces detected in frame_2024-05-28_16-17-11.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-17-13.jpeg: 640x640 3 persons, 121.0ms
Speed: 9.0ms preprocess, 121.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-17-13.jpeg.
No faces detected in frame_2024-05-28_16-17-13.jpeg.

image 1/1 C:\Users\tikou\pfa\output

Speed: 6.0ms preprocess, 109.6ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-19-34.jpeg: 640x640 5 cars, 107.7ms
Speed: 4.0ms preprocess, 107.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-19-50.jpeg: 640x640 8 persons, 110.0ms
Speed: 3.0ms preprocess, 110.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-19-50.jpeg.
No faces detected in frame_2024-05-28_16-19-50.jpeg.
No faces detected in frame_2024-05-28_16-19-50.jpeg.
No faces detected in frame_2024-05-28_16-19-50.jpeg.
No faces detected in frame_2024-05-28_16-19-50.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-19-58.jpeg: 640x640 5 persons, 121.0ms
Speed: 6.0ms preprocess, 121.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)
Face saved with emotion: fac


image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-22-27.jpeg: 640x640 3 persons, 1 car, 1 truck, 1 dog, 107.1ms
Speed: 3.0ms preprocess, 107.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-22-29.jpeg: 640x640 2 cars, 103.2ms
Speed: 4.0ms preprocess, 103.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-22-33.jpeg: 640x640 2 cars, 119.0ms
Speed: 4.0ms preprocess, 119.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-22-36.jpeg: 640x640 2 persons, 1 car, 105.2ms
Speed: 4.0ms preprocess, 105.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
Face saved with emotion: faces5\frame_2024-05-28_16-22-36_face_0_neutral.jpg

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-22-41.jpeg: 640x


image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-25-14.jpeg: 640x640 15 cars, 1 truck, 113.3ms
Speed: 7.0ms preprocess, 113.3ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-25-19.jpeg: 640x640 3 persons, 1 car, 1 umbrella, 110.1ms
Speed: 4.0ms preprocess, 110.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-25-19.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-25-25.jpeg: 640x640 3 persons, 1 car, 128.0ms
Speed: 5.0ms preprocess, 128.0ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-25-25.jpeg.

image 1/1 C:\Users\tikou\pfa\outputcameratest\frame_2024-05-28_16-25-29.jpeg: 640x640 1 person, 126.0ms
Speed: 6.0ms preprocess, 126.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)
No faces detected in frame_2024-05-28_16-25-29

# Number of people 

In [20]:
import os
import cv2
import torch
import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image  # Import PIL for image operations
import shutil

# Instantiate the MTCNN model for face detection
mtcnn = MTCNN(keep_all=True, device='cuda' if torch.cuda.is_available() else 'cpu')  # Set `keep_all=True` to detect all faces in an image

# Instantiate the InceptionResnetV1 model for face embeddings
resnet = InceptionResnetV1(pretrained='vggface2').eval()  # Load a pre-trained model

# Function to get face embeddings and crops
def get_face_embeddings_and_crops(image):
    # Detect faces and extract them using MTCNN
    try:
        faces, probs = mtcnn(image, return_prob=True)
    except RuntimeError as e:
        print(f"RuntimeError during face detection: {e}")
        return None, None
    
    if faces is not None and len(faces) > 0:
        # Compute embeddings for each detected face
        embeddings = resnet(faces)
        return embeddings, faces  # Return embeddings and cropped faces
    return None, None

# Function to compare embeddings
def face_matches(embedding1, embedding2, threshold=0.95):
    # Calculate the distance between two embeddings
    distance = (embedding1 - embedding2).norm().item()
    return distance < threshold  # Return True if distance is below threshold

# Example usage
input_folder = "faces4"
output_folder = "ff2"  # Folder to save new faces
os.makedirs(output_folder, exist_ok=True)  # Ensure output folder exists

image_files = [f for f in os.listdir(input_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

face_encodings = {}  # Dictionary to store face embeddings and their corresponding image file names
unique_faces = 0  # Counter for unique faces

# Loop through image files to extract face embeddings
for image_file in image_files:
    image_path = os.path.join(input_folder, image_file)

    # Load the image with OpenCV
    image = cv2.imread(image_path)

    # Convert the image from BGR (OpenCV) to RGB (PIL) for MTCNN
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Get face embeddings and face crops from the image
    embeddings, face_crops = get_face_embeddings_and_crops(image_rgb)

    if embeddings is not None and face_crops is not None:
        # Loop through each embedding and corresponding face crop
        for embedding, face_crop in zip(embeddings, face_crops):
            # Check if this face matches any existing face in the dictionary
            match_found = False

            for known_file, known_embedding in face_encodings.items():
                if face_matches(known_embedding, embedding):
                    match_found = True
                    break

            if not match_found:
                # If no match is found, add it to the dictionary
                face_encodings[image_file] = embedding
                unique_faces += 1  # Increment the counter for new faces
                out_image_path = os.path.join(output_folder, image_file)
                shutil.copy(image_path, out_image_path)

# Show the total number of unique faces detected
print(f"Total number of unique faces detected: {unique_faces}")


RuntimeError during face detection: torch.cat(): expected a non-empty list of Tensors
RuntimeError during face detection: torch.cat(): expected a non-empty list of Tensors
Total number of unique faces detected: 16
