In [None]:
pwd

In [None]:
!nvidia-smi

In [None]:
import torch
torch.cuda.is_available()

In [None]:
%pip install streamlit deepface smtplib

In [None]:
# =========================================
# Computer Vision Surveillance Demo
# =========================================
# Compatible with Jupyter Notebook / Google Colab
# Requirements: ultralytics, streamlit, opencv-python, deepface (smtplib is part of the standard library)

import cv2
import numpy as np
import smtplib
from email.mime.text import MIMEText
from ultralytics import YOLO
from deepface import DeepFace

# Load YOLO model (pre-trained COCO dataset)
yolo_model = YOLO("yolo11n.pt")

# Define authorized persons directory
AUTHORIZED_DIR = "family_members"  # or authorized_persons

# Function to check if face belongs to authorized person
def is_authorized_face(frame):
    """Return True when a face found in ``frame`` matches the authorized database.

    Args:
        frame: Image handed to ``DeepFace.find`` as ``img_path`` (the callers in
            this cell pass a NumPy crop of a video frame).

    Returns:
        bool: True if DeepFace reports at least one database match for any
        detected face, False otherwise. Errors raised by DeepFace are printed
        and treated as "not authorized".
    """
    # A zero-sized crop (degenerate bounding box) cannot contain a face.
    if hasattr(frame, "size") and frame.size == 0:
        return False
    try:
        matches = DeepFace.find(img_path=frame, db_path=AUTHORIZED_DIR, enforce_detection=False)
        # DeepFace.find returns one DataFrame per detected face, so every entry
        # has to be inspected, not just the first one.
        return any(not face_matches.empty for face_matches in matches)
    except Exception as e:
        print("Face recognition error:", e)
    return False


# Distance estimation (very simplified demo)
def estimate_distance(bbox, frame_width):
    """Estimate the camera-to-object distance in centimetres from a bounding box.

    Uses the pinhole relation ``distance = real_width * focal_length / pixel_width``
    with placeholder constants, so the result is only a rough demo value.

    Args:
        bbox: ``(x1, y1, x2, y2)`` box corners in pixels.
        frame_width: Width of the frame in pixels (currently unused; kept for
            interface compatibility).

    Returns:
        float: Estimated distance in cm, or ``float("inf")`` when the box has
        no positive width (which would otherwise divide by zero).
    """
    x1, _y1, x2, _y2 = bbox
    object_width_px = x2 - x1
    if object_width_px <= 0:
        return float("inf")
    focal_length = 500  # placeholder, needs calibration
    real_width_cm = 50  # average cattle/person shoulder width
    return (real_width_cm * focal_length) / object_width_px


# Send email alert
def send_alert_email(message):
    """Send ``message`` as a plain-text alert e-mail via Gmail's SMTP-over-SSL endpoint.

    The sender address, app password and recipient are read from the
    environment variables ``ALERT_EMAIL_SENDER``, ``ALERT_EMAIL_PASSWORD`` and
    ``ALERT_EMAIL_RECIPIENT``. When a variable is unset the placeholder value
    is used, so real credentials never need to be written into the notebook.

    Args:
        message: Body text of the e-mail.

    Returns:
        None. Success or failure is reported with ``print``; exceptions raised
        while connecting, logging in or sending are caught, not propagated.
    """
    import os  # local import: this cell's import block does not bring in os

    sender = os.environ.get("ALERT_EMAIL_SENDER", "your_email@gmail.com")
    # For Gmail, generate an app password rather than using the account password.
    password = os.environ.get("ALERT_EMAIL_PASSWORD", "your_app_password")
    recipient = os.environ.get("ALERT_EMAIL_RECIPIENT", "recipient_email@gmail.com")

    msg = MIMEText(message)
    # Escapes are used for the emoji so the text survives re-encoding of the file.
    msg["Subject"] = "\U0001F6A8 Security Alert - Unauthorized Activity Detected"
    msg["From"] = sender
    msg["To"] = recipient

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
            server.login(sender, password)
            server.sendmail(sender, recipient, msg.as_string())
            print("\u2705 Alert email sent.")
    except Exception as e:
        print("\u274C Email failed:", e)


# Threat detection algorithm
def detect_threat(frame):
    """Run YOLO on ``frame`` and flag detections whose crop is not an authorized face.

    Every detection gets a red box. Detections for which
    ``is_authorized_face`` returns False are additionally labelled with the
    estimated distance, and a message is printed when that distance is below
    200 cm.

    Args:
        frame: Video frame as a NumPy image array; it is annotated in place.

    Returns:
        The same ``frame`` object with the annotations drawn on it.
    """
    # NOTE(review): the comment below mentions class ids, but no class filter is
    # applied - every detected object goes through the face check.
    results = yolo_model(frame, show=True, conf=0.5)  # 0=person, 1=bicycle (as proxy for cattle)
    for r in results:
        for box in r.boxes.xyxy:  # bounding boxes
            x1, y1, x2, y2 = map(int, box)
            # Negative indices would wrap around when slicing the frame.
            x1, y1 = max(x1, 0), max(y1, 0)
            if x2 <= x1 or y2 <= y1:
                # Zero-area box: nothing to crop and no width to estimate distance from.
                continue
            distance = estimate_distance((x1, y1, x2, y2), frame.shape[1])

            # Copy the crop before drawing, so the red outline is not part of
            # the pixels handed to face recognition.
            crop = frame[y1:y2, x1:x2].copy()

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Check if authorized
            if not is_authorized_face(crop):
                # OpenCV's Hershey fonts only render ASCII, so the label is plain
                # text; the y position is clamped to keep it inside the frame.
                label_y = max(y1 - 10, 15)
                cv2.putText(frame, f"Unauthorized {distance:.1f}cm", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                if distance < 200:  # threshold: 2 meters
                    # send_alert_email("Unauthorized person detected near cattle!")
                    print("Unauthorized person detected near cattle!")

    return frame


# Main video surveillance loop (for webcam or video file)
def run_surveillance(video_source=0):
    """Read frames from ``video_source``, run threat detection and show the result.

    The loop ends when the source cannot deliver another frame or when the
    user presses ``q`` in the preview window.

    Args:
        video_source: Value passed to ``cv2.VideoCapture`` (camera index or
            path to a video file). Defaults to camera 0.
    """
    cap = cv2.VideoCapture(video_source)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            processed = detect_threat(frame)

            cv2.imshow("Surveillance Feed", processed)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the capture and close the window even if processing raised.
        cap.release()
        cv2.destroyAllWindows()


# Run the demo
run_surveillance(0)  # webcam
# run_surveillance("./media_files/animal_surveillance/goru-churi.mp4")  # video file


In [None]:
%pip install deepface

In [None]:
# =========================================
# Computer Vision Surveillance Demo
# =========================================
# Compatible with Jupyter Notebook / Google Colab
# Requirements: ultralytics, streamlit, opencv-python, deepface (smtplib is part of the standard library)

import cv2
import numpy as np
import smtplib
from email.mime.text import MIMEText
from ultralytics import YOLO
from deepface import DeepFace
import os
import matplotlib.pyplot as plt 
import numpy as np

# Load YOLO model (pre-trained COCO dataset)
yolo_model = YOLO("yolo11m.pt")

# Define authorized persons directory
AUTHORIZED_DIR = "family_members"  # or authorized_persons

# Create the directory if it doesn't exist to avoid errors
os.makedirs(AUTHORIZED_DIR, exist_ok=True)

# Function to check if face belongs to authorized person
def is_authorized_face(frame_crop):
    """
    Decide whether the cropped frame shows an authorized person.

    The crop is verified with DeepFace (VGG-Face model) against every
    .png/.jpg/.jpeg file found directly in AUTHORIZED_DIR. Returns True on the
    first positive verification and False otherwise, including for an empty
    crop or when an error occurs.
    """
    if frame_crop.size == 0:
        return False

    # Location where DeepFace is expected to keep the VGG-Face weights.
    weights_dir = os.path.join(os.path.expanduser("~"), ".deepface", "weights")
    weights_file = os.path.join(weights_dir, "vgg_face_weights.h5")

    # Only informational: the lookup below is attempted either way.
    if not os.path.exists(weights_file):
        print("VGG-Face weights file not found. DeepFace might get stuck downloading it.")
        print("Please ensure you have a stable internet connection.")
        print("If the download fails repeatedly, consider downloading it manually and placing it in", os.path.dirname(weights_file))

    image_suffixes = ('.png', '.jpg', '.jpeg')
    try:
        for entry in os.listdir(AUTHORIZED_DIR):
            if not entry.lower().endswith(image_suffixes):
                continue
            reference_path = os.path.join(AUTHORIZED_DIR, entry)
            try:
                # verify() performs both face detection and comparison.
                outcome = DeepFace.verify(img1_path=frame_crop,
                                          img2_path=reference_path,
                                          model_name='VGG-Face',
                                          enforce_detection=False)
                matched = outcome.get("verified", False)
            except Exception as e:
                # A failing comparison is reported and the next reference image is tried.
                print(f"Verification error: {e}")
                continue
            if matched:
                return True  # first match wins
    except Exception as e:
        print(f"Face recognition error: {e}")
    return False


# Distance estimation (very simplified demo)
def estimate_distance(bbox, frame_width):
    """Estimate the camera-to-object distance in centimetres from a bounding box.

    Uses the pinhole relation ``distance = real_width * focal_length / pixel_width``
    with placeholder constants, so the result is only a rough demo value.

    Args:
        bbox: ``(x1, y1, x2, y2)`` box corners in pixels.
        frame_width: Width of the frame in pixels (currently unused; kept for
            interface compatibility).

    Returns:
        float: Estimated distance in cm, or ``float("inf")`` when the box has
        no positive width (which would otherwise divide by zero).
    """
    x1, _y1, x2, _y2 = bbox
    object_width_px = x2 - x1
    if object_width_px <= 0:
        return float("inf")
    focal_length = 500  # placeholder, needs calibration
    real_width_cm = 50  # average cattle/person shoulder width
    return (real_width_cm * focal_length) / object_width_px


# Send email alert
def send_alert_email(message):
    """Send ``message`` as a plain-text alert e-mail via Gmail's SMTP-over-SSL endpoint.

    The sender address, app password and recipient are read from the
    environment variables ``ALERT_EMAIL_SENDER``, ``ALERT_EMAIL_PASSWORD`` and
    ``ALERT_EMAIL_RECIPIENT``. When a variable is unset the placeholder value
    is used, so real credentials never need to be written into the notebook.

    Args:
        message: Body text of the e-mail.

    Returns:
        None. Success or failure is reported with ``print``; exceptions raised
        while connecting, logging in or sending are caught, not propagated.
    """
    sender = os.environ.get("ALERT_EMAIL_SENDER", "your_email@gmail.com")
    # For Gmail, generate an app password rather than using the account password.
    password = os.environ.get("ALERT_EMAIL_PASSWORD", "your_app_password")
    recipient = os.environ.get("ALERT_EMAIL_RECIPIENT", "recipient_email@gmail.com")

    msg = MIMEText(message)
    # Escapes are used for the emoji so the text survives re-encoding of the file.
    msg["Subject"] = "\U0001F6A8 Security Alert - Unauthorized Activity Detected"
    msg["From"] = sender
    msg["To"] = recipient

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as server:
            server.login(sender, password)
            server.sendmail(sender, recipient, msg.as_string())
            print("\u2705 Alert email sent.")
    except Exception as e:
        print("\u274C Email failed:", e)


# Threat detection algorithm
def detect_threat(frame):
    """Run YOLO on ``frame`` and flag detections whose crop is not an authorized face.

    Every detection gets a red box. Detections for which
    ``is_authorized_face`` returns False are additionally labelled with the
    estimated distance, and a message is printed when that distance is below
    200 cm.

    Args:
        frame: Video frame as a NumPy image array; it is annotated in place.

    Returns:
        The same ``frame`` object with the annotations drawn on it.
    """
    # NOTE(review): the comment below mentions class ids, but no class filter is
    # applied - every detected object goes through the face check.
    results = yolo_model(frame)  # 0=person, 1=bicycle (as proxy for cattle)
    for r in results:
        for box in r.boxes.xyxy:  # bounding boxes
            x1, y1, x2, y2 = map(int, box)
            # Negative indices would wrap around when slicing the frame.
            x1, y1 = max(x1, 0), max(y1, 0)
            if x2 <= x1 or y2 <= y1:
                # Zero-area box: nothing to crop and no width to estimate distance from.
                continue
            distance = estimate_distance((x1, y1, x2, y2), frame.shape[1])

            # Copy the crop before drawing, so the red outline is not part of
            # the pixels handed to face recognition.
            crop = frame[y1:y2, x1:x2].copy()

            # Draw bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)

            # Check if authorized
            if not is_authorized_face(crop):
                # OpenCV's Hershey fonts only render ASCII, so the label is plain
                # text; the y position is clamped to keep it inside the frame.
                label_y = max(y1 - 10, 15)
                cv2.putText(frame, f"Unauthorized {distance:.1f}cm", (x1, label_y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                if distance < 200:  # threshold: 2 meters
                    # send_alert_email("Unauthorized person detected near cattle!")
                    print("Unauthorized person detected near cattle!")

    return frame


# Main video surveillance loop (for webcam or video file)
def run_surveillance(video_source=0):
    """Read frames from ``video_source``, run threat detection and show the result.

    The loop ends when the source cannot deliver another frame or when the
    user presses ``q`` in the preview window.

    Args:
        video_source: Value passed to ``cv2.VideoCapture`` (camera index or
            path to a video file). Defaults to camera 0.
    """
    cap = cv2.VideoCapture(video_source)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            processed = detect_threat(frame)

            cv2.imshow("Surveillance Feed", processed)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the capture and close the window even if processing raised.
        cap.release()
        cv2.destroyAllWindows()


# Run the demo
# Entry point: runs the surveillance loop on a recorded clip.
if __name__ == "__main__":
    run_surveillance('./media_files/WIN_20251103_14_11_20_Pro.mp4')  # recorded video file (pass 0 instead to use the webcam)
    # run_surveillance("./media_files/animal_surveillance/goru-churi.mp4")  # video file
    # run_surveillance("https://youtu.be/P_Puyf_Rb68")  # YouTube page URL - NOTE(review): presumably not directly readable by cv2.VideoCapture; confirm

In [None]:
import cv2
import numpy as np
import smtplib
from email.mime.text import MIMEText
from ultralytics import YOLO
from deepface import DeepFace
import os
import matplotlib.pyplot as plt 
 

DeepFace.verify(img1_path="./images/obama/image1.jpg", img2_path="./family_members/robin/robin_02.jpg")

In [None]:
# List of available backends, models, and distance metrics
backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface"]
models = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib", "SFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]

In [None]:
import cv2
import numpy as np
import smtplib
from email.mime.text import MIMEText
from ultralytics import YOLO
from deepface import DeepFace
import os
import matplotlib.pyplot as plt 
import numpy as np

# Names accepted by DeepFace for the face detector backend, the recognition
# model and the distance metric. The YOLO detector entry is spelled "yolov11m",
# matching the spelling used by the other cells of this notebook.
backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface", "yolov11m"]
models = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib", "SFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]

# DeepFace.stream(db_path = "family_members/")
# Path to the image for face recognition
# img = "./family_members/robin3.jpg"

# def face_recognition(img):
#     # Perform face recognition on the provided image
#     # Find faces and identify people using a specific model and distance metric
#     people = DeepFace.find(img_path=img, db_path="Data/", model_name=models[2], distance_metric=metrics[1])

#     # Display the original image
#     plt.imshow(cv2.imread(img))

#     # Print the identities of the recognized people
#     for person in people:
#         print(person['identity'][0].split('/')[1])

# Perform face recognition on a single image
# face_recognition(img)

def realtime_face_recognition():
    """Play the recorded clip and label every face DeepFace can match.

    Each frame is searched against the ``family_members/`` database with
    ``models[1]`` and ``metrics[1]``. For every detected face that has at least
    one match, a green box and the matched name are drawn. The annotated frame
    is shown in a window named ``frame``; press ``q`` to stop.
    """
    db_path = "family_members/"

    # Define a video capture object
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture("./media_files/WIN_20251103_14_11_20_Pro.mp4")

    try:
        # Create and size the preview window once instead of on every frame.
        cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('frame', 980, 450)

        while True:
            # Capture the video frame by frame
            ret, frame = cap.read()
            if not ret:
                break

            # One DataFrame of candidate matches is returned per detected face.
            people = DeepFace.find(img_path=frame, db_path=db_path, model_name=models[1], distance_metric=metrics[1], enforce_detection=False)

            for person in people:
                # A face without any database match yields an empty DataFrame;
                # indexing its first row would raise.
                if person.empty:
                    continue
                best = person.iloc[0]

                # Face bounding box in frame coordinates.
                x, y = int(best['source_x']), int(best['source_y'])
                w, h = int(best['source_w']), int(best['source_h'])
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

                # The name is the first path component below the database
                # folder, independent of the platform's path separator.
                relative = os.path.relpath(str(best['identity']), db_path)
                name = relative.replace("\\", "/").split("/")[0]
                cv2.putText(frame, name, (x, y), cv2.FONT_ITALIC, 1, (0, 0, 255), 2)

            # Display the resulting frame
            cv2.imshow('frame', frame)

            # Check if the 'q' button is pressed to quit the program
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the video capture object and close all windows, also on errors.
        cap.release()
        cv2.destroyAllWindows()

# Perform real-time face recognition using the webcam
realtime_face_recognition()

In [None]:
import cv2
from ultralytics import YOLO
from deepface import DeepFace
import numpy as np

# List of available backends, models, and distance metrics
backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface", "yolov11m"]
models = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib", "SFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]

# Load YOLO Model (pretrained)
yolo_model = YOLO("yolo11m.pt")  # loads the "yolo11m" checkpoint (the "m" variant, not the nano "n" one)

def process_frame(frame):
    """Detect objects with YOLO and, if a person is present, label recognised faces.

    Args:
        frame: Video frame as a NumPy image array. Face boxes and names are
            drawn onto it in place and it is shown in a window named ``frame``.

    Returns:
        tuple: ``(annotated_frame, objects)``. ``annotated_frame`` is the image
        produced by ``results[0].plot()`` (YOLO boxes only). ``objects`` is an
        empty list; this variant never fills it and only keeps it so callers
        can unpack two values.
    """
    import os  # local import: this cell's import block does not bring in os

    db_path = "family_members/"

    # Object detection with YOLO
    results = yolo_model(frame)

    # Frame with the YOLO annotations rendered on it
    annotated_frame = results[0].plot()
    objects = []

    # Class 0 is used as "person" here (COCO label mapping).
    person_detected = any(int(det.cls[0]) == 0 for det in results[0].boxes)

    # The search runs on the whole frame, so doing it once is enough however
    # many persons YOLO reported.
    if person_detected:
        try:
            people = DeepFace.find(img_path=frame, db_path=db_path, model_name=models[1], distance_metric=metrics[1], enforce_detection=False)

            for person in people:
                # A face without any database match yields an empty DataFrame.
                if person.empty:
                    continue
                best = person.iloc[0]

                # Face bounding box in frame coordinates.
                x, y = int(best['source_x']), int(best['source_y'])
                w, h = int(best['source_w']), int(best['source_h'])
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

                # First path component below the database folder, independent
                # of the platform's path separator.
                relative = os.path.relpath(str(best['identity']), db_path)
                name = relative.replace("\\", "/").split("/")[0]
                cv2.putText(frame, name, (x, y), cv2.FONT_ITALIC, 1, (0, 0, 255), 2)

            # Display the frame carrying the face annotations
            cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
            cv2.imshow('frame', frame)

        except Exception as e:
            print(f"Error analyzing face: {e}")

    return annotated_frame, objects

def main():
    """Play the recorded clip through ``process_frame`` and show the annotated frames.

    Stops when no further frame can be read or when ``q`` is pressed in the
    preview window.
    """
    # Relative path, as used by the other cells of this notebook; a leading "/"
    # would look for the folder at the filesystem root.
    cap = cv2.VideoCapture("./media_files/WIN_20251103_14_11_20_Pro.mp4")

    try:
        while cap.isOpened():
            # Read the next frame
            ret, frame = cap.read()

            if not ret:
                print("Gagal membaca frame dari kamera")
                break

            # Process the frame
            annotated_frame, objects = process_frame(frame)

            # Show the processed frame
            cv2.imshow("Real-Time Detection", annotated_frame)

            # Print information about the detected objects; .get() keeps the
            # loop alive when an entry lacks the age/emotion keys.
            if objects:
                print("Objek Terdeteksi:")
                for obj in objects:
                    print(f"- Person: Umur {obj.get('age')}, Emosi {obj.get('emotion')}")

            # Quit when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the windows, also on errors.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

In [None]:
"""
YOLO11 + DeepFace Surveillance Example
Single-file demo that uses Ultralytics YOLO11 for detection and DeepFace for face recognition.

Requirements:
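  pip install ultralytics deepface opencv-python imutils playsound numpy
  (use opencv-python rather than opencv-python-headless: the headless build cannot open the cv2.imshow preview window)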

Notes:
 - Adjust MODEL_PATH and DATABASE_DIR to your environment.
 - This script assumes you have a folder `database/` with subfolders per person containing reference images.
 - YOLO11 model used here is `yolo11n.pt` by default; change to a different checkpoint if needed.
 - Ensure you comply with local laws and obtain consent before deploying facial recognition.

Author: Generated by ChatGPT for user
"""

import os
import time
import argparse
from pathlib import Path

import cv2
import numpy as np

try:
    from ultralytics import YOLO
except Exception as e:
    raise ImportError("ultralytics is required. Install with `pip install ultralytics`\noriginal error: %s" % e)

try:
    from deepface import DeepFace
except Exception as e:
    raise ImportError("deepface is required. Install with `pip install deepface`\noriginal error: %s" % e)

# ------------------------- CONFIG -------------------------
MODEL_PATH = "yolo11n.pt"  # change to your YOLO11 checkpoint or path (e.g., "yolo11s.pt")
DATABASE_DIR = "database"   # folder structure: database/person_name/img1.jpg, img2.jpg, ...
VIDEO_SOURCE = 0            # 0 for webcam, or 'rtsp://...' or path to video file
# DeepFace recognition model name. The names are case-sensitive
# ('Facenet', 'Facenet512', 'ArcFace', 'VGG-Face', 'DeepFace', ...); a
# lower-case "facenet" is rejected by DeepFace.
RECOG_BACKEND = "Facenet"
REC_THRESHOLD = 0.4         # maximum cosine distance accepted as a match (lower = stricter)
SKIP_FRAMES = 2             # process every SKIP_FRAMES-th frame for speed
SHOW_WINDOW = True          # show the annotated frames in an OpenCV window
SAVE_LOG = True             # write one CSV row per recognised/unknown face
LOG_FILE = "surveillance_log.csv"

# ----------------------------------------------------------

# helper: prepare database embeddings using DeepFace
def build_db_embeddings(db_dir: str, model_name: str = RECOG_BACKEND):
    """Compute reference embeddings for every person folder below ``db_dir``.

    Args:
        db_dir: Directory containing one sub-directory per person.
        model_name: DeepFace model name passed to ``DeepFace.represent``.

    Returns:
        dict: person name -> 2-D array with one embedding per successfully
        processed image. Empty when ``db_dir`` is not a directory. Files that
        DeepFace cannot process are reported and skipped.
    """
    known = {}
    if not os.path.isdir(db_dir):
        print(f"[WARN] Database directory '{db_dir}' not found. No reference faces loaded.")
        return known

    print("[INFO] Building face embeddings from database...")
    for person in os.listdir(db_dir):
        folder = os.path.join(db_dir, person)
        if not os.path.isdir(folder):
            continue

        vectors = []
        for file_name in os.listdir(folder):
            img_path = os.path.join(folder, file_name)
            try:
                rep = DeepFace.represent(img_path, model_name=model_name, enforce_detection=True)
                # A list result is unwrapped to its first entry: either that
                # entry's "embedding" field or the entry itself.
                if isinstance(rep, list):
                    first = rep[0]
                    rep = first["embedding"] if "embedding" in first else first
                vectors.append(np.array(rep))
            except Exception as e:
                print(f"[WARN] Could not process {img_path}: {e}")

        if not vectors:
            continue
        known[person] = np.vstack(vectors)
        print(f"[INFO] Loaded {known[person].shape[0]} images for '{person}'")
    return known

# helper: match a face embedding to database
def match_embedding(embedding: np.ndarray, db: dict, threshold: float = REC_THRESHOLD):
    """Find the database person whose stored embedding is closest to ``embedding``.

    Distance is cosine distance (``1 - cosine similarity``); for each person
    the smallest distance over all stored embeddings is used.

    Args:
        embedding: 1-D query embedding.
        db: Mapping person name -> 2-D array of embeddings (one per row).
        threshold: Largest distance still accepted as a match.

    Returns:
        tuple: ``(name, distance)`` when the best distance is ``<= threshold``,
        otherwise ``(None, distance)``. With an empty ``db`` the distance is
        ``float("inf")``.
    """
    best_name = None
    best_score = float("inf")

    # The query vector is the same for every person, so normalise it once.
    emb_norm = embedding / (np.linalg.norm(embedding) + 1e-10)

    for person, embs in db.items():
        # Row-wise normalisation; the small epsilon avoids division by zero.
        embs_norm = embs / (np.linalg.norm(embs, axis=1, keepdims=True) + 1e-10)
        person_best = float(np.min(1.0 - np.dot(embs_norm, emb_norm)))
        if person_best < best_score:
            best_score = person_best
            best_name = person

    if best_score <= threshold:
        return best_name, best_score
    return None, best_score

# main surveillance loop
def run_surveillance(
    model_path=MODEL_PATH,
    db_dir=DATABASE_DIR,
    video_source=VIDEO_SOURCE,
    backend=RECOG_BACKEND,
    threshold=REC_THRESHOLD,
    skip_frames=SKIP_FRAMES,
    show_window=SHOW_WINDOW,
    save_log=SAVE_LOG,
    log_file=LOG_FILE,
):
    """Detect objects with YOLO, recognise faces inside them and annotate the stream.

    Args:
        model_path: YOLO checkpoint passed to ``YOLO``.
        db_dir: Reference-face directory passed to ``build_db_embeddings``.
        video_source: Value passed to ``cv2.VideoCapture``.
        backend: DeepFace model name used for embeddings.
        threshold: Maximum distance accepted by ``match_embedding``.
        skip_frames: Only every ``skip_frames``-th frame is processed; values
            below 1 are treated as 1.
        show_window: Show annotated frames in a window; ``q`` stops the loop.
        save_log: Write one CSV row per processed face to ``log_file``.
        log_file: Path of the CSV log (overwritten at start).

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # A step below 1 would make the modulo test divide by zero.
    skip_frames = max(1, int(skip_frames))

    # load YOLO model
    print(f"[INFO] Loading YOLO model: {model_path}")
    model = YOLO(model_path)

    # build database embeddings
    db = build_db_embeddings(db_dir, model_name=backend)

    # open video source
    cap = cv2.VideoCapture(video_source)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video source {video_source}")

    frame_idx = 0
    start_time = time.time()

    if save_log:
        with open(log_file, "w") as f:
            f.write("timestamp,frame,object,x1,y1,x2,y2,name,score\n")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("[INFO] End of stream or cannot fetch frame")
                break

            frame_idx += 1
            if frame_idx % skip_frames != 0:
                continue

            # run detection; a single frame is passed, so take the first result
            results = model.predict(frame, imgsz=640, conf=0.4, iou=0.5)
            r = results[0]

            for det in r.boxes:
                cls = int(det.cls[0])
                x1, y1, x2, y2 = map(int, det.xyxy[0])

                # NOTE: class mapping depends on the model's dataset; class 0 is
                # treated as 'person' here. Other classes are still searched
                # for faces - adjust if that is not wanted.
                label = 'person' if cls == 0 else f'class_{cls}'

                roi = frame[y1:y2, x1:x2]
                if roi.size == 0:
                    continue

                # locate faces inside the detection
                try:
                    faces = DeepFace.extract_faces(img_path=roi, detector_backend='opencv', enforce_detection=False)
                except Exception as e:
                    # Report the failure and treat the detection as face-less.
                    print(f"[WARN] face detection error: {e}")
                    faces = []

                for face_info in faces:
                    area = face_info.get('facial_area')
                    if area:
                        # Embed the raw pixels cut from the ROI with detection
                        # skipped; the 'face' array returned by extract_faces
                        # is already preprocessed and is not meant to be run
                        # through detection again.
                        fx, fy, fw, fh = (int(area[k]) for k in ('x', 'y', 'w', 'h'))
                        face_img = roi[max(fy, 0):fy + fh, max(fx, 0):fx + fw]
                    else:
                        face_img = face_info.get('face')
                    if face_img is None or face_img.size == 0:
                        continue

                    # get embedding
                    try:
                        rep = DeepFace.represent(
                            img_path=face_img,
                            model_name=backend,
                            detector_backend="skip",
                            enforce_detection=False,
                        )
                        if isinstance(rep, list):
                            rep = rep[0]["embedding"] if "embedding" in rep[0] else rep[0]
                        emb = np.array(rep)
                    except Exception as e:
                        print(f"[WARN] embedding error: {e}")
                        continue

                    name, score = match_embedding(emb, db, threshold)
                    text = f"{name} ({score:.3f})" if name else f"Unknown ({score:.3f})"

                    # facial_area is relative to the ROI; shift it into frame coordinates
                    if area:
                        ax1 = x1 + fx
                        ay1 = y1 + fy
                        ax2 = ax1 + fw
                        ay2 = ay1 + fh
                        cv2.rectangle(frame, (ax1, ay1), (ax2, ay2), (0, 255, 0), 2)
                        cv2.putText(frame, text, (ax1, ay1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

                    # log
                    if save_log:
                        ts = time.time()
                        with open(log_file, 'a') as f:
                            f.write(f"{ts},{frame_idx},{label},{x1},{y1},{x2},{y2},{name},{score}\n")

            # show
            if show_window:
                cv2.imshow('surveillance', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    except KeyboardInterrupt:
        print('[INFO] Interrupted by user')
    finally:
        cap.release()
        cv2.destroyAllWindows()
        print(f"[INFO] Done. Processed {frame_idx} frames in {time.time()-start_time:.2f}s")


if __name__ == '__main__':
    def _parse_source(value):
        """Turn a numeric camera index given on the command line (e.g. "0") into an int."""
        text = str(value)
        return int(text) if text.isdigit() else value

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default=MODEL_PATH)
    parser.add_argument('--db', default=DATABASE_DIR)
    # Command-line values arrive as strings; cv2.VideoCapture needs an int to
    # select a camera by index.
    parser.add_argument('--source', type=_parse_source, default=VIDEO_SOURCE)
    parser.add_argument('--backend', default=RECOG_BACKEND)
    parser.add_argument('--threshold', type=float, default=REC_THRESHOLD)
    parser.add_argument('--skip', type=int, default=SKIP_FRAMES)
    # parse_known_args() ignores arguments it does not know. Inside a Jupyter
    # kernel sys.argv carries kernel options (e.g. "-f <connection file>")
    # that parse_args() would reject with SystemExit.
    args, _unknown = parser.parse_known_args()

    run_surveillance(model_path=args.model, db_dir=args.db, video_source=args.source, backend=args.backend, threshold=args.threshold, skip_frames=args.skip)


In [None]:
# Import necessary libraries

from ultralytics import YOLO
from ultralytics.engine.results import Results  
from deepface import DeepFace
from PIL import Image
import shutil
import os

totalKnownFaces=0
totalUnknownFaces=0
knownNames =[]

def _reset_directory(path):
    """Create ``path`` if it is missing, otherwise delete everything inside it."""
    if not os.path.exists(path):
        os.makedirs(path)
        return
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)
        elif os.path.isdir(entry_path):
            shutil.rmtree(entry_path)


def faceRecognition(input_image):
    """Identify every cropped face in ``./faces/`` against the ``family_members`` database.

    ``./known/`` and ``./unknown/`` are emptied (or created) first. Each image
    in ``./faces/`` is then searched with DeepFace (Facenet512). A matched
    image is copied to ``./known/<name>.jpg`` (a later match with the same
    name overwrites the earlier copy); an unmatched one is copied to
    ``./unknown/<n>.jpg``. The module-level counters ``totalKnownFaces`` and
    ``totalUnknownFaces`` and the list ``knownNames`` are updated.

    Args:
        input_image: Not used by this function; kept for interface compatibility.

    Returns:
        list: One entry per processed image - the matched name or ``'unknown'``.
    """
    global totalKnownFaces
    global totalUnknownFaces

    # Directory containing the cropped objects
    cropped_objects_dir = "./faces/"
    # Output directories for unmatched / matched faces
    unknown_faces_dir = "./unknown/"
    known_faces_dir = "./known/"
    # Reference database searched by DeepFace
    db_path = "family_members"

    extracted_names = []

    _reset_directory(unknown_faces_dir)
    _reset_directory(known_faces_dir)

    for filename in os.listdir(cropped_objects_dir):
        if not filename.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        img_path = os.path.join(cropped_objects_dir, filename)
        model = DeepFace.find(img_path=img_path, db_path=db_path, enforce_detection=False, model_name="Facenet512")

        # A non-empty first result means the face matched a database entry.
        if model and len(model[0]['identity']) > 0:
            # The name is the first path component below the database folder,
            # independent of the platform's path separator.
            identity = str(model[0]['identity'][0])
            name = os.path.relpath(identity, db_path).replace("\\", "/").split("/")[0]

            print("Name:", name)

            # Save the known face into the 'known' folder
            known_faces_path = os.path.join(known_faces_dir, f"{name}.jpg")
            totalKnownFaces += 1
            knownNames.append(name)
            shutil.copy(img_path, known_faces_path)
        else:
            # No database match: file the crop under 'unknown'
            name = 'unknown'
            print("No face detected in:", img_path)
            unknown_faces_path = os.path.join(unknown_faces_dir, f"{totalUnknownFaces}.jpg")
            totalUnknownFaces += 1
            shutil.copy(img_path, unknown_faces_path)

        extracted_names.append(name)

    return extracted_names

def getKnownName():
    """Return the module-level list ``knownNames`` (the list object itself, not a copy)."""
    return knownNames

def setKnownName():
    """Reset the module-level ``knownNames`` to a new empty list."""
    global knownNames
    knownNames=[]

def setFacesToZero():
    """Reset both module-level face counters to 0."""
    global totalKnownFaces
    global totalUnknownFaces
    totalKnownFaces=0
    totalUnknownFaces=0

def getKnownFaces():
    """Return the current value of the ``totalKnownFaces`` counter."""
    return totalKnownFaces

def getUnknownFaces():
    """Return the current value of the ``totalUnknownFaces`` counter."""
    return totalUnknownFaces

def faceExtraction(input_image, model, results):
    """Crop every detected box out of ``input_image`` and save the crops to ``./faces``.

    ``./faces`` is deleted and recreated on every call; the crops are written
    as ``face0.jpg``, ``face1.jpg``, ...

    Args:
        input_image: Path or file object accepted by ``PIL.Image.open``.
        model: Not used by this function; kept for interface compatibility.
        results: Detection result exposing ``boxes`` (with ``xyxy`` and
            ``cls``) and ``names``. Without those attributes nothing is cropped.

    Returns:
        int: Number of crops written.
    """
    # The context manager closes the image file once all crops are saved.
    with Image.open(input_image) as image:
        detected_objects = []

        if hasattr(results, 'boxes') and hasattr(results, 'names'):
            # Each xyxy row only holds the four coordinates; the class id of a
            # box comes from the parallel ``cls`` sequence.
            for box, cls_id in zip(results.boxes.xyxy, results.boxes.cls):
                object_name = results.names.get(int(cls_id))
                x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
                detected_objects.append((object_name, (x1, y1, x2, y2)))

        # Create or clear the 'faces' directory
        if os.path.exists("./faces"):
            shutil.rmtree("./faces")
        os.makedirs("./faces")

        totalFaces = 0
        # Crop and save each detected object
        for i, (object_name, (x1, y1, x2, y2)) in enumerate(detected_objects):
            object_image = image.crop((x1, y1, x2, y2))
            object_image.save(f"./faces/face{i}.jpg")
            totalFaces += 1

    return totalFaces



def faceDetection(uploaded_file):
    """Detect faces in an uploaded image and store the crops via ``faceExtraction``.

    The upload is converted to a temporary JPEG, run through the YOLO model
    loaded from ``best.pt`` and the first result is handed to
    ``faceExtraction``. The temporary file is removed afterwards.

    Args:
        uploaded_file: Path or file object accepted by ``PIL.Image.open``.

    Returns:
        int: Number of face crops written by ``faceExtraction``.
    """
    temp_image_path = "./temp_image.jpg"  # Temporary path to store the converted image

    with Image.open(uploaded_file) as img:
        # JPEG cannot store alpha or palette images (e.g. RGBA PNG uploads),
        # so convert to RGB before saving.
        img.convert("RGB").save(temp_image_path, format="JPEG")

    try:
        # Use the Ultralytics model
        model = YOLO('best.pt')
        results: Results = model.predict(temp_image_path)[0]

        total_faces = faceExtraction(temp_image_path, model, results)
    finally:
        # Remove the temporary image even if detection or extraction failed.
        if os.path.exists(temp_image_path):
            os.remove(temp_image_path)

    return total_faces

In [None]:
import cv2
from ultralytics import YOLO
from deepface import DeepFace
import numpy as np
import os
import matplotlib.pyplot as plt 
import numpy as np

# Names of DeepFace detector backends, recognition models, and distance metrics.
# NOTE(review): "yolov11m" is listed twice in `backends`, and none of these three
# lists is read by the code in this cell.
backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface", "yolov11m", "yolov11m"]
models = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib", "SFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]

# Load the pretrained YOLO model used by process_frame()
yolo_model = YOLO("yolo11m.pt")  # yolo11m is the medium checkpoint, not the nano one

def process_frame(frame):
    """Detect objects in a frame and annotate every person with age and emotion.

    Args:
        frame: Image array; ``main`` passes frames read from ``cv2.VideoCapture``.

    Returns:
        tuple: ``(annotated_frame, objects)``. ``annotated_frame`` is the YOLO
        plot with one text overlay per analysed person; ``objects`` is a list of
        dicts with the keys ``label``, ``age``, ``emotion`` and ``coordinates``.
    """
    # Detect objects with YOLO
    results = yolo_model(frame)

    # Annotated copy of the frame produced by the detector
    annotated_frame = results[0].plot()
    objects = []

    for detection in results[0].boxes:
        box = detection.xyxy.tolist()[0]  # corner coordinates of this detection
        label = detection.cls[0]  # class id of this detection

        # Only people are analysed (class 0 is "person" in the COCO label map)
        if int(label) != 0:
            continue

        x1, y1, x2, y2 = map(int, box)
        # Negative indices would wrap around when slicing the array
        x1, y1 = max(x1, 0), max(y1, 0)
        cropped_face = frame[y1:y2, x1:x2]
        if cropped_face.size == 0:
            continue

        try:
            # Age and emotion come from DeepFace.analyze; DeepFace.find is a
            # database lookup and does not accept an ``actions`` argument.
            analysis = DeepFace.analyze(
                img_path=cropped_face,
                actions=['emotion', 'age'],
                enforce_detection=False,
            )
            # Older DeepFace releases return a single dict instead of a list
            if isinstance(analysis, dict):
                analysis = [analysis]

            age = analysis[0].get('age')
            emotion = analysis[0].get('dominant_emotion')

            # Add the text overlay above the box
            cv2.putText(annotated_frame,
                        f"Age: {age} Emotion: {emotion}",
                        (x1, y1-10),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        0.9,
                        (0, 255, 0),
                        2)

            objects.append({
                "label": "person",
                "age": age,
                "emotion": emotion,
                "coordinates": box
            })
        except Exception as e:
            print(f"Error analyzing face: {e}")

    return annotated_frame, objects

def main(source="./media_files/WIN_20251103_14_11_20_Pro.mp4"):
    """Play a video through ``process_frame`` and show the annotated frames.

    Args:
        source: Anything ``cv2.VideoCapture`` accepts (device index or file
            path). Defaults to the sample recording used by this notebook.

    The loop ends when no frame can be read or when 'q' is pressed. The capture
    and the preview window are released even if processing raises.
    """
    # Open the video source
    cap = cv2.VideoCapture(source)

    try:
        while cap.isOpened():
            # Read the next frame
            ret, frame = cap.read()

            if not ret:
                print("Gagal membaca frame dari kamera")
                break

            # Process the frame
            annotated_frame, objects = process_frame(frame)

            # Show the processed frame
            cv2.imshow("Real-Time Detection", annotated_frame)

            # Print information about the detected people
            if objects:
                print("Objek Terdeteksi:")
                for obj in objects:
                    print(f"- Person: Umur {obj['age']}, Emosi {obj['emotion']}")

            # Quit when 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Close the capture and the window, also after an error or interrupt
        cap.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

In [None]:
import cv2
from ultralytics import YOLO
# import numpy as np
import smtplib
from email.mime.text import MIMEText
from ultralytics import YOLO
from deepface import DeepFace
import os
import matplotlib.pyplot as plt 
import numpy as np

# Names of DeepFace detector backends, recognition models, and distance metrics
backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface"]
models = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib", "SFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]
# Load your YOLO11 model. For face detection, you can:
# - Use a pre-trained YOLO11 model and filter for 'person' class, then use a face detector.
# - Ideally, train a custom YOLO11 model on a face dataset for best results.
model = YOLO("yolo11m.pt")  # Or your custom face-detection model

# Directory of reference images searched by DeepFace.find
db_path = "family_members/"

# Start video capture (from a file or webcam)
cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")  # 0 for default webcam, or replace with file path

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Draw on a copy so overlays never end up inside a later crop
        display = frame.copy()

        # Stage 1: Run detection with YOLO11
        results = model(frame, conf=0.5)  # Adjust confidence as needed

        for r in results:
            for box in r.boxes:
                # Get bounding box coordinates
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cls = int(box.cls[0])

                # Optional: Filter for a specific class (e.g., 'person') if using a general detector
                # if model.names[cls] != 'person': continue

                # Crop the detection from the frame
                cropped_face = frame[y1:y2, x1:x2]

                # Check if the cropped area is valid
                if cropped_face.size == 0:
                    continue

                # Stage 2: Run face recognition and emotion analysis with DeepFace
                try:
                    # Search the crop (not the whole frame) against the database
                    matches = DeepFace.find(
                        img_path=cropped_face,
                        db_path=db_path,
                        model_name=models[1],
                        distance_metric=metrics[1],
                        enforce_detection=False,
                    )

                    identity = "Unknown"
                    if len(matches) > 0 and not matches[0].empty:
                        # The "identity" column holds the path of the matched image.
                        # Use the sub-folder name when the database is organised as
                        # db/<person>/<image>, otherwise fall back to the file stem.
                        match_path = str(matches[0].iloc[0]["identity"])
                        parent = os.path.basename(os.path.dirname(match_path))
                        db_name = os.path.basename(os.path.normpath(db_path))
                        if parent and parent != db_name:
                            identity = parent
                        else:
                            identity = os.path.splitext(os.path.basename(match_path))[0]

                    # find() returns match tables only; the emotion comes from analyze()
                    analysis = DeepFace.analyze(
                        img_path=cropped_face,
                        actions=['emotion'],
                        enforce_detection=False,
                    )
                    # Older DeepFace releases return a single dict instead of a list
                    if isinstance(analysis, dict):
                        analysis = [analysis]
                    dominant_emotion = analysis[0]['dominant_emotion']

                    # Draw the bounding box and label
                    label = f"{identity} - {dominant_emotion}"
                    cv2.rectangle(display, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(display, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                except Exception as e:
                    # Handle cases where DeepFace analysis fails
                    print(f"DeepFace analysis error: {e}")

        # Display the resulting frame
        cv2.imshow('Facial Recognition Surveillance', display)

        # Break the loop on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window, also after an error or interrupt
    cap.release()
    cv2.destroyAllWindows()

In [None]:
import os
import time
import argparse
from pathlib import Path

import cv2
import numpy as np
# import cv2
# import numpy as np
import smtplib
from email.mime.text import MIMEText
from ultralytics import YOLO
from deepface import DeepFace
import os
import matplotlib.pyplot as plt 
import numpy as np

try:
    # no-op references to pacify linters about "is not accessed"
    from pathlib import Path as _Path  # noqa: F401
    import smtplib as _smtplib  # noqa: F401
    from email.mime.text import MIMEText as _MIMEText  # noqa: F401
    import matplotlib.pyplot as _plt  # noqa: F401
    import numpy as _np  # noqa: F401
    _ = (_Path, _smtplib, _MIMEText, _plt, _np)
except Exception:
    pass

# Names of DeepFace detector backends, recognition models, and distance metrics.
# NOTE(review): these three lists are not read by the functions defined in this cell.
backends = ["opencv", "ssd", "dlib", "mtcnn", "retinaface"]
models = ["VGG-Face", "Facenet", "Facenet512", "OpenFace", "DeepFace", "DeepID", "ArcFace", "Dlib", "SFace"]
metrics = ["cosine", "euclidean", "euclidean_l2"]

# Re-import with a friendlier installation hint on failure.
# NOTE(review): both packages are already imported unconditionally earlier in this
# cell, so a missing package fails there before these guards are reached.
try:
    from ultralytics import YOLO
except Exception as e:
    raise ImportError("ultralytics is required. Install with `pip install ultralytics`\noriginal error: %s" % e)

try:
    from deepface import DeepFace
except Exception as e:
    raise ImportError("deepface is required. Install with `pip install deepface`\noriginal error: %s" % e)

# ------------------------- CONFIG -------------------------
# These constants are the default arguments of run_surveillance() and the CLI.
MODEL_PATH = "yolo11m.pt"  # YOLO checkpoint name or path passed to YOLO()
DATABASE_DIR = "family_members/"   # expected layout: <DATABASE_DIR>/<person_name>/<image files>
VIDEO_SOURCE = "media_files/WIN_20251103_14_11_20_Pro.mp4"            # 0 for webcam, or 'rtsp://...' or path to video file
RECOG_BACKEND = "Facenet"  # DeepFace recognition model, passed as model_name to DeepFace.represent
REC_THRESHOLD = 0.25         # max cosine distance (1 - cosine similarity) accepted as a match; lower = stricter
SKIP_FRAMES = 2             # only frames whose index is a multiple of this value are processed
SHOW_WINDOW = True          # show annotated frames in an OpenCV window
SAVE_LOG = True             # write detections to LOG_FILE
LOG_FILE = "surveillance_log.csv"  # CSV path; rewritten at the start of every run

# ----------------------------------------------------------

# ----------------------------------------------------------

# helper: prepare database embeddings using DeepFace
def build_db_embeddings(db_dir: str, model_name: str = RECOG_BACKEND):
    """Compute reference face embeddings for every person folder in ``db_dir``.

    Each sub-directory of ``db_dir`` is treated as one person and every file in
    it is passed to ``DeepFace.represent``. Files that cannot be processed are
    reported and skipped.

    Args:
        db_dir: Directory containing one sub-directory per person.
        model_name: DeepFace recognition model used to compute the embeddings.

    Returns:
        dict: Person name mapped to a 2-D array with one embedding per row.
        Empty when ``db_dir`` does not exist or no image could be processed.
    """
    db = {}
    if not os.path.isdir(db_dir):
        print(f"[WARN] Database directory '{db_dir}' not found. No reference faces loaded.")
        return db

    print("[INFO] Building face embeddings from database...")
    # Sorted so the insertion order of `db` does not depend on the filesystem
    for person in sorted(os.listdir(db_dir)):
        person_dir = os.path.join(db_dir, person)
        if not os.path.isdir(person_dir):
            continue
        embeddings = []
        for img_name in sorted(os.listdir(person_dir)):
            img_path = os.path.join(person_dir, img_name)
            try:
                rep = DeepFace.represent(img_path, model_name=model_name, enforce_detection=True)
                if isinstance(rep, list):
                    if not rep:
                        raise ValueError("no embedding returned")
                    # A list of dicts has one entry per face; use the first face.
                    # A flat list of numbers is already the embedding itself.
                    if isinstance(rep[0], dict):
                        rep = rep[0]["embedding"]
                embeddings.append(np.array(rep))
            except Exception as e:
                print(f"[WARN] Could not process {img_path}: {e}")
        if embeddings:
            db[person] = np.vstack(embeddings)
            print(f"[INFO] Loaded {db[person].shape[0]} images for '{person}'")
    return db

# helper: match a face embedding to database
def match_embedding(embedding: np.ndarray, db: dict, threshold: float = REC_THRESHOLD):
    """Find the database person whose stored embedding is closest to ``embedding``.

    Closeness is the cosine distance ``1 - cosine_similarity``; for every person
    the smallest distance over all stored embeddings is used.

    Args:
        embedding: 1-D embedding of the face to identify.
        db: Person name mapped to a 2-D array with one embedding per row.
        threshold: Largest distance still accepted as a match.

    Returns:
        tuple: ``(name, distance)``. ``name`` is ``None`` when the best distance
        exceeds ``threshold``; for an empty ``db`` the distance is ``inf``.
    """
    best_name = None
    best_score = float("inf")
    if not db:
        return None, best_score

    # The probe normalisation does not depend on the person, so do it once.
    # The epsilon guards against division by zero for an all-zero vector.
    emb_norm = embedding / (np.linalg.norm(embedding) + 1e-10)

    for person, embs in db.items():
        embs_norm = embs / (np.linalg.norm(embs, axis=1, keepdims=True) + 1e-10)
        # distance = 1 - cosine similarity, one value per stored embedding
        dists = 1.0 - np.dot(embs_norm, emb_norm)
        person_best = float(dists[np.argmin(dists)])
        # Strict comparison keeps the first person on ties
        if person_best < best_score:
            best_score = person_best
            best_name = person

    if best_score <= threshold:
        return best_name, best_score
    return None, best_score

# main surveillance loop
def run_surveillance(
    model_path=MODEL_PATH,
    db_dir=DATABASE_DIR,
    video_source=VIDEO_SOURCE,
    backend=RECOG_BACKEND,
    threshold=REC_THRESHOLD,
    skip_frames=SKIP_FRAMES,
    show_window=SHOW_WINDOW,
    save_log=SAVE_LOG,
    log_file=LOG_FILE,
):
    """Detect objects in a video stream and label the faces found inside them.

    Every ``skip_frames``-th frame is run through YOLO. Each detection is cropped
    and passed to ``DeepFace.represent``, and the resulting embeddings are matched
    against the reference embeddings built from ``db_dir``.

    Args:
        model_path: YOLO checkpoint passed to ``YOLO()``.
        db_dir: Reference directory handed to ``build_db_embeddings``.
        video_source: Anything ``cv2.VideoCapture`` accepts (index, path, URL).
        backend: DeepFace recognition model name.
        threshold: Largest cosine distance accepted by ``match_embedding``.
        skip_frames: Frame stride; values below 1 are treated as 1.
        show_window: Show annotated frames in an OpenCV window ('q' quits).
        save_log: Write one CSV row per embedded face to ``log_file``.
        log_file: CSV path; overwritten at the start of the run.

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # A stride of 0 would raise ZeroDivisionError in the modulo below
    skip_frames = max(1, int(skip_frames))

    # load YOLO model
    print(f"[INFO] Loading YOLO model: {model_path}")
    model = YOLO(model_path)

    # build database embeddings
    db = build_db_embeddings(db_dir, model_name=backend)

    # open video source
    cap = cv2.VideoCapture(video_source)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video source {video_source}")

    frame_idx = 0
    start_time = time.time()
    log_handle = None

    try:
        if save_log:
            # One handle for the whole run instead of reopening the file per row
            log_handle = open(log_file, "w")
            log_handle.write("timestamp,frame,object,x1,y1,x2,y2,name,score\n")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("[INFO] End of stream or cannot fetch frame")
                break

            frame_idx += 1
            if frame_idx % skip_frames != 0:
                continue

            # Draw on a copy so overlays never end up inside a later crop
            annotated = frame.copy()

            # run detection; a single image is passed, so take the first result
            r = model.predict(frame, imgsz=640, conf=0.25, iou=0.5)[0]

            # iterate detections
            for det in r.boxes:
                cls = int(det.cls[0])
                x1, y1, x2, y2 = (int(v) for v in det.xyxy[0])
                # Negative indices would wrap around when slicing the array
                x1, y1 = max(0, x1), max(0, y1)

                # NOTE: class mapping depends on the model's dataset.
                # COCO uses 0=person; adjust if your model labels differ.
                label = 'person' if cls == 0 else f'class_{cls}'

                roi = frame[y1:y2, x1:x2]
                if roi.size == 0:
                    continue

                # Detect and embed the faces of the ROI in one call. This is the
                # same pipeline that built the reference embeddings; feeding the
                # normalised RGB output of extract_faces() back into represent()
                # would run detection and preprocessing a second time.
                try:
                    faces = DeepFace.represent(
                        img_path=roi,
                        model_name=backend,
                        detector_backend='opencv',
                        enforce_detection=False,
                    )
                except Exception as e:
                    print(f"[WARN] embedding error: {e}")
                    continue

                # Older DeepFace releases return the bare embedding vector
                if isinstance(faces, list) and faces and not isinstance(faces[0], dict):
                    faces = [{"embedding": faces}]

                for face_info in faces or []:
                    if not isinstance(face_info, dict) or "embedding" not in face_info:
                        continue
                    emb = np.array(face_info["embedding"])

                    name, score = match_embedding(emb, db, threshold)
                    if name:
                        text = f"{name} ({score:.3f})"
                    else:
                        text = f"Unknown ({score:.3f})"

                    # facial_area is relative to the ROI; convert to frame coordinates
                    area = face_info.get('facial_area')
                    if area:
                        ax1 = x1 + int(area['x'])
                        ay1 = y1 + int(area['y'])
                        ax2 = ax1 + int(area['w'])
                        ay2 = ay1 + int(area['h'])
                        cv2.rectangle(annotated, (ax1, ay1), (ax2, ay2), (0,255,0), 2)
                        cv2.putText(annotated, text, (ax1, ay1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)

                    # log
                    if log_handle is not None:
                        ts = time.time()
                        log_handle.write(f"{ts},{frame_idx},{label},{x1},{y1},{x2},{y2},{name},{score}\n")

            # Keep the file on disk current while the stream is still running
            if log_handle is not None:
                log_handle.flush()

            # show
            if show_window:
                cv2.imshow('surveillance', annotated)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

    except KeyboardInterrupt:
        print('[INFO] Interrupted by user')
    finally:
        if log_handle is not None:
            log_handle.close()
        cap.release()
        cv2.destroyAllWindows()
        print(f"[INFO] Done. Processed {frame_idx} frames in {time.time()-start_time:.2f}s")


if __name__ == '__main__':
    import sys
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default=MODEL_PATH)
    parser.add_argument('--db', default=DATABASE_DIR)
    parser.add_argument('--source', default=VIDEO_SOURCE, help="0 (int) for webcam, or path/rtsp string")
    parser.add_argument('--backend', default=RECOG_BACKEND)
    parser.add_argument('--threshold', type=float, default=REC_THRESHOLD)
    parser.add_argument('--skip', type=int, default=SKIP_FRAMES)

    # Use parse_known_args so Jupyter/ipykernel extra args (like --f=...) are ignored
    args, unknown = parser.parse_known_args()
    if unknown:
        print(f"[WARN] Ignored unknown args: {unknown}")

    # cv2.VideoCapture needs an int for a webcam index, but argparse delivers a
    # string. Convert numeric strings and leave paths/URLs untouched.
    video_source = args.source
    if isinstance(video_source, str):
        try:
            video_source = int(video_source)
        except ValueError:
            try:
                video_source = int(float(video_source))
            except (ValueError, OverflowError):
                pass  # not numeric: keep it as a path or URL

    # Run once, with the sanitised source
    run_surveillance(
        model_path=args.model,
        db_dir=args.db,
        video_source=video_source,
        backend=args.backend,
        threshold=args.threshold,
        skip_frames=args.skip,
    )


In [None]:
"""
Smart Security System with YOLO11, Face Recognition, and Pose Detection
Author: Your Name
Version: 1.0
"""

import cv2
import numpy as np
import face_recognition
import mediapipe as mp
from ultralytics import YOLO
import smtplib
import pygame
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import os
import logging
from datetime import datetime
from typing import List, Tuple, Optional, Dict
from dataclasses import dataclass
from pathlib import Path
import json

# Configure logging: INFO and above goes both to security_system.log (created in
# the current working directory) and to the console stream.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('security_system.log'),
        logging.StreamHandler()
    ]
)
# Module-level logger used by SecuritySystem
logger = logging.getLogger(__name__)

@dataclass
class SecurityConfig:
    """Configuration class for security system settings.

    Every field has a default, so ``SecurityConfig()`` can be created without
    arguments.
    """
    known_faces_dir: str = "family_members/"  # directory scanned for reference face images
    alarm_sound_path: str = "pols-aagyi-pols.mp3"  # audio file loaded into the pygame mixer
    alarm_cooldown: int = 3  # minimum seconds between two alarm playbacks
    confidence_threshold: float = 0.5  # passed to MediaPipe Pose as min_detection_confidence
    face_recognition_tolerance: float = 0.6  # NOTE(review): not read by the SecuritySystem methods in this cell
    pose_visibility_threshold: float = 0.5  # NOTE(review): not read by the SecuritySystem methods in this cell
    # Passed unchanged to cv2.VideoCapture; despite the name the default is a
    # video file path. An integer device index such as 0 selects a webcam instead.
    camera_index: str = "media_files/animal_surveillance/goru-churi.mp4"
    email_enabled: bool = False  # send_email_alert() returns False while this is off
    smtp_server: str = "smtp.gmail.com"
    smtp_port: int = 587  # the connection is upgraded with STARTTLS before login
    sender_email: str = ""
    sender_password: str = ""  # leave empty here and supply the value at runtime
    recipient_email: str = ""

class SecuritySystem:
    """Main security system class.

    Construction loads the YOLO and MediaPipe models, the known-face encodings,
    the alarm sound and the video capture, in that order.

    NOTE(review): the class as defined in this cell has no ``run`` method and no
    method that releases ``self.cap``; ``detection_history`` and ``frame_count``
    are initialised but not read by any method shown here.
    """

    def __init__(self, config: SecurityConfig):
        """Store the configuration and initialise all components.

        Args:
            config: Settings used by every setup step.

        Raises:
            RuntimeError: If the video source cannot be opened.
            Exception: Whatever model loading raises is logged and re-raised.
        """
        self.config = config
        self.known_face_encodings = []
        self.known_face_names = []
        self.last_alarm_time = 0
        self.detection_history = {}
        self.frame_count = 0
        self.sound_loaded = False

        # Initialize components
        self._initialize_models()
        self._load_known_faces()
        self._setup_audio()
        self._setup_camera()

    def _initialize_models(self):
        """Initialize YOLO and MediaPipe models.

        Sets ``self.model``, ``self.mp_pose``, ``self.pose`` and
        ``self.mp_drawing``. Any failure is logged and re-raised.
        """
        try:
            self.model = YOLO("yolo11n.pt")
            logger.info("YOLO model loaded successfully")

            self.mp_pose = mp.solutions.pose
            self.pose = self.mp_pose.Pose(
                static_image_mode=False,
                model_complexity=1,
                enable_segmentation=False,
                min_detection_confidence=self.config.confidence_threshold
            )
            self.mp_drawing = mp.solutions.drawing_utils
            logger.info("MediaPipe pose model loaded successfully")

        except Exception as e:
            logger.error(f"Failed to initialize models: {e}")
            raise

    def _load_known_faces(self):
        """Load known faces from directory.

        Every image file directly inside ``config.known_faces_dir`` contributes
        the encoding of its first detected face; the file name without extension
        becomes the person's name. A missing directory or an unreadable image is
        logged and skipped.
        """
        known_faces_path = Path(self.config.known_faces_dir)

        if not known_faces_path.exists():
            logger.warning(f"Known faces directory '{known_faces_path}' does not exist")
            return

        supported_formats = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

        # glob('*') is not recursive, so images in sub-directories are ignored
        for image_path in known_faces_path.glob('*'):
            if image_path.suffix.lower() in supported_formats:
                try:
                    image = face_recognition.load_image_file(str(image_path))
                    face_encodings = face_recognition.face_encodings(image)

                    if face_encodings:
                        # Only the first face of each image is kept
                        self.known_face_encodings.append(face_encodings[0])
                        name = image_path.stem
                        self.known_face_names.append(name)
                        logger.info(f"Loaded known face: {name}")
                    else:
                        logger.warning(f"No face found in {image_path}")

                except Exception as e:
                    logger.error(f"Error loading face from {image_path}: {e}")

        logger.info(f"Loaded {len(self.known_face_encodings)} known faces")

    def _setup_audio(self):
        """Setup audio system for alarms.

        Sets ``self.sound_loaded`` to True only when the alarm file exists and
        was loaded. A ``pygame.error`` is logged as a warning and leaves the
        alarm disabled.
        """
        try:
            pygame.mixer.init()
            if Path(self.config.alarm_sound_path).exists():
                pygame.mixer.music.load(self.config.alarm_sound_path)
                self.sound_loaded = True
                logger.info("Alarm sound loaded successfully")
            else:
                logger.warning(f"Alarm sound file not found: {self.config.alarm_sound_path}")
        except pygame.error as e:
            logger.warning(f"Could not load sound file: {e}")

    def _setup_camera(self):
        """Setup camera capture.

        Opens ``config.camera_index`` with ``cv2.VideoCapture`` and requests a
        640x480 frame size at 30 FPS.

        Raises:
            RuntimeError: If the source cannot be opened.
        """
        self.cap = cv2.VideoCapture(self.config.camera_index)
        if not self.cap.isOpened():
            logger.error("Could not open camera")
            raise RuntimeError("Camera initialization failed")

        # Set camera properties for better performance
        # NOTE(review): the return values of set() are not checked, so the source
        # may keep its own resolution and frame rate.
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        self.cap.set(cv2.CAP_PROP_FPS, 30)

        logger.info("Camera initialized successfully")

    def send_email_alert(self, alert_type: str = "THREAT", person_id: str = "Unknown") -> bool:
        """Send email alert with proper error handling.

        Args:
            alert_type: Short label used in the subject and body.
            person_id: Identifier of the person the alert is about.

        Returns:
            bool: True when the message was handed to the SMTP server. False when
            e-mail is disabled, the configuration is incomplete, or sending
            failed; failures are logged, not raised.
        """
        if not self.config.email_enabled:
            logger.info("Email alerts disabled")
            return False

        if not all([self.config.sender_email, self.config.sender_password, self.config.recipient_email]):
            logger.error("Email configuration incomplete")
            return False

        try:
            msg = MIMEMultipart()
            msg['From'] = self.config.sender_email
            msg['To'] = self.config.recipient_email
            msg['Subject'] = f"Security Alert: {alert_type}"

            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # The body keeps the source indentation of this triple-quoted string
            body = f"""
            Security Alert Details:
            - Alert Type: {alert_type}
            - Person: {person_id}
            - Time: {timestamp}
            - Location: Main Security Camera
            
            Please review the situation immediately.
            """
            msg.attach(MIMEText(body, 'plain'))

            with smtplib.SMTP(self.config.smtp_server, self.config.smtp_port) as server:
                # Upgrade to TLS before the credentials are sent
                server.starttls()
                server.login(self.config.sender_email, self.config.sender_password)
                server.sendmail(self.config.sender_email, self.config.recipient_email, msg.as_string())

            logger.info(f"Email alert sent: {alert_type} - {person_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to send email: {e}")
            return False

    def play_alarm(self) -> bool:
        """Play alarm sound with cooldown.

        Returns:
            bool: True when playback was started. False when no sound is loaded,
            the cooldown has not elapsed, or pygame reported an error.
        """
        # OpenCV tick counter converted to seconds
        current_time = cv2.getTickCount() / cv2.getTickFrequency()

        if self.sound_loaded and (current_time - self.last_alarm_time) > self.config.alarm_cooldown:
            try:
                pygame.mixer.music.play()
                self.last_alarm_time = current_time
                logger.info("Alarm sound played")
                return True
            except pygame.error as e:
                logger.error(f"Failed to play alarm: {e}")

        return False


# Entry point: build the system with the default configuration and start it.
# NOTE(review): the SecuritySystem class defined in this cell has no run() method,
# so the last line raises AttributeError unless run() is provided elsewhere.
if __name__ == "__main__":
    cfg = SecurityConfig()
    system = SecuritySystem(cfg)
    system.run()

In [None]:
!python security_system.py

In [None]:
import cv2

from ultralytics import solutions


def count_specific_classes(video_path, output_video_path, model_path, classes_to_count, region=None):
    """Count objects of selected classes that cross a line in a video.

    Every frame is passed through ``solutions.ObjectCounter`` and the annotated
    frame is appended to the output video.

    Args:
        video_path: Input video readable by ``cv2.VideoCapture``.
        output_video_path: Destination of the annotated video.
        model_path: YOLO checkpoint name or path.
        classes_to_count: Class ids the counter should consider.
        region: Points of the counting line or region. Defaults to the
            horizontal line ``[(20, 400), (1080, 400)]``.

    Raises:
        AssertionError: If the input video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), "Error reading video file"
    w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
    video_writer = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

    line_points = region if region is not None else [(20, 400), (1080, 400)]

    try:
        counter = solutions.ObjectCounter(show=True, region=line_points, model=model_path, classes=classes_to_count)

        while cap.isOpened():
            success, im0 = cap.read()
            if not success:
                print("Video frame is empty or processing is complete.")
                break
            results = counter(im0)
            video_writer.write(results.plot_im)
    finally:
        # Finalise the output file and free the capture even when counting fails
        cap.release()
        video_writer.release()
        cv2.destroyAllWindows()


count_specific_classes("./media_files/animal_surveillance/goru-churi.mp4", "output_specific_classes.avi", "yolo11n.pt", [0, 2])

# AI Security alarms

In [None]:
import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("./media_files/animal_surveillance/goru-churi.mp4")
assert cap.isOpened(), "Error reading video file"

# Video writer
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Placeholder values only; they are unused while authenticate() stays commented out
from_email = "abc@gmail.com"  # the sender email address
password = "---- ---- ---- ----"  # 16-digits password generated via: https://myaccount.google.com/apppasswords
to_email = "xyz@gmail.com"  # the receiver email address

try:
    # Initialize security alarm object
    securityalarm = solutions.SecurityAlarm(
        show=True,  # display the output
        model="yolo11m.pt",  # i.e. yolo11s.pt, yolo11m.pt
        records=3,  # total detections count to send an email
        conf=0.25
    )

    # securityalarm.authenticate(from_email, password, to_email)  # authenticate the email server

    # Process video
    while cap.isOpened():
        success, im0 = cap.read()

        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        results = securityalarm(im0)

        print(results)  # access the output

        video_writer.write(results.plot_im)  # write the processed frame.
finally:
    # Finalise the output file and free the capture even when processing fails
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()  # destroy all opened windows

In [None]:
import cv2

from ultralytics import solutions
import pygame


import os  # used below; this cell did not import it itself

# Setup alarm sound
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"
if os.path.exists(alarm_file):
    pygame.mixer.music.load(alarm_file)
else:
    print(f"Warning: Alarm file {alarm_file} not found")

cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
assert cap.isOpened(), "Error reading video file"

# Video writer
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Credentials are read from the environment so that no address or app password
# is stored in the notebook. Any secret that was committed earlier should be
# revoked at https://myaccount.google.com/apppasswords.
from_email = os.environ.get("SECURITY_ALARM_FROM_EMAIL")  # the sender email address
password = os.environ.get("SECURITY_ALARM_APP_PASSWORD")  # 16-character Gmail app password
to_email = os.environ.get("SECURITY_ALARM_TO_EMAIL")  # the receiver email address

try:
    # Initialize security alarm object
    securityalarm = solutions.SecurityAlarm(
        show=True,  # display the output
        model="yolo11m.pt",  # i.e. yolo11s.pt, yolo11m.pt
        records=1,  # total detections count to send an email
        classes=[0, 2],  # 0=person, 2=car (as proxy for cattle)
    )

    if from_email and password and to_email:
        securityalarm.authenticate(from_email, password, to_email)  # authenticate the email server
    else:
        print("Warning: e-mail credentials not set; running without e-mail authentication")

    # Process video
    while cap.isOpened():
        success, im0 = cap.read()

        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        results = securityalarm(im0)

        print(results)  # access the output

        video_writer.write(results.plot_im)  # write the processed frame.
finally:
    # Finalise the output file and free the capture even when processing fails
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()  # destroy all opened windows

In [None]:
import os
import cv2
from ultralytics import solutions
import pygame

# Initialize alarm sound
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"

if os.path.exists(alarm_file):
    pygame.mixer.music.load(alarm_file)
else:
    print(f"⚠️ Warning: Alarm file '{alarm_file}' not found — please check the path.")

# Open video
cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
assert cap.isOpened(), "❌ Error: Cannot read video file."

# Video writer setup
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Optional e-mail setup. Credentials are read from the environment so that no
# address or app password is stored in the notebook. Any secret that was
# committed earlier should be revoked.
from_email = os.environ.get("SECURITY_ALARM_FROM_EMAIL")
password = os.environ.get("SECURITY_ALARM_APP_PASSWORD")  # App password
to_email = os.environ.get("SECURITY_ALARM_TO_EMAIL")

try:
    # Security Alarm setup
    securityalarm = solutions.SecurityAlarm(
        show=True,             # Show annotated video
        model="yolo11m.pt",    # Use the YOLO11 medium model
        records=3,             # Number of detections to trigger event
        classes=[0, 19]        # 0=person, 19=cow
    )

    if from_email and password and to_email:
        securityalarm.authenticate(from_email, password, to_email)
    else:
        print("⚠️ Warning: e-mail credentials not set; running without e-mail authentication.")

    # --- PROCESS VIDEO ---
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("✅ Video processing completed.")
            break

        # Run YOLO detection
        results = securityalarm(im0)
        print(results)  # Log results

        # Without this the output file is created but never receives a frame
        video_writer.write(results.plot_im)
finally:
    # Finalise the output file and free the capture even when processing fails
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()


# AI Security
## Introduction
In this section, we will explore the various aspects of AI security, including potential threats, vulnerabilities, and best practices for securing AI systems.

## Threat Landscape
1. **Data Poisoning**: Attackers manipulate training data to compromise model integrity.
2. **Model Inversion**: Attackers use a model's outputs or parameters to reconstruct sensitive information about its training data.
3. **Adversarial Attacks**: Inputs are subtly altered to mislead AI models.

## Best Practices
- **Data Validation**: Implement robust data validation to detect anomalies.
- **Access Controls**: Enforce strict access controls to sensitive model components.
- **Regular Audits**: Conduct regular security audits and penetration testing.

## Conclusion
AI security is a critical consideration in the deployment of AI systems. By understanding the threat landscape and implementing best practices, organizations can better protect their AI assets.

In [None]:
import os
import cv2
from ultralytics import solutions
import pygame

# Start the pygame mixer and preload the alarm track when it exists on disk
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"

if not os.path.exists(alarm_file):
    # A missing file is only reported; the cell keeps running
    print(f"‚ö†Ô∏è Warning: Alarm file '{alarm_file}' not found ‚Äî please check the path.")
else:
    pygame.mixer.music.load(alarm_file)

# --- Define the Extended Class (as shown above) ---
class SoundAlarm(solutions.SecurityAlarm):
    """
    Extends SecurityAlarm to play a sound alarm when the detection threshold is exceeded.

    The alarm track is played through ``pygame.mixer.music``; the surrounding
    cell is responsible for initialising the mixer and loading the track.
    """
    def __init__(self, **kwargs):
        """Forward all keyword arguments to SecurityAlarm and clear the sound flag."""
        super().__init__(**kwargs)
        self.sound_played = False  # True once the sound was started for the current event

    def process(self, im0):
        """
        Run the parent's processing on one frame, then start or reset the sound alarm.

        Args:
            im0: Frame to analyse; passed unchanged to SecurityAlarm.process.

        Returns:
            The object returned by SecurityAlarm.process for this frame.
        """
        # Call the parent's process method (handles detection, annotation, and email)
        results = super().process(im0)

        total_det = len(self.clss)

        # Start the sound once per event, and only when the mixer is up and idle
        if total_det >= self.records and not self.sound_played:
            if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
                try:
                    pygame.mixer.music.play()
                except pygame.error as exc:
                    # play() fails when no track was loaded (e.g. the alarm file is
                    # missing); a failed sound must not abort frame processing.
                    print(f"Alarm sound could not be played: {exc}")
                else:
                    print("üö® Playing security alarm!")
                # Mark the event as handled either way so a failure is reported once
                self.sound_played = True

        # If total detections drop below the threshold, reset the flags
        if total_det < self.records and (self.email_sent or self.sound_played):
            self.email_sent = False
            self.sound_played = False
            print("üü¢ Alarm system reset.")

        return results
# ---------------------------------------------------


# Open video
cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
# cap = cv2.VideoCapture(0)  # alternative source: camera index 0
assert cap.isOpened(), "‚ùå Error: Cannot read video file."

# Video writer setup: same frame size and frame rate as the input
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Security Alarm setup: USE THE EXTENDED CLASS
securityalarm = SoundAlarm(
    show=True,  # Show annotated video
    model="yolo11m.pt",  # Use YOLOv11 medium model
    records=3,  # Number of detections to trigger event
    # classes=[0, 2] # 0=person, 2=car (using car for cattle)
    show_labels=True,
    show_conf=True,
    conf=0.6
)

# Optional: Email setup. Credentials are read from environment variables so
# that no secret is stored in the notebook.
from_email = os.environ.get("ALARM_FROM_EMAIL")
password = os.environ.get("ALARM_EMAIL_PASSWORD")  # App password
to_email = os.environ.get("ALARM_TO_EMAIL")

# securityalarm.authenticate(from_email, password, to_email)

# Process video. The finally clause releases the capture, the writer, the
# windows and the mixer even when processing fails or is interrupted.
try:
    while cap.isOpened():
        success, im0 = cap.read()

        if not success:
            print("‚úÖ Video processing completed.")
            break

        # SoundAlarm.process handles the sound for this frame
        results = securityalarm(im0)

        print(results)  # access the output

        video_writer.write(results.plot_im)  # write the processed frame.

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()  # Clean up pygame mixer

In [None]:
import os
import cv2
import numpy as np
import face_recognition # Required for face recognition
from ultralytics import solutions
from ultralytics.utils import LOGGER
import pygame
from ultralytics import solutions
from ultralytics.utils import LOGGER
from ultralytics.utils.plotting import colors

from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
# Start the pygame mixer and preload the alarm track when it exists on disk
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"

if not os.path.exists(alarm_file):
    # A missing file is only reported; the cell keeps running
    print(f"‚ö†Ô∏è Warning: Alarm file '{alarm_file}' not found ‚Äî please check the path.")
else:
    pygame.mixer.music.load(alarm_file)

# import requests

# @dataclass
# class Config:
#     # model & known faces
#     MODEL_PATH: str = "yolo11n.pt"  # change to your model
#     KNOWN_FACES_DIR: str = "family_members"
#     ALARM_FILE: str = "pols-aagyi-pols.mp3"
#     LOG_DIR: str = "security_logs"
#     OUTPUT_DIR: str = "security_output"
#     # VIDEO_SOURCE: str = 0  # camera index or video path
#     VIDEO_SOURCE: str = "./media_files/animal_surveillance/goru-churi.mp4"  # camera index or video path
#     FACE_RECOGNITION_INTERVAL: int = 5
#     ALERT_COOLDOWN: int = 10  # seconds global cooldown
#     PERSON_COOLDOWN: int = 20  # per person cooldown seconds
#     YOLO_CONFIDENCE: float = 0.45
#     FACE_DETECTION_CONF: float = 0.5
#     RECOGNITION_DISTANCE_THRESHOLD: float = 0.45
#     RESIZE_FACTOR: float = 0.35

#     # Clip saving
#     SAVE_CLIP_SECONDS: int = 6  # seconds to save when alarm triggers (uses ring buffer)
#     CLIP_FPS: int = 20

#     # GPIO buzzer (optional)
#     USE_GPIO: bool = False
#     BUZZER_PIN: int = 18  # BCM pin; only used if USE_GPIO True and HAS_RPI True
#     BUZZER_SECONDS: float = 5.0

#     # Telegram
#     USE_TELEGRAM: bool = False
#     TELEGRAM_BOT_TOKEN: str = ""  # put your bot token
#     TELEGRAM_CHAT_ID: str = ""    # put your chat id
#     SEND_IMAGE_ON_ALERT: bool = True

#     # secure zone: rectangle (x1,y1,x2,y2) relative fraction of frame: (left, top, right, bottom)
#     # set to None to consider whole frame as secure zone
#     SECURE_ZONE_REL: Optional[Tuple[float, float, float, float]] = (0.0, 0.0, 1.0, 1.0)

#     # recognition thresholds & voting
#     RECOGNITION_MIN_VOTES: int = 2
#     RECOGNITION_CONSECUTIVE_FRAMES: int = 2
#     RECOGNITION_TIME_WINDOW: float = 3.0

#     # drawing & UI
#     WINDOW_NAME: str = "Security Monitoring"

# Build the reference face encodings: one sub-directory of known_faces_dir per
# person, and one encoding per image in which a face is found.
known_faces_dir = "family_members/"  # Create this directory and add images
known_face_names = []
known_face_encodings = []

for person_name in (os.listdir(known_faces_dir) if os.path.exists(known_faces_dir) else ()):
    person_dir = os.path.join(known_faces_dir, person_name)
    if not os.path.isdir(person_dir):
        continue  # only directories are treated as persons

    for image_name in os.listdir(person_dir):
        image_path = os.path.join(person_dir, image_name)
        try:
            image = face_recognition.load_image_file(image_path)
            face_encodings = face_recognition.face_encodings(image)
            if not face_encodings:
                continue  # nothing to store for this image
            # Only the first encoding returned for an image is kept
            known_face_encodings.append(face_encodings[0])
            known_face_names.append(person_name)
            print(f"Loaded face: {person_name} from {image_name}")
        except Exception as e:
            # Any failure on one file is reported and the scan continues
            print(f"Error loading {image_path}: {e}")

if not known_face_encodings:
    print("Warning: No face encodings loaded. Face recognition will not work.")
else:
    print(f"Successfully loaded {len(known_face_encodings)} face encodings for {len(set(known_face_names))} people")

# Alarm sound: make sure the mixer is initialised and the track is loaded
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"
if not os.path.exists(alarm_file):
    # A missing file is only reported; the cell keeps running
    print(f"Warning: Alarm file {alarm_file} not found")
else:
    pygame.mixer.music.load(alarm_file)
# --- The SoundAlarm Class (Modified to be a Base for FaceRecognitionAlarm) ---
# We redefine SoundAlarm here for completeness, incorporating the previous logic.
class SoundAlarm(solutions.SecurityAlarm):
    """
    SecurityAlarm variant that also plays an audible alarm through pygame.mixer.music.

    The sound is started once per event, when the number of detections reaches
    ``self.records``, and stopped again when the count drops below it.
    """
    def __init__(self, **kwargs):
        """Forward all keyword arguments to SecurityAlarm and clear the sound flag."""
        super().__init__(**kwargs)
        self.sound_played = False  # True while the sound has been started for the current event

    def process(self, im0):
        """
        Process one frame with the parent class, then update the sound alarm.

        Args:
            im0: Frame to analyse; passed unchanged to SecurityAlarm.process.

        Returns:
            SolutionResults carrying the parent's annotated frame (``plot_im``),
            ``total_tracks``, ``email_sent`` and ``sound_played``.
        """
        # Keep the parent's result: it holds the annotated frame for this call
        results = super().process(im0)

        total_det = len(self.clss)

        # Check if the detection threshold is met AND the alarm hasn't been played
        if total_det >= self.records and not self.sound_played:
            if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
                LOGGER.info("üö® Playing security alarm!")
                pygame.mixer.music.play()
                self.sound_played = True

        # Reset logic for the alarm
        if total_det < self.records and (self.email_sent or self.sound_played):
            self.email_sent = False
            self.sound_played = False
            LOGGER.info("üü¢ Alarm system reset.")
            if pygame.mixer.get_init():
                pygame.mixer.music.stop()

        # Use the parent's annotated frame instead of an annotator attribute that
        # this class never assigns; the flags reflect the state after the reset check.
        return SolutionResults(
            plot_im=results.plot_im,
            total_tracks=len(self.track_ids),
            email_sent=self.email_sent,
            sound_played=self.sound_played,  # Add sound_played to results
        )

# --- The New Extended Class for Face Recognition ---
class FaceRecognitionAlarm(SoundAlarm):
    """
    Alarm that reacts only to persons whose face does not match a known encoding.

    Known faces are read from ``face_data_path``, which is expected to contain
    one sub-directory per person holding that person's images.
    """

    PERSON_CLS_ID = 0  # class id treated as 'person'

    def __init__(self, face_data_path="face_data_path", **kwargs):
        """
        Args:
            face_data_path: Directory with one sub-directory of images per known person.
            **kwargs: Forwarded unchanged to SoundAlarm.
        """
        super().__init__(**kwargs)
        self.known_face_encodings = []
        self.known_face_names = []
        self.face_data_path = face_data_path
        self._load_known_faces()

    def _load_known_faces(self):
        """Load known faces from ``self.face_data_path`` into the instance lists."""
        self.known_face_encodings = []
        self.known_face_names = []

        # Kept as an attribute for code that reads it; it now mirrors face_data_path
        self.known_faces_dir = self.face_data_path
        if os.path.isdir(self.known_faces_dir):
            for person_name in os.listdir(self.known_faces_dir):
                person_dir = os.path.join(self.known_faces_dir, person_name)
                if not os.path.isdir(person_dir):
                    continue
                for image_name in os.listdir(person_dir):
                    image_path = os.path.join(person_dir, image_name)
                    try:
                        image = face_recognition.load_image_file(image_path)
                        face_encodings = face_recognition.face_encodings(image)
                    except Exception as e:
                        # One unreadable file must not stop the scan
                        print(f"Error loading {image_path}: {e}")
                        continue
                    if face_encodings:
                        self.known_face_encodings.append(face_encodings[0])
                        self.known_face_names.append(person_name)
                        print(f"Loaded face: {person_name} from {image_name}")

        # Report what THIS instance loaded (not any module-level lists)
        if self.known_face_encodings:
            print(f"Successfully loaded {len(self.known_face_encodings)} face encodings for {len(set(self.known_face_names))} people")
        else:
            print("Warning: No face encodings loaded. Face recognition will not work.")

    def _identify_person(self, rgb_frame, box):
        """
        Look for a face inside a person box and compare it with the known encodings.

        Args:
            rgb_frame: The full frame in RGB channel order.
            box: Person bounding box as (x1, y1, x2, y2).

        Returns:
            ``(face_found, name)``. ``face_found`` is False when no face could be
            encoded inside the box. ``name`` is the matched person's name, or None
            when the face matches nobody or no known faces are loaded.
        """
        frame_h, frame_w = rgb_frame.shape[:2]
        x1, y1, x2, y2 = map(int, box)
        # Clamp to the frame so the slice below never wraps around on negative indices
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, frame_w), min(y2, frame_h)
        if x2 <= x1 or y2 <= y1:
            return False, None

        # Detect the face inside the person crop; the person box itself is not a
        # face location, so it must not be passed as one.
        person_roi = np.ascontiguousarray(rgb_frame[y1:y2, x1:x2])
        face_encodings = face_recognition.face_encodings(person_roi)
        if not face_encodings:
            return False, None
        if not self.known_face_encodings:
            # Nothing to compare against: every visible face counts as unknown
            return True, None

        face_encoding = face_encodings[0]
        matches = face_recognition.compare_faces(self.known_face_encodings, face_encoding)
        face_distance = face_recognition.face_distance(self.known_face_encodings, face_encoding)
        best_match_index = int(np.argmin(face_distance))
        if matches[best_match_index]:
            return True, self.known_face_names[best_match_index]
        return True, None

    def _update_alarm(self, im0, unknown_person_count):
        """Trigger e-mail and sound when enough unknown persons are present, otherwise reset."""
        if unknown_person_count >= self.records:
            if not self.email_sent:
                self.send_email(im0, unknown_person_count)
                self.email_sent = True
            if not self.sound_played and pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
                LOGGER.info("üö® Playing security alarm!")
                pygame.mixer.music.play()
                self.sound_played = True
        elif self.email_sent or self.sound_played:
            self.email_sent = False
            self.sound_played = False
            LOGGER.info("üü¢ Alarm system reset.")
            if pygame.mixer.get_init():
                pygame.mixer.music.stop()

    def process(self, im0):
        """
        Overrides process to check for UNKNOWN persons and only trigger the alarm for them.

        Args:
            im0: Frame to analyse (BGR, as read by OpenCV); boxes are drawn on it.

        Returns:
            SolutionResults with ``plot_im``, ``total_tracks``, ``email_sent``,
            ``sound_played``, ``total_detections`` and ``unknown_persons``.
        """
        self.extract_tracks(im0)
        self.annotator = SolutionAnnotator(im0, line_width=self.line_width)

        # face_recognition expects RGB; convert once, before anything is drawn
        rgb_frame = cv2.cvtColor(im0, cv2.COLOR_BGR2RGB)

        unknown_person_count = 0
        for box, cls in zip(self.boxes, self.clss):
            # Defaults: plain detector label and palette colour
            label = self.names[cls]
            color = colors(cls, True)

            if cls == self.PERSON_CLS_ID:
                face_found, name = self._identify_person(rgb_frame, box)
                if not face_found:
                    # No usable face: keep the default colour and do not count it
                    label = f"{self.names[cls]} (Face Hidden/Far)"
                elif name is not None:
                    label = f"{name} (Known)"
                    color = (0, 255, 0)  # Green for known
                else:
                    label = "Unknown (Alarm!)"
                    color = (0, 0, 255)  # Red for unknown
                    unknown_person_count += 1

            self.annotator.box_label(box, label=label, color=color)

        # The alarm is evaluated here, on the unknown-person count, instead of
        # delegating to the parent chain, which would process the frame again
        # and count every detection.
        self._update_alarm(im0, unknown_person_count)

        plot_im = self.annotator.result()
        self.display_output(plot_im)

        return SolutionResults(
            plot_im=plot_im,
            total_tracks=len(self.track_ids),
            email_sent=self.email_sent,
            sound_played=self.sound_played,
            total_detections=len(self.boxes),
            unknown_persons=unknown_person_count,
        )
# import os
# import cv2
# from ultralytics import solutions
# import pygame
# Note: The custom classes (SoundAlarm, FaceRecognitionAlarm) need to be defined 
# or imported before they are used here. For a single script, define them at the top.



# Open video
# cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
assert cap.isOpened(), "‚ùå Error: Cannot read video file."

# Video writer setup (kept for completeness)
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Security Alarm setup: USE THE EXTENDED CLASS
# records=3 sets the alarm threshold to three detections.
securityalarm = FaceRecognitionAlarm(
    show=True,  # Show annotated video
    model="yolo11m.pt",  # YOLO11 medium model
    records=3,  # Detection count that triggers the event
    # classes=[0, 2],  # Restrict detection to the listed class ids
    face_data_path=known_faces_dir,  # Pass the path to face data
    conf=0.5,
)

# Optional: Email setup. Credentials are read from environment variables so
# that no secret is stored in the notebook.
from_email = os.environ.get("ALARM_FROM_EMAIL")
password = os.environ.get("ALARM_EMAIL_PASSWORD")  # App password
to_email = os.environ.get("ALARM_TO_EMAIL")

# securityalarm.authenticate(from_email, password, to_email)

# --- PROCESS VIDEO ---
# The finally clause releases the capture, the writer, the windows and the
# mixer even when processing fails or is interrupted.
print("\n--- Starting Video Processing. Press 'q' to terminate. ---")
try:
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            print("‚úÖ Video processing completed.")
            break

        # Run Detection and Alarm Logic
        results = securityalarm(im0)

        # Check for 'q' key press to terminate
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("üõë Termination key 'q' pressed. Stopping...")
            break
finally:
    # Cleanup
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()

### 2nd Best AI Security Alarms with Face Recognition

In [None]:
import os
import cv2
import numpy as np
import face_recognition
from ultralytics import solutions
from ultralytics.utils import LOGGER
import pygame
from ultralytics import solutions 
# Import SolutionAnnotator directly from where it lives (as it was in the original SecurityAlarm definition)
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils import LOGGER
from ultralytics.utils.plotting import colors

# --- 1. Pygame and Known Faces Setup (Global) ---

# Start the pygame mixer and preload the alarm track when it exists on disk
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"
if not os.path.exists(alarm_file):
    # A missing file is only reported; the cell keeps running
    print(f"‚ö†Ô∏è Warning: Alarm file '{alarm_file}' not found ‚Äî please check the path.")
else:
    pygame.mixer.music.load(alarm_file)

# Directory holding the images of known persons
KNOWN_FACES_DIR = "family_members"

# --- 2. Extended Class Definitions ---

# --- SoundAlarm Class (Base for audio functionality) ---
class SoundAlarm(solutions.SecurityAlarm):
    """
    Alarm-state helper on top of SecurityAlarm that adds a sound flag.

    ``process`` does not run detection or annotation itself: it evaluates
    ``self.clss`` as it currently stands and updates the e-mail and sound flags.
    """
    def __init__(self, **kwargs):
        """Forward all keyword arguments to SecurityAlarm and clear the sound flag."""
        super().__init__(**kwargs)
        self.sound_played = False

    def process(self, im0):
        """
        Update the alarm flags from the current detection list.

        Args:
            im0: Frame for this step; it is returned unchanged as ``plot_im``.

        Returns:
            SolutionResults with ``plot_im``, ``total_tracks``, ``email_sent``
            and ``sound_played``.
        """
        detection_count = len(self.clss)
        threshold_met = detection_count >= self.records

        if threshold_met:
            # E-mail: only the flag is raised here, no message is sent
            if not self.email_sent:
                # self.send_email(im0, total_det)
                self.email_sent = True
                LOGGER.info("üìß Email alert condition met.")

            # Sound: started only when the mixer is initialised and idle
            if not self.sound_played:
                mixer_idle = pygame.mixer.get_init() and not pygame.mixer.music.get_busy()
                if mixer_idle:
                    LOGGER.info("üö® Playing security alarm!")
                    pygame.mixer.music.play()
                    self.sound_played = True
        elif self.email_sent or self.sound_played:
            # Below the threshold with an active alarm: clear both flags
            self.email_sent = False
            self.sound_played = False
            LOGGER.info("üü¢ Alarm system reset.")
            if pygame.mixer.get_init():
                pygame.mixer.music.stop()

        # track_ids may not exist yet, hence the getattr fallback
        track_ids = getattr(self, 'track_ids', [])
        return SolutionResults(
            plot_im=im0,
            total_tracks=len(track_ids),
            email_sent=self.email_sent,
            sound_played=self.sound_played,
        )


# --- FaceRecognitionAlarm Class (Consolidated logic) ---
class FaceRecognitionAlarm(solutions.SecurityAlarm): # Inherit from SecurityAlarm, not SoundAlarm now for cleaner override
    """
    SecurityAlarm variant that raises the alarm only for persons with an unknown face.

    Faces are detected on a down-scaled copy of each frame, assigned to the
    person box that contains the face centre, and compared with the encodings
    loaded from ``face_data_path``.
    """

    PERSON_CLS_ID = 0      # COCO class ID for 'person'
    FACE_UPSCALE = 4       # faces are searched on a frame shrunk by this factor
    FACE_TOLERANCE = 0.55  # largest face distance still accepted as a match

    def __init__(self, face_data_path, **kwargs):
        """
        Args:
            face_data_path: Directory tree holding images of known persons.
            **kwargs: Forwarded unchanged to SecurityAlarm.
        """
        super().__init__(**kwargs)
        self.known_face_encodings = []
        self.known_face_names = []
        self.face_data_path = face_data_path
        self.sound_played = False # Add sound state here for alarm control
        self._load_known_faces()

    def _load_known_faces(self):
        """Loads face encodings from the specified directory and updates instance attributes."""
        self.known_face_encodings = []
        self.known_face_names = []

        if not os.path.exists(self.face_data_path):
             LOGGER.warning(f"Face data path '{self.face_data_path}' not found. Cannot load known faces.")
             return

        # Walk the whole tree; an image is attributed to the directory that contains it
        for root, dirs, files in os.walk(self.face_data_path):
            person_name = os.path.basename(root)
            for image_name in files:
                if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue
                image_path = os.path.join(root, image_name)
                try:
                    image = face_recognition.load_image_file(image_path)
                    face_encodings = face_recognition.face_encodings(image)
                    if face_encodings:
                        self.known_face_encodings.append(face_encodings[0])
                        self.known_face_names.append(person_name)
                        LOGGER.info(f"Loaded face: {person_name} from {image_name}")
                    else:
                        LOGGER.warning(f"No face found in image: {image_path}")
                except Exception as e:
                    LOGGER.error(f"Error loading {image_path}: {e}")

        if self.known_face_encodings:
            LOGGER.info(f"Successfully loaded {len(self.known_face_encodings)} face encodings for {len(set(self.known_face_names))} people")
        else:
            LOGGER.warning("No known faces were loaded. All detected persons will be considered unknown.")

    def _detect_faces(self, im0):
        """Return ``((top, right, bottom, left), encoding)`` pairs in full-frame pixel coordinates."""
        scale = 1.0 / self.FACE_UPSCALE
        small_frame = cv2.resize(im0, (0, 0), fx=scale, fy=scale)
        # face_recognition expects RGB
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        # Scale the locations back up to the original frame size
        return [
            (tuple(value * self.FACE_UPSCALE for value in location), encoding)
            for location, encoding in zip(face_locations, face_encodings)
        ]

    def _recognise(self, face_encoding):
        """Return the name of the closest known face within tolerance, or None when nobody matches."""
        if not self.known_face_encodings:
            return None
        face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
        best_match_index = int(np.argmin(face_distances))
        # Same decision as compare_faces(..., tolerance=FACE_TOLERANCE) at the best
        # index, without computing the distances a second time
        if face_distances[best_match_index] <= self.FACE_TOLERANCE:
            return self.known_face_names[best_match_index]
        return None

    def _update_alarm(self, im0, unknown_person_count):
        """Raise the e-mail flag and start the sound at the threshold, otherwise reset."""
        if unknown_person_count >= self.records:
            if not self.email_sent:
                # self.send_email(im0, unknown_person_count) # Uncomment if email is setup
                self.email_sent = True
            if not self.sound_played:
                if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
                    LOGGER.info("üö® Playing security alarm!")
                    pygame.mixer.music.play()
                    self.sound_played = True
        elif self.email_sent or self.sound_played:
            self.email_sent = False
            self.sound_played = False
            LOGGER.info("üü¢ Alarm system reset.")
            if pygame.mixer.get_init():
                 pygame.mixer.music.stop()

    def process(self, im0):
        """
        Overrides process to check for UNKNOWN persons and only trigger the alarm for them.

        Args:
            im0: Frame to analyse (BGR, as read by OpenCV).

        Returns:
            SolutionResults with ``plot_im``, ``total_tracks``, ``email_sent``,
            ``sound_played`` and ``unknown_persons``.
        """
        self.extract_tracks(im0)
        annotator = SolutionAnnotator(im0, line_width=self.line_width)

        # Face detection is the expensive step; skip it when no person was detected
        has_person = any(cls == self.PERSON_CLS_ID for cls in self.clss)
        faces = self._detect_faces(im0) if has_person else []

        unknown_person_count = 0
        for box, cls in zip(self.boxes, self.clss):
            # Default label and color (YOLO detection)
            label = self.names[cls]
            color = colors(cls, True)

            if cls == self.PERSON_CLS_ID:
                x1, y1, x2, y2 = map(int, box)
                # First detected face whose centre lies inside this person box
                face_encoding = next(
                    (
                        encoding
                        for (top, right, bottom, left), encoding in faces
                        if x1 <= (left + right) // 2 <= x2 and y1 <= (top + bottom) // 2 <= y2
                    ),
                    None,
                )

                if face_encoding is None:
                    label = f"{self.names[cls]} (No Face)" # Use default color if no face is found
                else:
                    name = self._recognise(face_encoding)
                    if name is not None:
                        label = f"{name} (Known)"
                        color = (0, 255, 0) # Green
                    else:
                        # Also reached when no known faces are loaded
                        label = "Unknown (ALARM!)"
                        color = (0, 0, 255) # Red
                        unknown_person_count += 1

            annotator.box_label(box, label=label, color=color)

        # The alarm is driven by the unknown-person count; self.clss is left untouched
        self._update_alarm(im0, unknown_person_count)

        plot_im = annotator.result()
        self.display_output(plot_im)

        # Return the SolutionResults with the correct flags and counts
        return SolutionResults(
            plot_im=plot_im,
            total_tracks=len(self.track_ids),
            email_sent=self.email_sent,
            sound_played=self.sound_played,
            unknown_persons=unknown_person_count
        )


# --- 3. Main Execution Block ---

# Open video
cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
assert cap.isOpened(), "‚ùå Error: Cannot read video file."

# Video writer setup
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Security Alarm setup: USE THE EXTENDED CLASS
# records=1 means alarm will trigger if 1 or more UNKNOWN persons are detected.
securityalarm = FaceRecognitionAlarm(
    show=True,
    model="yolo11s.pt", # Changed to a faster model for real-time face recognition
    records=1,
    # classes=[0, 2],  # Restrict detection to the listed class ids
    face_data_path=KNOWN_FACES_DIR,
    conf=0.5,
)

# Optional: Email setup. Credentials are read from environment variables so
# that no secret is stored in the notebook.
from_email = os.environ.get("ALARM_FROM_EMAIL")
password = os.environ.get("ALARM_EMAIL_PASSWORD")
to_email = os.environ.get("ALARM_TO_EMAIL")

# securityalarm.authenticate(from_email, password, to_email) # Uncomment once the variables above are set

# --- PROCESS VIDEO ---
# The finally clause releases the capture, the writer, the windows and the
# mixer even when processing fails or is interrupted.
try:
    while cap.isOpened():
        success, im0 = cap.read()

        if not success:
            print("‚úÖ Video processing completed.")
            break

        results = securityalarm(im0)

        print(results)  # access the output

        # Frame writing is currently disabled; uncomment to save the annotated video
        # video_writer.write(results.plot_im)  # write the processed frame.

        # Check for 'q' key press to terminate
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("üõë Termination key 'q' pressed. Stopping...")
            break
finally:
    # Cleanup
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()

In [None]:
import os
import cv2
import numpy as np
import face_recognition
import pygame
from ultralytics import solutions
from ultralytics.utils import LOGGER

from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults

# ========== SOUND SETUP ==========
# Start the pygame mixer and preload the alarm track when it exists on disk
pygame.mixer.init()
ALARM_FILE = "pols-aagyi-pols.mp3"
if not os.path.exists(ALARM_FILE):
    # A missing file is only reported; the cell keeps running
    print(f"[WARNING] Alarm file '{ALARM_FILE}' not found.")
else:
    pygame.mixer.music.load(ALARM_FILE)


# ========== KNOWN FACE ENCODING LOADER ==========
# One sub-directory of KNOWN_FACE_DIR per person; one encoding is stored for
# every image in which a face is found.
KNOWN_FACE_DIR = "family_members"
known_face_encodings, known_face_names = [], []

# isdir (not exists): a plain file with this name must not reach os.listdir
if os.path.isdir(KNOWN_FACE_DIR):
    for name in os.listdir(KNOWN_FACE_DIR):
        person_dir = os.path.join(KNOWN_FACE_DIR, name)
        if not os.path.isdir(person_dir):
            continue
        for filename in os.listdir(person_dir):
            path = os.path.join(person_dir, filename)
            try:
                img = face_recognition.load_image_file(path)
                enc = face_recognition.face_encodings(img)
                if enc:
                    # Only the first encoding returned for an image is kept
                    known_face_encodings.append(enc[0])
                    known_face_names.append(name)
                    print(f"[INFO] Loaded face for {name} from {filename}")
            except Exception as e:
                # Any failure on one file is reported and the scan continues
                print(f"[ERROR] Failed loading {path}: {e}")
else:
    # Name the directory that was actually checked
    print(f"[WARNING] Known-face directory '{KNOWN_FACE_DIR}' not found.")


# ========== ⚙️ EXTENDED SECURITY ALARM WITH SOUND ==========
class SoundAlarm(solutions.SecurityAlarm):
    """SecurityAlarm subclass that adds an audible alarm via pygame.

    Two latches, ``sound_played`` and ``email_sent``, make each action fire at
    most once until :meth:`reset_alarm` clears them.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and clear both latches."""
        super().__init__(*args, **kwargs)
        self.email_sent = False
        self.sound_played = False

    def trigger_alarm(self):
        """Start the alarm sound and delegate to the base class alert, once each."""
        if not self.sound_played:
            if pygame.mixer.get_init():
                pygame.mixer.music.play()
                print("[ALARM] ‚ö†Ô∏è Unknown person detected ‚Äî alarm triggered!")
            # Latched even when the mixer is unavailable, so it is not retried.
            self.sound_played = True

        if self.email_sent:
            return
        try:
            # Described in the original as the optional e-mail alert.
            super().trigger_alarm()
        except Exception as e:
            LOGGER.warning(f"Email alert failed: {e}")
        else:
            self.email_sent = True

    def reset_alarm(self):
        """Stop the sound if it was started and clear both latches."""
        if self.sound_played and pygame.mixer.get_init():
            pygame.mixer.music.stop()
            print("[ALARM] ‚úÖ Alarm stopped ‚Äî area clear.")
        self.sound_played = False
        self.email_sent = False

    def __call__(self, im0):
        """Run the base-class call on ``im0`` and repackage it as SolutionResults.

        ``plot_im`` falls back to ``im0`` and ``dets`` to ``None`` when the base
        result does not expose those attributes.
        """
        base_results = super().__call__(im0)
        annotated = getattr(base_results, "plot_im", im0)
        detections = getattr(base_results, "dets", None)
        return SolutionResults(im0=im0, plot_im=annotated, dets=detections)


# ========== 👁️ FACE-RECOGNITION ALARM COMBINATION ==========
class FaceRecognitionAlarm(solutions.SecurityAlarm):
    """Security alarm that sounds when a detected person cannot be matched to a known face.

    Each person box (class id 0) is cropped from the frame, run through
    ``face_recognition`` and compared with the supplied reference encodings.
    A person whose crop yields no face, or no close enough match, is labelled
    "Unknown". Any unknown person in a frame starts the alarm; a frame without
    one stops it.

    Args:
        known_face_encodings: Reference face encodings (parallel to ``known_face_names``).
        known_face_names: Name for each reference encoding.
        *args, **kwargs: Forwarded unchanged to ``solutions.SecurityAlarm``.
    """

    # Same cut-off face_recognition.compare_faces() applies by default.
    MATCH_TOLERANCE = 0.6

    def __init__(self, *args, known_face_encodings=None, known_face_names=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.known_face_encodings = known_face_encodings or []
        self.known_face_names = known_face_names or []
        self.sound_played = False  # latch: True while the alarm is considered active

    def play_sound(self):
        """Start the alarm sound once; never raise if audio is unavailable."""
        if self.sound_played:
            return
        # Latch first so a broken mixer is not retried (and re-logged) on every frame.
        self.sound_played = True
        if not pygame.mixer.get_init():
            print("[WARNING] Mixer not initialised; alarm sound skipped.")
            return
        try:
            pygame.mixer.music.play()
        except pygame.error as exc:
            # Happens e.g. when the alarm file was missing and nothing was loaded.
            print(f"[WARNING] Could not play alarm sound: {exc}")
            return
        print("[ALARM] üö® Unknown face ‚Äî sound started!")

    def reset_sound(self):
        """Stop the alarm sound if it was started and clear the latch."""
        if not self.sound_played:
            return
        if pygame.mixer.get_init():
            pygame.mixer.music.stop()
        self.sound_played = False
        print("[ALARM] üîá Sound reset.")

    def _identify(self, encoding):
        """Return the name of the closest known face, or "Unknown" if none is close enough."""
        if len(self.known_face_encodings) == 0:
            return "Unknown"
        distances = face_recognition.face_distance(self.known_face_encodings, encoding)
        best = int(np.argmin(distances))
        # Pick the closest reference rather than the first one under the threshold,
        # so the result does not depend on the order the references were loaded in.
        if distances[best] <= self.MATCH_TOLERANCE:
            return self.known_face_names[best]
        return "Unknown"

    def __call__(self, im0):
        """Detect persons in ``im0``, label them and drive the alarm.

        Args:
            im0: BGR frame (it is converted with ``cv2.COLOR_BGR2RGB`` before encoding).

        Returns:
            SolutionResults carrying the input frame and the annotated frame.
        """
        self.extract_tracks(im0)
        annotator = SolutionAnnotator(im0, line_width=self.line_width)
        frame_h, frame_w = im0.shape[:2]
        unknown_detected = False

        for xyxy, conf, cls in zip(self.boxes, self.confs, self.clss):
            if int(cls) != 0:  # 0 = person
                continue

            x1, y1, x2, y2 = map(int, xyxy)
            # Clamp to the frame: a negative coordinate would otherwise be read
            # as "from the end" by the slice below and give a wrong or empty crop.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            person_crop = im0[y1:y2, x1:x2]
            if person_crop.size == 0:
                continue

            rgb_crop = cv2.cvtColor(person_crop, cv2.COLOR_BGR2RGB)
            encs = face_recognition.face_encodings(rgb_crop)
            # Only the first face found inside the person box is considered.
            name = self._identify(encs[0]) if encs else "Unknown"

            is_unknown = name == "Unknown"
            color = (0, 0, 255) if is_unknown else (0, 255, 0)
            annotator.box_label(xyxy, f"{name} ({conf:.2f})", color=color)
            unknown_detected = unknown_detected or is_unknown

        if unknown_detected:
            self.play_sound()
        else:
            self.reset_sound()

        plot_im = annotator.result()
        total_tracks = len(getattr(self, "track_ids", []))
        if total_tracks > 0:
            # Draw on the image that is returned, so the counter is always visible there.
            cv2.putText(
                plot_im,
                f"Tracks: {total_tracks}",
                (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                (255, 255, 255),
                2,
            )

        return SolutionResults(im0=im0, plot_im=plot_im)


# ========== 🎥 MAIN LOOP ==========
if __name__ == "__main__":
    # Read a video, annotate every frame with FaceRecognitionAlarm, save and show it.
    cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
    # cap = cv2.VideoCapture(0)
    assert cap.isOpened(), "Error: video not found or cannot be opened."

    w, h, fps = (int(cap.get(x)) for x in
                 (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
    # Some sources (notably webcams) report 0 FPS; fall back to a sane rate
    # so the writer does not produce an unplayable file.
    fps = fps or 30
    writer = cv2.VideoWriter("security_output.avi",
                             cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

    try:
        face_alarm = FaceRecognitionAlarm(
            show=True,
            model="yolo11m.pt",
            records=3,
            # classes=[0],  # person
            known_face_encodings=known_face_encodings,
            known_face_names=known_face_names
        )

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("[INFO] Video finished or empty frame.")
                break

            results = face_alarm(frame)
            writer.write(results.plot_im)
            cv2.imshow("Face Recognition Security Alarm", results.plot_im)
            # pygame.event.pump()

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always release the capture, writer, windows and audio, including on
        # errors or a kernel interrupt; otherwise the device and windows stay locked.
        cap.release()
        writer.release()
        cv2.destroyAllWindows()
        pygame.quit()
    print("[INFO] Surveillance session ended.")


## Best face recognition system

In [None]:
import os
import cv2
import numpy as np
import face_recognition
from ultralytics import solutions
from ultralytics.utils import LOGGER
import pygame
from pathlib import Path  # Fix: Import the Path object
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
# --- 1. Pygame and Known Faces Setup (Global) ---

# Bring up the audio mixer, then preload the alarm track when it is on disk.
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"
if not os.path.exists(alarm_file):
    print(f"‚ö†Ô∏è Warning: Alarm file '{alarm_file}' not found ‚Äî please check the path.")
else:
    pygame.mixer.music.load(alarm_file)

# Root folder holding one sub-folder of reference images per known person.
KNOWN_FACES_DIR = "family_members/"

# --- 2. Extended Class Definitions ---

# --- FaceRecognitionAlarm Class (Consolidated logic) ---
class FaceRecognitionAlarm(solutions.SecurityAlarm):  # Inherit from SecurityAlarm, not SoundAlarm, for a cleaner override
    """
    A security alarm that uses face recognition to trigger alerts only for unknown persons.

    Face recognition is expensive, so it runs on a downscaled frame and only on
    every ``recognition_interval``-th frame. The verdict for each track id is
    cached in ``tracked_faces`` and re-evaluated only after its cooldown expires.

    Args:
        face_data_path: Directory laid out as ``<person_name>/<image>`` holding reference faces.
        **kwargs: Forwarded unchanged to ``solutions.SecurityAlarm``.
    """

    # File extensions accepted as reference images (compared case-insensitively).
    IMAGE_SUFFIXES = (".jpg", ".jpeg", ".png")

    def __init__(self, face_data_path, **kwargs):
        super().__init__(**kwargs)
        self.known_face_encodings = []
        self.known_face_names = []
        self.face_data_path = face_data_path
        self.sound_played = False  # latch for the audible alarm

        # Optimization attributes
        self.frame_count = 0
        self.recognition_interval = 5  # run face recognition every 5 frames
        self.recognition_scale = 0.25  # frame is shrunk by this factor before face detection
        self.face_tolerance = 0.55  # maximum face distance accepted as a match
        self.tracked_faces = {}  # track_id -> {'name': str, 'cooldown': int}
        self.recognition_cooldown = 15  # frames to wait before re-checking a face

        self._load_known_faces()

    def _load_known_faces(self):
        """Load one encoding per image from ``face_data_path/<person_name>/<image>``."""
        LOGGER.info(f"Loading known faces from '{self.face_data_path}'...")
        # Use the path given to the constructor, not the module-level default.
        root = Path(self.face_data_path)
        if not root.is_dir():
            LOGGER.warning(f"Known faces directory '{self.face_data_path}' not found.")
            return

        for person_path in sorted(root.iterdir()):
            if not person_path.is_dir():
                continue
            person_name = person_path.name
            for image_file in sorted(person_path.iterdir()):
                # Explicit suffix test: a glob like '*[.jpg,.jpeg,.png]' is a character
                # class matching the *last character* only, not a list of extensions.
                if image_file.suffix.lower() not in self.IMAGE_SUFFIXES:
                    continue
                try:
                    image = face_recognition.load_image_file(str(image_file))
                    encodings = face_recognition.face_encodings(image)
                    if encodings:
                        # Only the first face found in each reference image is kept.
                        self.known_face_encodings.append(encodings[0])
                        self.known_face_names.append(person_name)
                        LOGGER.info(f"  - Loaded face for '{person_name}' from {image_file.name}")
                    else:
                        LOGGER.warning(f"No face found in {image_file}")
                except Exception as e:
                    # Best effort: one unreadable file must not abort the whole load.
                    LOGGER.error(f"Error loading {image_file}: {e}")

        if not self.known_face_encodings:
            LOGGER.warning("No known faces loaded. All detected persons will be 'Unknown'.")
        else:
            LOGGER.info(f"Successfully loaded {len(self.known_face_encodings)} faces for {len(set(self.known_face_names))} people.")

    def _get_face_encodings(self, im0):
        """Detect faces and compute encodings, but only on interval frames.

        Returns:
            ``(face_locations, face_encodings)`` in the coordinates of the
            downscaled frame, or two empty lists on skipped frames.
        """
        if self.frame_count % self.recognition_interval != 0:
            return [], []
        small_frame = cv2.resize(im0, (0, 0), fx=self.recognition_scale, fy=self.recognition_scale)
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        return face_locations, face_encodings

    def _match_name(self, face_encoding):
        """Return the closest known name within ``face_tolerance``, else "Unknown"."""
        if not self.known_face_encodings:
            return "Unknown"
        distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
        best_match_index = int(np.argmin(distances))
        if distances[best_match_index] <= self.face_tolerance:
            return self.known_face_names[best_match_index]
        return "Unknown"

    def _classify_box(self, box, face_locations, face_encodings):
        """Name the first detected face whose centre lies inside ``box``.

        Returns "No Face" when no detected face centre falls inside the box.
        """
        x1, y1, x2, y2 = map(int, box)
        # Face boxes come from the downscaled frame; map them back to full resolution.
        scale_back = 1.0 / self.recognition_scale
        for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
            face_center_x = int((left + right) * scale_back) // 2
            face_center_y = int((top + bottom) * scale_back) // 2
            if x1 <= face_center_x <= x2 and y1 <= face_center_y <= y2:
                return self._match_name(face_encoding)
        return "No Face"

    def _handle_alarm_logic(self, unknown_person_count):
        """Trigger or reset the sound and e-mail flags from the unknown-person count.

        ``self.records`` and ``self.email_sent`` are not set in this class;
        presumably they come from ``solutions.SecurityAlarm`` -- TODO confirm.
        """
        if unknown_person_count >= self.records:
            if not self.email_sent:
                # self.send_email(im0, unknown_person_count) # Uncomment if email is setup
                # NOTE(review): im0 is not in scope here; pass the frame in before enabling.
                self.email_sent = True
                LOGGER.info(f"üìß Email alert condition met for {unknown_person_count} unknown person(s).")
            if not self.sound_played:
                if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
                    LOGGER.info("üö® Playing security alarm!")
                    pygame.mixer.music.play()
                    self.sound_played = True
        elif self.email_sent or self.sound_played:
            self.email_sent = False
            self.sound_played = False
            LOGGER.info("üü¢ Alarm system reset.")
            if pygame.mixer.get_init():
                pygame.mixer.music.stop()

    def process(self, im0):
        """Annotate one frame, update per-track face verdicts and drive the alarm.

        Args:
            im0: BGR frame (it is converted with ``cv2.COLOR_BGR2RGB`` before face detection).

        Returns:
            SolutionResults with the annotated frame, the track count and both alarm flags.
        """
        self.frame_count += 1
        self.extract_tracks(im0)
        annotator = SolutionAnnotator(im0, line_width=self.line_width)

        unknown_person_count = 0
        person_cls_id = 0  # COCO class ID for 'person'

        face_locations, face_encodings = self._get_face_encodings(im0)

        # Process each tracked object
        for i, (box, track_id) in enumerate(zip(self.boxes, self.track_ids)):
            cls = self.clss[i]
            label = self.names[cls]
            color = colors(cls, True)

            if cls == person_cls_id:
                cached = self.tracked_faces.get(track_id)
                needs_check = cached is None or cached['cooldown'] == 0
                # A verdict is only (re)computed on frames where faces were actually detected.
                if needs_check and face_encodings:
                    self.tracked_faces[track_id] = {
                        'name': self._classify_box(box, face_locations, face_encodings),
                        'cooldown': self.recognition_cooldown,
                    }

                if track_id in self.tracked_faces:
                    face_info = self.tracked_faces[track_id]
                    name = face_info['name']
                    if name == "Unknown":
                        label, color = "Unknown (ALARM!)", (0, 0, 255)
                        unknown_person_count += 1
                    elif name == "No Face":
                        label, color = "Person (No Face)", (255, 192, 203)  # Pink
                    else:
                        label, color = f"{name} (Known)", (0, 255, 0)

                    if face_info['cooldown'] > 0:
                        face_info['cooldown'] -= 1

            annotator.box_label(box, label=label, color=color)

        self._handle_alarm_logic(unknown_person_count)

        plot_im = annotator.result()
        self.display_output(plot_im)

        # Return the SolutionResults with the correct flags and counts
        return SolutionResults(
            plot_im=plot_im,
            # im0=im0,
            total_tracks=len(getattr(self, 'track_ids', [])),
            email_sent=self.email_sent,
            sound_played=self.sound_played,
        )


# --- 3. Main Execution Block ---

# Open video
# cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
cap = cv2.VideoCapture("media_files/theaf_surveillance/1093628701-preview.mp4")
assert cap.isOpened(), "‚ùå Error: Cannot read video file."

# Video writer setup
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# Optional: Email setup.
# SECURITY: credentials must never be committed to a notebook. They are read
# from the environment instead. The address and app password that used to be
# hardcoded here should be considered leaked and be revoked.
from_email = os.environ.get("ALARM_FROM_EMAIL", "")
password = os.environ.get("ALARM_EMAIL_PASSWORD", "")
to_email = os.environ.get("ALARM_TO_EMAIL", "")

try:
    # Security Alarm setup: USE THE EXTENDED CLASS
    # records=1 means the alarm triggers as soon as 1 or more UNKNOWN persons are detected.
    securityalarm = FaceRecognitionAlarm(
        show=True,
        model="yolo11m.pt",  # Use a valid and fast model
        records=1,
        # classes=[0],  # restrict detection to 'person' (ID 0)
        face_data_path=KNOWN_FACES_DIR,
        conf=0.5
    )

    # securityalarm.authenticate(from_email, password, to_email)  # Uncomment once the env vars are set

    # --- PROCESS VIDEO ---
    print("\n--- Starting Video Processing. Press 'q' to terminate. ---")
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            print("‚úÖ Video processing completed.")
            break

        # Run Detection and Alarm Logic
        results = securityalarm(frame)
        # NOTE(review): writing is disabled, so security_output.avi is created but stays empty.
        # video_writer.write(results.plot_im)  # write the processed frame.
        # cv2.imshow("Face Recognition Security Alarm", results.plot_im)

        # Check for 'q' key press to terminate
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("üõë Termination key 'q' pressed. Stopping...")
            break

        # Allow pygame to process events
        # pygame.event.pump()
finally:
    # Cleanup always runs, including on errors or a kernel interrupt, so the
    # video handle, writer, windows and audio device are not left open.
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()
    pygame.mixer.quit()

In [None]:
import os
import cv2
import numpy as np
import face_recognition
import pygame
from ultralytics import solutions
# from ultralytics.engine.results import Results
from ultralytics.utils import LOGGER

from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.engine.results import Results
from ultralytics.utils.plotting import colors
from typing import Any, Dict, List, Tuple
# ========== 🔊 SOUND SETUP ==========
# Initialise the mixer and preload the alarm track. A missing file only
# produces a warning so the rest of the cell can still run.
pygame.mixer.init()
ALARM_FILE = "pols-aagyi-pols.mp3"
if os.path.exists(ALARM_FILE):
    pygame.mixer.music.load(ALARM_FILE)
else:
    print(f"[WARNING] Alarm file '{ALARM_FILE}' not found.")


# ========== 🧠 KNOWN FACE ENCODING LOADER ==========
# Expected layout: KNOWN_FACE_DIR/<person_name>/<image files>.
# Fills two parallel lists: known_face_encodings[i] belongs to known_face_names[i].
KNOWN_FACE_DIR = "family_members/"
known_face_encodings, known_face_names = [], []

# isdir() rather than exists(): a plain file with this name must not reach os.listdir().
if os.path.isdir(KNOWN_FACE_DIR):
    # sorted() keeps the encoding order reproducible across runs and platforms.
    for name in sorted(os.listdir(KNOWN_FACE_DIR)):
        person_dir = os.path.join(KNOWN_FACE_DIR, name)
        if not os.path.isdir(person_dir):
            continue
        for filename in sorted(os.listdir(person_dir)):
            path = os.path.join(person_dir, filename)
            try:
                img = face_recognition.load_image_file(path)
                enc = face_recognition.face_encodings(img)
                if enc:
                    # Only the first face found in each reference image is kept.
                    known_face_encodings.append(enc[0])
                    known_face_names.append(name)
                    print(f"[INFO] Loaded face for {name} from {filename}")
                else:
                    # Make unusable reference images visible instead of dropping them silently.
                    print(f"[WARNING] No face found in {path}; skipped.")
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"[ERROR] Failed loading {path}: {e}")
else:
    # Report the directory that was actually looked up.
    print(f"[WARNING] Known-face directory '{KNOWN_FACE_DIR}' not found.")

# ...existing code...
def update_labels_info(label: dict[str, Any]) -> dict[str, Any]:
    """
    Normalize and augment a detection/segmentation label dict so you can
    draw a custom label containing a stable track id (or a bbox-based fallback).

    Expected/optional keys in `label`:
      - 'box' : [x1, y1, x2, y2]  (xyxy)
      - 'cls'  : int class id
      - 'conf' : float confidence
      - 'track_id', 'id' or 'track' : tracker id (checked in that order)
      - 'name' or 'cls_name' : human-readable class/name
      - 'label' : fallback name used when neither of the above is set
      - 'custom_prefix' : optional prefix for the displayed label (default: empty)

    The dict is modified in place and also returned, with new keys:
      - 'track_id' (guaranteed; "0" when neither an id nor a usable box is present)
      - 'display_name' (string to pass to annotator.box_label)
    """
    # Explicit None/"" checks instead of an `or` chain: a tracker id of 0 is
    # valid and must not be replaced by the bbox fallback.
    track_id = None
    for key in ("track_id", "id", "track"):
        candidate = label.get(key)
        if candidate is not None and not (isinstance(candidate, str) and not candidate):
            track_id = candidate
            break

    # fallback: derive a position-based id from the bbox center if no track id was provided
    if track_id is None:
        try:
            x1, y1, x2, y2 = map(int, label["box"])
            track_id = f"{(x1 + x2)//2}_{(y1 + y2)//2}"
        except Exception:
            # No box, or a malformed one: still honour the "guaranteed" contract.
            track_id = "0"

    # readable name
    cls_val = label.get("cls")
    name = label.get("name") or label.get("cls_name") or ""
    if not name and cls_val is not None and hasattr(cls_val, "__int__"):
        # optional mapping: if you have model.names available you can set it externally on label
        name = label.get("label") or ""

    prefix = str(label.get("custom_prefix", ""))

    # build display text
    base = name if name else f"class_{label.get('cls','?')}"
    display = f"{prefix}{track_id} {base}"
    conf = label.get("conf")
    if conf is not None:
        try:
            display = f"{display} ({float(conf):.2f})"
        except (TypeError, ValueError):
            # A non-numeric confidence is simply left out of the label.
            pass

    label["track_id"] = track_id
    label["display_name"] = display
    return label
# ...existing code...
# ========== 👁️ FACE-RECOGNITION ALARM (REVISED & OPTIMIZED) ==========
class FaceRecognitionAlarm(solutions.SecurityAlarm):
    """Security alarm that counts persons whose face cannot be matched to a known one.

    Faces are detected once per frame on a downscaled copy, then each YOLO
    person box is associated with the first face whose centre lies inside it.
    The alarm sounds while the number of unknown persons is at least
    ``self.records`` (not set here; presumably provided by the base class --
    TODO confirm).

    Args:
        known_face_encodings: Reference face encodings (parallel to ``known_face_names``).
        known_face_names: Name for each reference encoding.
        *args, **kwargs: Forwarded unchanged to ``solutions.SecurityAlarm``.
    """

    def __init__(self, *args, known_face_encodings=None, known_face_names=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.known_face_encodings = known_face_encodings or []
        self.known_face_names = known_face_names or []
        self.sound_played = False
        # Best practice: Set face recognition tolerance during initialization
        self.face_tolerance = 0.55
        # Downscale factor applied before face detection; also used to map
        # face boxes back to full-frame coordinates, so the two cannot drift apart.
        self.recognition_scale = 0.25

        self.show_conf = self.CFG.get("show_conf", True)
        self.show_labels = self.CFG.get("show_labels", True)
        self.show_boxes = self.CFG.get("show_boxes", True)

    def play_sound(self):
        """Plays the alarm sound if it's not already playing."""
        if not self.sound_played:
            if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
                pygame.mixer.music.play()
                self.sound_played = True
                LOGGER.info("üö® Alarm Triggered: Unknown person count reached threshold.")

    def reset_sound(self):
        """Stops the alarm sound and resets the state."""
        if self.sound_played:
            if pygame.mixer.get_init():
                pygame.mixer.music.stop()
            self.sound_played = False
            LOGGER.info("üü¢ Alarm Reset: Area clear.")

    def _match_known_face(self, face_encoding):
        """Return the closest known name within ``face_tolerance``, or None."""
        if not self.known_face_encodings:
            return None
        face_distances = face_recognition.face_distance(self.known_face_encodings, face_encoding)
        best_match_index = int(np.argmin(face_distances))
        if face_distances[best_match_index] < self.face_tolerance:
            return self.known_face_names[best_match_index]
        return None

    def __call__(self, im0):
        """
        Process a single frame: detect persons, recognise faces, annotate, drive the alarm.

        Args:
            im0: BGR frame (it is converted with ``cv2.COLOR_BGR2RGB`` before face detection).

        Returns:
            SolutionResults carrying the input frame and the annotated frame.
        """
        # 1. Get person detections from the base class
        self.extract_tracks(im0)
        annotator = SolutionAnnotator(im0, line_width=self.line_width)
        self.masks = getattr(self.tracks[0], "masks", None)

        # Annotation for segmentation masks.
        # NOTE(review): `plot_im` from this section is overwritten by
        # annotator.result() further down, so it currently has no visible effect,
        # and the warning fires on every frame with a non-segmentation model.
        # Confirm whether a mask overlay was intended.
        if self.masks is None:
            self.LOGGER.warning("No masks detected! Ensure you're using a supported Ultralytics segmentation model.")
            plot_im = im0
        else:
            results = Results(im0, path=None, names=self.names, boxes=self.track_data.data, masks=self.masks.data)
            plot_im = results.plot(
                line_width=self.line_width,
                boxes=self.show_boxes,
                conf=self.show_conf,
                labels=self.show_labels,
                color_mode="instance"
            )

        unknown_person_count = 0

        # 2. Optimize by finding all faces in the frame at once (on a smaller version)
        # This is much faster than processing crops for each person.
        small_frame = cv2.resize(im0, (0, 0), fx=self.recognition_scale, fy=self.recognition_scale)
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        # Inverse of the resize factor (4 for 0.25). A hard-coded 8 used to put
        # every face centre at twice its real position.
        scale_back = 1.0 / self.recognition_scale

        # 3. Iterate through detected PERSONS from YOLO
        for box, conf, cls in zip(self.boxes, self.confs, self.clss):
            if int(cls) != 0:  # Skip if not a person
                continue

            name = None

            # 4. Associate faces with person boxes
            person_box_left, person_box_top, person_box_right, person_box_bottom = map(int, box)

            for (face_top, face_right, face_bottom, face_left), face_encoding in zip(face_locations, face_encodings):
                # Centre of the face, scaled back to original image coordinates
                face_center_x = int((face_left + face_right) * scale_back) // 2
                face_center_y = int((face_top + face_bottom) * scale_back) // 2

                if (person_box_left <= face_center_x <= person_box_right and
                        person_box_top <= face_center_y <= person_box_bottom):
                    # 5. Use robust (closest-distance) matching for the associated face
                    name = self._match_known_face(face_encoding)
                    # Once a face is associated with this person, stop checking other faces
                    break

            # 6. Update counter and draw labels
            if name is None:
                unknown_person_count += 1
                color = (0, 0, 255)  # Red for Unknown
                label = "Unknown (person)"
                print("‚ö†Ô∏è Unknown Person Detected!")
            else:
                color = (0, 255, 0)  # Green for Known
                label = f"{name}"
                print(f"‚úÖ Known Person Detected: {name}")

            annotator.box_label(box, label, color)

        # 7. Trigger alarm based on the COUNT of unknown people and the 'records' threshold
        if unknown_person_count >= self.records:
            self.play_sound()
        else:
            self.reset_sound()

        plot_im = annotator.result()

        # Display track count on the frame
        total_tracks = len(getattr(self, "track_ids", []))
        cv2.putText(plot_im, f"Tracks: {total_tracks}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        return SolutionResults(im0=im0, plot_im=plot_im)


# ========== 🎥 MAIN LOOP ==========
if __name__ == "__main__":
    # Read a video, annotate every frame with FaceRecognitionAlarm and show it.
    cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
    # cap = cv2.VideoCapture(0)
    assert cap.isOpened(), "Error: video not found or cannot be opened."

    w, h, fps = (int(cap.get(x)) for x in
                 (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
    # Some sources (notably webcams) report 0 FPS; fall back to a sane rate
    # so the writer does not produce an unplayable file.
    fps = fps or 30
    writer = cv2.VideoWriter("security_output.avi",
                             cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

    try:
        face_alarm = FaceRecognitionAlarm(
            show=True,
            model="yolo11m.pt",
            records=1,
            # classes=[0],  # person
            known_face_encodings=known_face_encodings,
            known_face_names=known_face_names,
            conf=0.3,
            iou=0.5,
            # persist=True,
            # tracker="./ultralytics/cfg/trackers/bytetrack.yaml",
        )

        while cap.isOpened():
            ret, im0 = cap.read()
            if not ret:
                print("[INFO] Video finished or empty frame.")
                break

            results = face_alarm(im0)
            # NOTE(review): writing is disabled, so security_output.avi is created but stays empty.
            # writer.write(results.plot_im)
            cv2.imshow("Face Recognition Security Alarm", results.plot_im)
            # pygame.event.pump()

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always release the capture, writer, windows and audio, including on
        # errors or a kernel interrupt; otherwise the device and windows stay locked.
        cap.release()
        writer.release()
        cv2.destroyAllWindows()
        pygame.quit()
    print("[INFO] Surveillance session ended.")


In [None]:
# ...existing code...
# """
# Face-recognition aware Security Alarm using Ultralytics solutions (yolo11m-seg.pt or yolo11m.pt).
# Includes update_labels_info() and example main loop.
# Run: python d:\Projects\ultralytics\security_face_alarm.py
# """
import os
import cv2
import time
import numpy as np
import face_recognition
import pygame
from typing import Any, Dict, List, Tuple

from ultralytics import solutions
from ultralytics.solutions.solutions import SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
from ultralytics.utils import LOGGER
# from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.engine.results import Results
# from ultralytics.utils.plotting import colors

# -----------------------
# Configuration
# -----------------------
# Tunable constants for this cell, kept together so they can be changed in one place.
MODEL = "yolo11l.pt"         # model weights file; set to yolo11m-seg.pt for segmentation support or yolo11m.pt
VIDEO_SOURCE = "./media_files/animal_surveillance/goru-churi.mp4"  # input video path
# VIDEO_SOURCE = 0              # 0 for webcam, or path to video file
KNOWN_FACES_DIR = "family_members"   # default face_data_path; directory structured as family_members/<name>/*.jpg
ALARM_SOUND = "pols-aagyi-pols.mp3"  # alarm audio file -- NOTE(review): its use is not visible in this part of the file
FACE_TOLERANCE = 0.55               # copied to FaceRecognitionAlarm.face_tolerance; lower = stricter
RECOGNITION_SCALE = 0.25            # scale for face detection (speed)
RECOGNITION_INTERVAL = 5            # copied to FaceRecognitionAlarm.recognition_interval; run face recognition every N frames
RECORDS_THRESHOLD = 3               # number of unknown persons to trigger alarm

# -----------------------
# Utility: update_labels_info
# -----------------------
def update_labels_info(label: Dict[str, Any]) -> Dict[str, Any]:
    """
    Normalize/augment a detection/segmentation label dict for stable display.

    Expected keys (all optional):
      - 'box' : [x1, y1, x2, y2] (xyxy), used for the fallback id
      - 'cls' : class id
      - 'conf' : confidence
      - 'track_id', 'id' or 'track' : tracker id (checked in that order)
      - 'name', 'cls_name' or 'label' : human-readable name
      - 'custom_prefix' : prefix for the displayed label (default 'person')

    The dict is modified in place and also returned, with:
      - 'track_id' : the tracker id, a bbox-centre id "<cx>_<cy>", or "0" as last resort
      - 'display_name' : "<prefix>_<track_id> <name>[ (<conf>)]"
    """
    # Explicit None/"" checks instead of an `or` chain: a tracker id of 0 is
    # valid and must not be replaced by the bbox fallback.
    track_id = None
    for key in ("track_id", "id", "track"):
        candidate = label.get(key)
        if candidate is not None and not (isinstance(candidate, str) and not candidate):
            track_id = candidate
            break

    # track id fallback: position-based id from the bbox centre
    if track_id is None:
        try:
            x1, y1, x2, y2 = map(int, label["box"])
            track_id = f"{(x1 + x2)//2}_{(y1 + y2)//2}"
        except Exception:
            # No box, or a malformed one: never leave the id as None.
            track_id = "0"

    # readable class/name
    name = label.get("name") or label.get("cls_name") or ""
    if not name and label.get("cls") is not None:
        cls_val = label.get("cls")
        # label may include 'names' externally, user can set label['name'] before calling
        name = label.get("label") or f"class_{int(cls_val)}"

    prefix = str(label.get("custom_prefix", "person"))

    base = name if name else f"class_{label.get('cls','?')}"
    display = f"{prefix}_{track_id} {base}"
    conf = label.get("conf")
    if conf is not None:
        try:
            display = f"{display} ({float(conf):.2f})"
        except Exception:
            # A non-numeric confidence is simply left out of the label.
            pass

    label["track_id"] = track_id
    label["display_name"] = display
    return label

# -----------------------
# Face recognition loader
# -----------------------
def load_known_faces(known_dir: str) -> Tuple[List[np.ndarray], List[str]]:
    """
    Build face encodings from a directory laid out as <known_dir>/<person>/<image>.

    Only files ending in .jpg, .jpeg or .png (case-insensitive) are read. For each image the
    first encoding returned by face_recognition is kept; images yielding no encoding are
    skipped and images that fail to load are logged as warnings.

    Args:
        known_dir: Root directory holding one sub-directory per person.

    Returns:
        (encodings, names): two parallel lists; names[i] is the sub-directory name that
        encodings[i] came from. Both are empty when known_dir does not exist.
    """
    encs: List[np.ndarray] = []
    names: List[str] = []
    if not os.path.exists(known_dir):
        LOGGER.warning("Known faces dir '%s' not found.", known_dir)
        return encs, names

    image_suffixes = (".jpg", ".jpeg", ".png")
    for person in os.listdir(known_dir):
        person_dir = os.path.join(known_dir, person)
        if os.path.isdir(person_dir):
            photos = [entry for entry in os.listdir(person_dir) if entry.lower().endswith(image_suffixes)]
            for fname in photos:
                path = os.path.join(person_dir, fname)
                try:
                    found = face_recognition.face_encodings(face_recognition.load_image_file(path))
                    if not found:
                        continue  # no encoding produced for this photo
                    encs.append(found[0])
                    names.append(person)
                    LOGGER.info("Loaded face '%s' from %s", person, fname)
                except Exception as e:
                    LOGGER.warning("Failed load face %s: %s", path, e)

    LOGGER.info("Loaded %d face encodings for %d people", len(encs), len(set(names)))
    return encs, names

# -----------------------
# FaceRecognitionAlarm class
# -----------------------
class FaceRecognitionAlarm(solutions.SecurityAlarm):
    """
    SecurityAlarm subclass that adds face recognition to the tracked detections.

    On the first frame and then every `recognition_interval` frames, faces are detected on a
    down-scaled copy of the frame and matched to 'person' boxes. The verdict for each track id
    is remembered and reused on the frames in between, so a recognised person is not counted
    as unknown merely because recognition was skipped for that frame. Labels are built with
    update_labels_info(). The alarm sound starts when the number of unknown persons reaches
    `self.records` and is stopped when the count drops below it.
    """

    def __init__(self, face_data_path: str = KNOWN_FACES_DIR, *args, **kwargs):
        """
        Args:
            face_data_path: Directory passed to load_known_faces().
            *args, **kwargs: Forwarded unchanged to solutions.SecurityAlarm.
        """
        super().__init__(*args, **kwargs)
        self.known_face_encodings, self.known_face_names = load_known_faces(face_data_path)
        self.face_tolerance = FACE_TOLERANCE
        self.frame_idx = 0
        self.recognition_interval = RECOGNITION_INTERVAL
        self.sound_played = False
        self.show_conf = self.CFG.get("show_conf", True)
        self.show_labels = self.CFG.get("show_labels", True)
        self.show_boxes = self.CFG.get("show_boxes", True)
        # track_id -> (name, is_known, color) from the most recent recognition pass
        self._identities = {}
        # init pygame mixer for alarm sound (optional)
        try:
            pygame.mixer.init()
            if os.path.exists(ALARM_SOUND):
                pygame.mixer.music.load(ALARM_SOUND)
        except Exception as e:
            LOGGER.warning("pygame init/load sound failed: %s", e)

    def play_sound(self):
        """Start the alarm sound unless the mixer is unavailable or already playing."""
        if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
            try:
                pygame.mixer.music.play()
                self.sound_played = True
                LOGGER.info("Playing alarm sound")
            except Exception as e:
                LOGGER.warning("Could not play sound: %s", e)

    def reset_sound(self):
        """Stop the alarm sound if it is playing and clear the played flag."""
        if pygame.mixer.get_init() and pygame.mixer.music.get_busy():
            try:
                pygame.mixer.music.stop()
            except Exception as e:
                LOGGER.warning("Could not stop sound: %s", e)
        self.sound_played = False

    def _detect_faces(self, im0):
        """Return [((top, right, bottom, left), encoding), ...] with coordinates in im0 pixels."""
        small = cv2.resize(im0, (0, 0), fx=RECOGNITION_SCALE, fy=RECOGNITION_SCALE)
        rgb_small = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)
        locations = face_recognition.face_locations(rgb_small)
        encodings = face_recognition.face_encodings(rgb_small, locations)
        # Divide by the scale (not multiply by int(1/scale)) so non-reciprocal scales map back correctly.
        return [
            (tuple(int(value / RECOGNITION_SCALE) for value in location), encoding)
            for location, encoding in zip(locations, encodings)
        ]

    def _identify_person(self, person_box, faces, label_name, default_color):
        """
        Decide the identity of one person box from the first face whose centre lies inside it.

        Returns:
            (name, is_known, color): the matched known name in green, "Unknown" in red, or
            "<label_name> (No Face)" in default_color when no face centre falls inside the box.
        """
        x1, y1, x2, y2 = person_box
        for (top, right, bottom, left), f_enc in faces:
            center_x = (left + right) // 2
            center_y = (top + bottom) // 2
            if not (x1 <= center_x <= x2 and y1 <= center_y <= y2):
                continue
            if self.known_face_encodings:
                dists = face_recognition.face_distance(self.known_face_encodings, f_enc)
                best = int(np.argmin(dists))
                if dists[best] < self.face_tolerance:
                    name = self.known_face_names[best]
                    print(f"‚úÖ Known Person Detected: {name}")
                    return name, True, (0, 255, 0)
                print(f"‚ö†Ô∏è Unknown Person Detected!")
            # Only the first face inside the box is considered.
            return "Unknown", False, (0, 0, 255)
        return f"{label_name} (No Face)", False, default_color

    def __call__(self, im0):
        """
        Process one frame.

        Runs extract_tracks(), plots segmentation masks when the model provides them, refreshes
        or reuses the per-track face verdicts, draws one labelled box per detection and drives
        the alarm from the number of unknown persons.

        Args:
            im0: Frame to process (converted with COLOR_BGR2RGB for face detection).

        Returns:
            SolutionResults carrying the input frame (im0) and the annotated frame (plot_im).
        """
        self.frame_idx += 1
        # extract_tracks populates self.boxes, self.clss, self.confs, self.track_ids
        self.extract_tracks(im0)
        self.masks = getattr(self.tracks[0], "masks", None)

        if self.masks is None:
            self.LOGGER.warning("No masks detected! Ensure you're using a supported Ultralytics segmentation model.")
            plot_im = im0
        else:
            results = Results(im0, path=None, names=self.names, boxes=self.track_data.data, masks=self.masks.data)
            plot_im = results.plot(
                line_width=self.line_width,
                boxes=self.show_boxes,
                conf=self.show_conf,
                labels=self.show_labels,
                color_mode="instance",
            )

        annotator = SolutionAnnotator(plot_im, line_width=self.line_width)

        # Recognition is expensive: run it on the first frame and then once per interval.
        run_recognition = self.frame_idx == 1 or self.frame_idx % self.recognition_interval == 0
        faces = self._detect_faces(im0) if run_recognition else []
        refreshed = {}

        unknown_count = 0
        person_cls_id = 0  # COCO 'person'
        raw_ids = getattr(self, "track_ids", None)
        track_ids = raw_ids if isinstance(raw_ids, (list, tuple)) else ()

        for i, (box, conf, cls) in enumerate(zip(self.boxes, self.confs, self.clss)):
            try:
                x1, y1, x2, y2 = map(int, box)
            except Exception:
                # box may be a tensor or other structure
                b = np.array(box).astype(int)
                x1, y1, x2, y2 = int(b[0]), int(b[1]), int(b[2]), int(b[3])

            cls_id = int(cls)
            label_name = self.names[cls_id] if hasattr(self, "names") else f"class_{cls_id}"
            color = colors(cls_id, True)
            name = label_name
            is_known = False
            track_id = track_ids[i] if i < len(track_ids) else None

            if cls_id == person_cls_id:
                if run_recognition:
                    if faces:
                        name, is_known, color = self._identify_person(
                            (x1, y1, x2, y2), faces, label_name, color
                        )
                    if track_id is not None:
                        refreshed[track_id] = (name, is_known, color)
                elif track_id is not None and track_id in self._identities:
                    # Between recognition passes keep the last verdict for this track.
                    name, is_known, color = self._identities[track_id]

            label_dict = update_labels_info(
                {
                    "box": [x1, y1, x2, y2],
                    "cls": cls_id,
                    "conf": float(conf) if conf is not None else None,
                    "track_id": track_id,
                    "name": name,
                    "colors": color,
                    "custom_prefix": "ID:" if cls_id == person_cls_id else label_name,
                }
            )
            # Draw with the status colour (green known / red unknown) rather than the class colour.
            annotator.box_label(label_dict["box"], label=label_dict["display_name"], color=color)

            if cls_id == person_cls_id and not is_known:
                unknown_count += 1

        if run_recognition:
            # Replace the cache wholesale so verdicts of vanished tracks do not accumulate.
            self._identities = refreshed

        # alarm control
        if unknown_count >= self.records:
            if not self.sound_played:
                self.play_sound()
        elif self.sound_played:
            self.reset_sound()

        plot_im = annotator.result()
        total_tracks = len(getattr(self, "track_ids", []))
        cv2.putText(plot_im, f"Tracks: {total_tracks}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        return SolutionResults(im0=im0, plot_im=plot_im)

# -----------------------
# Main
# -----------------------
def main():
    """
    Run the face-recognition alarm on VIDEO_SOURCE.

    Every frame is passed through FaceRecognitionAlarm, the annotated frame is written to
    security_output.avi and shown in a window named "Security". Pressing 'q' or reaching the
    end of the stream stops the loop.

    Raises:
        AssertionError: If VIDEO_SOURCE cannot be opened.
    """
    cap = cv2.VideoCapture(VIDEO_SOURCE)
    assert cap.isOpened(), f"Cannot open video source: {VIDEO_SOURCE}"

    out = None
    # The try starts right after the capture is opened so that a failure while building the
    # alarm or the writer still releases the capture.
    try:
        alarm = FaceRecognitionAlarm(
            face_data_path=KNOWN_FACES_DIR,
            show=True,
            model=MODEL,
            records=RECORDS_THRESHOLD,
            # classes=[0],   # person only
            conf=0.3,
            # iou=0.5,
        )

        w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS) or 20.0  # fall back to 20 fps when the source reports 0
        out = cv2.VideoWriter("security_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

        print("Starting surveillance. Press q to quit.")
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            results = alarm(frame)
            plot = getattr(results, "plot_im", frame)
            out.write(plot)
            cv2.imshow("Security", plot)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
        try:
            pygame.mixer.quit()
        except Exception:
            # Best-effort shutdown: the mixer may never have been initialised.
            pass

if __name__ == "__main__":
    main()
# ...existing code...

In [None]:
# ========================== üõ°Ô∏è AI SURVEILLANCE GUARD APP ==========================
# Real-time Security System using YOLO + Face Recognition + Email Alert + Alarm
# Author: Rahat Ansari | 2025 | Open Source for Educational Use
# ===============================================================================

# ---------- Imports ----------
import cv2
import os
import numpy as np
import smtplib
import time
import pygame
from datetime import datetime
from ultralytics import YOLO
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from deepface import DeepFace

# ---------- Configuration ----------
FAMILY_DIR = "family_members"  # reference photos: family_members/<name>/<image>
YOLO_MODEL_PATH = "yolo11m.pt"  # alternative weights, e.g. yolov8s.pt
ALARM_SOUND_PATH = "pols-aagyi-pols.mp3"  # alarm sound file played by play_alarm()
# Credentials come from the environment so real secrets never have to be typed into the
# notebook; the placeholders are used only when the variables are not set.
EMAIL_SENDER = os.environ.get("SURVEILLANCE_EMAIL_SENDER", "your_email@gmail.com")
EMAIL_PASSWORD = os.environ.get("SURVEILLANCE_EMAIL_PASSWORD", "your_app_password")
EMAIL_RECEIVER = os.environ.get("SURVEILLANCE_EMAIL_RECEIVER", "receiver_email@gmail.com")

# Initialize YOLO
model = YOLO(YOLO_MODEL_PATH)

# Initialize Alarm
try:
    pygame.mixer.init()
except pygame.error as e:
    # No usable audio device: keep the cell runnable, the alarm will stay silent.
    print("Alarm audio unavailable:", e)

def play_alarm():
    """
    Load ALARM_SOUND_PATH and start playing it.

    Problems with the audio device or the sound file are printed instead of raised, so a
    missing file does not stop the surveillance loop that calls this function.
    """
    if not pygame.mixer.get_init():
        print("Alarm audio unavailable: mixer is not initialised")
        return
    try:
        pygame.mixer.music.load(ALARM_SOUND_PATH)
        pygame.mixer.music.play()
    except (pygame.error, OSError) as e:
        print("Could not play alarm sound:", e)

# ---------- Helper Functions ----------
def send_email_alert(image_path, subject="üö® Security Alert! Unauthorized Person Detected"):
    """
    Send an e-mail with an image attachment when an intruder is detected.

    The message goes from EMAIL_SENDER to EMAIL_RECEIVER through smtp.gmail.com:465 (SSL),
    logging in with EMAIL_PASSWORD. Failures are printed, never raised.

    Args:
        image_path: Path of the snapshot to attach.
        subject: Subject line of the message.

    Returns:
        True when the message was handed to the SMTP server, False otherwise.
    """
    # Read the snapshot first: without it there is nothing worth connecting for.
    try:
        with open(image_path, "rb") as attachment:
            payload = attachment.read()
    except (OSError, TypeError, ValueError) as e:
        print("‚ùå Email sending failed:", e)
        return False

    try:
        msg = MIMEMultipart()
        msg["From"] = EMAIL_SENDER
        msg["To"] = EMAIL_RECEIVER
        msg["Subject"] = subject

        body = "An unauthorized person was detected.\nSee attached snapshot for details."
        msg.attach(MIMEText(body, "plain"))

        part = MIMEBase("application", "octet-stream")
        part.set_payload(payload)
        encoders.encode_base64(part)
        # Passing filename as a parameter lets the email package quote/encode it correctly
        # (spaces, non-ASCII) instead of splicing it raw into the header value.
        part.add_header("Content-Disposition", "attachment", filename=os.path.basename(image_path))
        msg.attach(part)

        # The timeout keeps an unreachable mail server from blocking the caller indefinitely.
        with smtplib.SMTP_SSL("smtp.gmail.com", 465, timeout=30) as server:
            server.login(EMAIL_SENDER, EMAIL_PASSWORD)
            server.sendmail(EMAIL_SENDER, EMAIL_RECEIVER, msg.as_string())
    except Exception as e:
        # Deliberately broad: an alert failure must never take the surveillance loop down.
        print("‚ùå Email sending failed:", e)
        return False

    print("üìß Email alert sent successfully!")
    return True

def is_authorized_face(frame):
    """
    Check whether the face in `frame` matches any authorized family member.

    `frame` is verified with DeepFace (model "Facenet", enforce_detection=False) against every
    file found under FAMILY_DIR/<member>/, visiting members and files in sorted order so the
    result is reproducible when several members would match.

    Args:
        frame: Image crop to verify (passed as the first image to DeepFace.verify).

    Returns:
        The name of the first member directory with a verified match, or None when nothing
        matches or `frame` is missing/empty.
    """
    # A degenerate box yields a zero-sized crop; skip it rather than running DeepFace
    # against every reference image only to fail each time.
    if frame is None or getattr(frame, "size", 1) == 0:
        return None

    for member in sorted(os.listdir(FAMILY_DIR)):
        member_path = os.path.join(FAMILY_DIR, member)
        if not os.path.isdir(member_path):
            continue
        for img_name in sorted(os.listdir(member_path)):
            ref_path = os.path.join(member_path, img_name)
            try:
                result = DeepFace.verify(frame, ref_path, model_name="Facenet", enforce_detection=False)
            except Exception:
                # Unreadable reference file or DeepFace failure: try the next reference.
                continue
            if result["verified"]:
                return member  # Authorized person found
    return None

def log_event(event_type, name="Unknown"):
    """Append one line "[<current datetime>] <event_type>: <name>" to event_log.txt."""
    entry = f"[{datetime.now()}] {event_type}: {name}\n"
    with open("event_log.txt", "a") as log_file:
        log_file.write(entry)

# ---------- Main Surveillance ----------
def start_surveillance():
    """
    Run the surveillance loop on the configured video file.

    Each frame goes through the YOLO model; every 'person' box is cropped and checked with
    is_authorized_face(). Authorized persons are labelled in green, everyone else in red with
    a box. The alarm and the log entry fire once when an unauthorized person appears and are
    re-armed only after a frame that contains authorized persons and no unauthorized one.
    Press 'q' to stop.
    """
    print("üöÄ Starting AI Surveillance System...")
    cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
    # cap = cv2.VideoCapture(0)
    unauthorized_detected = False

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # The alarm state is decided per frame, not per box: otherwise a frame holding
            # both an authorized and an unauthorized person would re-arm and re-fire the
            # alarm (and the log) on every single frame.
            saw_authorized = False
            saw_intruder = False

            # Run YOLO Detection
            results = model(frame)
            for r in results:
                for box in r.boxes:
                    cls = int(box.cls[0])
                    if model.names[cls] != "person":
                        continue
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    person_crop = frame[y1:y2, x1:x2]

                    name = is_authorized_face(person_crop)
                    if name:
                        cv2.putText(frame, f"Authorized: {name}", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
                        saw_authorized = True
                    else:
                        cv2.putText(frame, "Unauthorized!", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                        saw_intruder = True

            if saw_intruder:
                if not unauthorized_detected:
                    unauthorized_detected = True
                    # Snapshot saving and e-mail alerts are currently disabled:
                    # cv2.imwrite(image_path, frame)
                    # send_email_alert(image_path)
                    play_alarm()
                    log_event("Unauthorized Person Detected")
            elif saw_authorized:
                unauthorized_detected = False

            cv2.imshow("AI Security Guard", frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the capture and windows even when detection or recognition raises.
        cap.release()
        cv2.destroyAllWindows()

# ---------- Run the App ----------
if __name__ == "__main__":
    start_surveillance()


# Vision Eye is an AI-powered surveillance solution that leverages advanced computer vision techniques to monitor and analyze video feeds in real-time. It is designed to enhance security and safety in various environments, such as homes, offices, and public spaces.

In [None]:
import cv2

from ultralytics import solutions

cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
# cap = cv2.VideoCapture("media_files/WIN_20251103_14_11_20_Pro.mp4")
assert cap.isOpened(), "Error reading video file"

# Video writer
w, h, fps = (int(cap.get(x)) for x in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT, cv2.CAP_PROP_FPS))
video_writer = cv2.VideoWriter("visioneye_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# try/finally so the capture, the writer and the preview windows are released even when
# building the solution or processing a frame raises.
try:
    # Initialize vision eye object
    visioneye = solutions.VisionEye(
        show=True,  # display the output
        model="yolo11m.pt",  # any model that Ultralytics supports
        # classes=[0, 19],  # generate visioneye view for specific classes
        vision_point=(50, 50),  # the point from which vision views objects and draws tracks
        records=3,
        conf=0.5,
    )

    # Process video
    while cap.isOpened():
        success, im0 = cap.read()

        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        results = visioneye(im0)

        print(results)  # access the output

        # video_writer.write(results.plot_im)  # write the video file
finally:
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()  # destroy all opened windows

Trying the face-recognition alarm with the VisionEye solution.

In [None]:
import cv2
from numpy import source

from ultralytics import solutions
from ultralytics.utils.plotting import Annotator

import os
import cv2
import numpy as np
import face_recognition
import pygame
from ultralytics import solutions
from ultralytics import YOLO
from ultralytics.solutions.config import SolutionConfig
from ultralytics.utils import LOGGER

from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors

# ========== üîä SOUND SETUP ==========
pygame.mixer.init()
ALARM_FILE = "security_alarm2.mp3"
# Load the alarm into the music channel up front; only warn when the file is missing.
if not os.path.exists(ALARM_FILE):
    print(f"[WARNING] Alarm file '{ALARM_FILE}' not found.")
else:
    pygame.mixer.music.load(ALARM_FILE)


# ========== üß† KNOWN FACE ENCODING LOADER ==========
KNOWN_FACE_DIR = "family_members"
known_face_encodings, known_face_names = [], []

# Expected layout: KNOWN_FACE_DIR/<person>/<image>. The first encoding found in each image
# is stored together with the name of the folder it came from.
if not os.path.exists(KNOWN_FACE_DIR):
    print("[WARNING] No known_faces directory found.")
else:
    for name in os.listdir(KNOWN_FACE_DIR):
        person_dir = os.path.join(KNOWN_FACE_DIR, name)
        if os.path.isdir(person_dir):
            for filename in os.listdir(person_dir):
                path = os.path.join(person_dir, filename)
                try:
                    img = face_recognition.load_image_file(path)
                    enc = face_recognition.face_encodings(img)
                    if not enc:
                        continue  # no encoding produced for this file
                    known_face_encodings.append(enc[0])
                    known_face_names.append(name)
                    print(f"[INFO] Loaded face for {name} from {filename}")
                except Exception as e:
                    print(f"[ERROR] Failed loading {path}: {e}")


# ========== üëÅÔ∏è FACE-RECOGNITION ALARM (REVISED & OPTIMIZED) ==========
class FaceRecognitionAlarmVisionEye(solutions.VisionEye):
    """
    VisionEye solution extended with face recognition and an audible alarm.

    Every frame, faces are detected on a quarter-size copy of the image and associated with
    'person' boxes (class id 0). Persons whose face matches a known encoding are labelled with
    that name in green; all other persons are labelled "Unknown" in red and counted. The alarm
    plays while the unknown count is at least `records`.
    """

    def __init__(self, *args, known_face_encodings=None, known_face_names=None, **kwargs):
        """
        Args:
            known_face_encodings: Encodings of authorised faces (parallel to known_face_names).
            known_face_names: Name for each entry of known_face_encodings.
            *args, **kwargs: Forwarded unchanged to solutions.VisionEye.
        """
        super().__init__(*args, **kwargs)
        self.known_face_encodings = known_face_encodings or []
        self.known_face_names = known_face_names or []
        self.sound_played = False
        self.face_tolerance = 0.55  # a face matches when its distance is below this value
        self.vision_point = self.CFG["vision_point"]
        self.records = self.CFG.get("records", 1)

    def play_sound(self):
        """Plays the alarm sound if it's not already playing."""
        if self.sound_played:
            return
        if pygame.mixer.get_init() and not pygame.mixer.music.get_busy():
            try:
                pygame.mixer.music.play()
            except pygame.error as e:
                # Typically: no music loaded because the alarm file was missing at start-up.
                LOGGER.warning("Could not play alarm sound: %s", e)
                return
            self.sound_played = True
            LOGGER.info("üö® Alarm Triggered: Unknown person count reached threshold.")

    def reset_sound(self):
        """Stops the alarm sound and resets the state."""
        if self.sound_played:
            if pygame.mixer.get_init():
                pygame.mixer.music.stop()
            self.sound_played = False
            LOGGER.info("üü¢ Alarm Reset: Area clear.")

    def _match_person(self, box, faces):
        """
        Identify one person box from the first face whose centre lies inside it.

        Args:
            box: Person box as (left, top, right, bottom).
            faces: [((top, right, bottom, left), encoding), ...] in full-frame coordinates.

        Returns:
            (name, is_known): the known name and True on a match, otherwise ("Unknown", False).
        """
        left, top, right, bottom = map(int, box)
        for (f_top, f_right, f_bottom, f_left), encoding in faces:
            center_x = (f_left + f_right) // 2
            center_y = (f_top + f_bottom) // 2
            if not (left <= center_x <= right and top <= center_y <= bottom):
                continue
            if self.known_face_encodings:
                distances = face_recognition.face_distance(self.known_face_encodings, encoding)
                best = int(np.argmin(distances))
                if distances[best] < self.face_tolerance:
                    return self.known_face_names[best], True
            # Only the first face inside the box is considered.
            return "Unknown", False
        return "Unknown", False

    def __call__(self, im0):
        """
        Processes a single frame for person detection and face recognition.

        Args:
            im0: Frame to process (converted with COLOR_BGR2RGB for face detection).

        Returns:
            SolutionResults with the annotated frame (plot_im) and the number of tracks
            (total_tracks).
        """
        self.extract_tracks(im0)
        annotator = SolutionAnnotator(im0, line_width=self.line_width)

        # Detect all faces once on a smaller copy, then map them back to full-frame coordinates.
        scale = 0.25
        small_frame = cv2.resize(im0, (0, 0), fx=scale, fy=scale)
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        faces = [
            (tuple(int(value / scale) for value in location), encoding)
            for location, encoding in zip(face_locations, face_encodings)
        ]

        unknown_person_count = 0
        for box, conf, cls, t_id in zip(self.boxes, self.confs, self.clss, self.track_ids):
            if int(cls) != 0:
                # Non-person classes keep the default label and per-track colour.
                annotator.box_label(box, label=self.adjust_box_label(cls, conf, t_id), color=colors(int(t_id), True))
                annotator.visioneye(box, self.vision_point)
                continue

            name, is_known = self._match_person(box, faces)
            if is_known:
                color = (0, 255, 0)  # Green for Known
                label = f"{name}"
            else:
                unknown_person_count += 1
                color = (0, 0, 255)  # Red for Unknown
                label = "Unknown"

            base_label = self.adjust_box_label(int(cls), float(conf) if conf is not None else 0.0, t_id)
            # The identity is used as prefix unless CFG carries a 'person_label_prefix' override.
            prefix = str(self.CFG.get("person_label_prefix", label))
            custom_label = f"{prefix}:"
            final_label = f"{custom_label} {base_label}" if base_label else custom_label

            # Draw persons in their status colour so known/unknown is visible at a glance.
            annotator.box_label(box, label=final_label, color=color)
            annotator.visioneye(box, self.vision_point)

        # Trigger alarm based on the COUNT of unknown people and the 'records' threshold
        if unknown_person_count >= self.records:
            self.play_sound()
        else:
            self.reset_sound()

        plot_im = annotator.result()

        # Draw the track count before displaying, so the shown window includes it too.
        total_tracks = len(getattr(self, "track_ids", []))
        cv2.putText(plot_im, f"Tracks: {total_tracks}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        self.display_output(plot_im)

        return SolutionResults(plot_im=plot_im, total_tracks=total_tracks)

    # # ...existing code...
    # def __call__(self, im0):
    #     # Safe defaults and config
    #     scale = float(self.CFG.get("recognition_scale", 0.25))
    #     rec_interval = int(self.CFG.get("recognition_interval", 3))
    #     person_cls_id = int(self.CFG.get("person_class", 0))
    #     person_prefix = str(self.CFG.get("person_label_prefix", "person_"))

    #     # Prepare annotator and tracks
    #     self.extract_tracks(im0)
    #     annotator = SolutionAnnotator(im0, line_width=self.line_width)

    #     # Intermittent face detection for performance
    #     if not hasattr(self, "_frame_idx"):
    #         self._frame_idx = 0
    #     self._frame_idx += 1
    #     face_locations, face_encodings = [], []
    #     if self._frame_idx % rec_interval == 0:
    #         small = cv2.resize(im0, (0, 0), fx=scale, fy=scale)
    #         rgb_small = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)
    #         face_locations = face_recognition.face_locations(rgb_small)
    #         face_encodings = face_recognition.face_encodings(rgb_small, face_locations)

    #     unknown_count = 0
    #     rescale = int(1.0 / scale) if scale > 0 else 1

    #     # Iterate detections
    #     for i, (box, conf, cls) in enumerate(zip(self.boxes, self.confs, self.clss)):
    #         try:
    #             x1, y1, x2, y2 = map(int, box)
    #         except Exception:
    #             b = np.array(box).astype(int)
    #             x1, y1, x2, y2 = int(b[0]), int(b[1]), int(b[2]), int(b[3])

    #         # default label/color
    #         name = self.names[int(cls)] if hasattr(self, "names") else f"class_{int(cls)}"
    #         color = colors(int(cls), True)

    #         # non-person: annotate and continue
    #         if int(cls) != person_cls_id:
    #             annotator.box_label([x1, y1, x2, y2], label=self.adjust_box_label(cls, conf, (self.track_ids[i] if i < len(self.track_ids) else None)), color=color)
    #             # optional vision mapping for all classes
    #             if getattr(self, "vision_point", None):
    #                 annotator.visioneye([x1, y1, x2, y2], tuple(self.CFG.get("vision_point", self.vision_point)))
    #             continue

    #         # associate face inside person box (if we have face encodings)
    #         matched_name = None
    #         if face_encodings:
    #             for (ftop, fright, fbottom, fleft), fenc in zip(face_locations, face_encodings):
    #                 # scale face coords back to original
    #                 ftop, fright, fbottom, fleft = int(ftop * rescale), int(fright * rescale), int(fbottom * rescale), int(fleft * rescale)
    #                 cx, cy = (fleft + fright) // 2, (ftop + fbottom) // 2
    #                 if x1 <= cx <= x2 and y1 <= cy <= y2:
    #                     if self.known_face_encodings:
    #                         dists = face_recognition.face_distance(self.known_face_encodings, fenc)
    #                         best = int(np.argmin(dists))
    #                         if dists[best] < getattr(self, "face_tolerance", 0.55):
    #                             matched_name = self.known_face_names[best]
    #                     break

    #         if matched_name:
    #             label = f"{matched_name}"
    #             color = (0, 255, 0)
    #         else:
    #             unknown_count += 1
    #             track_id = (self.track_ids[i] if i < len(self.track_ids) else None)
    #             prefix = f"{person_prefix}{track_id} " if track_id is not None else f"{person_prefix}"
    #             base_label = self.adjust_box_label(cls, conf, track_id) or ""
    #             label = f"{prefix}{base_label}".strip()
    #             color = (0, 0, 255)

    #         # safe color: if track id present use it else use cls
    #         tid = self.track_ids[i] if (hasattr(self, "track_ids") and i < len(self.track_ids)) else None
    #         try:
    #             draw_color = colors(int(tid), True) if tid is not None else color
    #         except Exception:
    #             draw_color = color

    #         annotator.box_label([x1, y1, x2, y2], label=label, color=draw_color)
    #         if getattr(self, "vision_point", None):
    #             annotator.visioneye([x1, y1, x2, y2], tuple(self.CFG.get("vision_point", self.vision_point)))

    #     # alarm handling (existing methods)
    #     if unknown_count >= getattr(self, "records", 1):
    #         self.play_sound()
    #     else:
    #         self.reset_sound()

    #     plot_im = annotator.result()
    #     self.display_output(plot_im)
    #     return SolutionResults(plot_im=plot_im, total_tracks=len(getattr(self, "track_ids", [])))
    # # ...existing code...
if __name__ == "__main__":
    # Video source (alternatives kept for quick switching)
    # cap = cv2.VideoCapture("media_files/person/bappi/WIN_20251122_20_23_39_Pro.mp4")
    # cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
    assert cap.isOpened(), "Error reading video file"

    # Video writer sized and timed like the source (values truncated to int)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    video_writer = cv2.VideoWriter(
        "visioneye_output.avi",
        cv2.VideoWriter_fourcc(*"mp4v"),
        fps,
        (w, h),
    )

    # VisionEye with face recognition and alarm, fed with the encodings loaded above
    visioneyeInterface = FaceRecognitionAlarmVisionEye(
        show=True,  # display the output
        model="yolo11m.pt",  # any model that Ultralytics supports
        # classes=[0, 19],  # generate visioneye view for specific classes
        vision_point=(50, 50),  # the point from which vision views objects and draws tracks
        known_face_encodings=known_face_encodings,
        known_face_names=known_face_names,
        records=2,
        conf=0.5,
        # show_labels=True,
    )

    # # Process video
    # while cap.isOpened():
    #     success, im0 = cap.read()

    #     if not success:
    #         print("Video frame is empty or video processing has been successfully completed.")
    #         break

    #         results = visioneyeInterface(im0)

    #         # if not success:
    #         #     print("Video frame is empty or video processing has been successfully completed.")
    #         #     break

    #         # results = visioneye(im0)
    #         # results = visioneyeInterface(frame)

    #         # print(results)  # access the output

    #         # video_writer.write(results.plot_im)  # write the video file
    #         # cv2.imshow("Face Recognition Security Alarm", results.plot_im)
    #         print(results)  # access the output

    #         # video_writer.write(results.plot_im)  # write the video file

    # cap.release()
    # video_writer.release()
    # cv2.destroyAllWindows()  # destroy all opened windows
# # Initialize vision eye object properly by instantiating the custom class
# visioneye = FaceRecognitionAlarmVisionEye(
#     show=True,  # display the output
#     model="yolo11m.pt",  # use any model that Ultralytics support, i.e, YOLOv10
#     # classes=[0, 19],  # generate visioneye view for specific classes
#     vision_point=(50, 50),  # the point, where vision will view objects and draw tracks
#     known_face_encodings=known_face_encodings,
#     known_face_names=known_face_names,
#     records=3,
#     conf=0.5,
# )

# Open a video source (try webcam first, fall back to sample file)
# cap = cv2.VideoCapture(0)
# if not cap.isOpened():
# cap = cv2.VideoCapture("media_files/animal_surveillance/goru-churi.mp4")
# if not cap.isOpened():
#     raise RuntimeError("Failed to open webcam or fallback video file. Please provide a valid video source.")

# # Create a video writer to save output
# w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
# h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
# video_writer = cv2.VideoWriter("visioneye_output.avi", cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

# # Process video frames
# while True:
#     success, im0 = cap.read()
#     if not success:
#         print("[INFO] Video finished or empty frame.")
#         break

#     results = visioneye(im0)

#     # Print/inspect the results object
#     print(results)

#     # # Write result frame if available
#     # if getattr(results, "plot_im", None) is not None:
#     #     video_writer.write(results.plot_im)

#     # # Optionally show the frame (respects visioneye.show)
#     # if getattr(visioneye, "show", False):
#     #     cv2.imshow("Face Recognition Security Alarm", results.plot_im)
#     #     if cv2.waitKey(1) & 0xFF == ord("q"):
#     #         break

# cap.release()
# video_writer.release()
# cv2.destroyAllWindows()
# Process video
# The loop runs inside try/finally so the capture, the writer and any OpenCV
# windows are released even when a frame fails to process or the cell is
# interrupted (Kernel -> Interrupt raises KeyboardInterrupt inside the loop).
try:
    while cap.isOpened():
        success, im0 = cap.read()

        if not success:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        results = visioneyeInterface(im0)

        print(results)  # access the output

        # video_writer.write(results.plot_im)  # write the video file

        # Press "q" in the display window to stop early.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    cap.release()
    video_writer.release()
    cv2.destroyAllWindows()

pygame 2.6.1 (SDL 2.28.4, Python 3.12.8)
Hello from the pygame community. https://www.pygame.org/contribute.html
[INFO] Loaded face for robin from image1.jpg
Ultralytics Solutions:  {'source': None, 'model': 'yolo11m.pt', 'classes': None, 'show_conf': True, 'show_labels': True, 'region': None, 'colormap': 21, 'show_in': True, 'show_out': True, 'up_angle': 145.0, 'down_angle': 90, 'kpts': [6, 8, 10], 'analytics_type': 'line', 'figsize': (12.8, 7.2), 'blur_ratio': 0.5, 'vision_point': (50, 50), 'crop_dir': 'cropped-detections', 'json_file': None, 'line_width': 2, 'records': 2, 'fps': 30.0, 'max_hist': 5, 'meter_per_pixel': 0.05, 'max_speed': 120, 'show': True, 'iou': 0.7, 'conf': 0.5, 'device': None, 'max_det': 300, 'half': False, 'tracker': 'botsort.yaml', 'verbose': True, 'data': 'images'}

0: 384x640 5 cows, 65.9ms
Speed: 2.3ms preprocess, 65.9ms inference, 126.8ms postprocess per image at shape (1, 3, 384, 640)
SolutionResults(total_tracks=5)

0: 384x640 5 cows, 26.6ms
Speed: 2.2ms p

In [None]:
import torch
from ultralytics.data.annotator import auto_annotate

# Run on the GPU when one is present, otherwise on the CPU, so the cell also
# works on machines without CUDA instead of failing on a hard-coded "cuda".
annotation_device = "cuda" if torch.cuda.is_available() else "cpu"

# Detect objects in the image with the YOLO model, segment them with the SAM
# model, and write the resulting annotations to `output_dir`.
auto_annotate(
    data="dog.jpeg",
    det_model="yolo11n.pt",
    sam_model="mobile_sam.pt",
    device=annotation_device,
    output_dir="output_dir",
)

# chatgpt solution

In [None]:
import cv2
import numpy as np
import face_recognition
import mediapipe as mp
from ultralytics import YOLO
import smtplib
import pygame
import os
import time
from datetime import datetime

# Initialize MediaPipe Face Detection
# These are module aliases only; no detector object is constructed here.
mp_face_detection = mp.solutions.face_detection  # used by detect_faces_mediapipe() to build the detector
mp_drawing = mp.solutions.drawing_utils  # NOTE(review): not referenced in the visible part of this cell - confirm it is still needed

def detect_faces_mediapipe(frame):
    """
    Detect faces in a BGR image using MediaPipe.

    The MediaPipe detector is created on the first call and reused afterwards.
    Building it is expensive, and this function is called for every person box
    of every frame, so constructing a new detector per call dominates runtime.
    The cached detector lives for the lifetime of this function object.

    Args:
        frame: BGR image array with shape (height, width, channels), or None.

    Returns:
        List of face bounding boxes in pixel coordinates of `frame`, each in
        the format [x, y, w, h]. Empty when `frame` is None or empty, or when
        no face is detected.
    """
    if frame is None or frame.size == 0:
        return []

    detector = getattr(detect_faces_mediapipe, "_detector", None)
    if detector is None:
        detector = mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5)
        detect_faces_mediapipe._detector = detector

    # Convert BGR to RGB for MediaPipe
    results = detector.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if not results.detections:
        return []

    frame_h, frame_w = frame.shape[:2]
    face_boxes = []
    for detection in results.detections:
        # MediaPipe reports boxes relative to the image size; scale to pixels.
        bbox = detection.location_data.relative_bounding_box
        face_boxes.append([
            int(bbox.xmin * frame_w),
            int(bbox.ymin * frame_h),
            int(bbox.width * frame_w),
            int(bbox.height * frame_h),
        ])

    return face_boxes

# Object detector. A custom-trained checkpoint was tried as well:
# model = YOLO("runs/detect/train2/weights/best.pt")
model = YOLO("yolo11m.pt")

# MediaPipe pose estimator. The monitoring loop does not consume it; it is
# only created here and closed at shutdown (kept for completeness).
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Gallery of known faces: two parallel lists, one encoding and one name per
# successfully encoded image. Expected layout: <known_faces_dir>/<person>/<image>.
known_face_encodings = []
known_face_names = []

known_faces_dir = "family_members"
if os.path.exists(known_faces_dir):
    for person_name in os.listdir(known_faces_dir):
        person_dir = os.path.join(known_faces_dir, person_name)
        if not os.path.isdir(person_dir):
            # Stray files next to the per-person folders are ignored.
            continue
        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            try:
                image = face_recognition.load_image_file(image_path)
                face_encodings = face_recognition.face_encodings(image)
                if not face_encodings:
                    # No encoding could be computed for this image; skip it silently.
                    continue
                # Only the first encoding of each image is kept.
                known_face_encodings.append(face_encodings[0])
                known_face_names.append(person_name)
                print(f"Loaded face: {person_name} from {image_name}")
            except Exception as e:
                # A single unreadable file must not abort loading the gallery.
                print(f"Error loading {image_path}: {e}")

if not known_face_encodings:
    print("Warning: No face encodings loaded. Face recognition will not work.")
else:
    print(f"Successfully loaded {len(known_face_encodings)} face encodings for {len(set(known_face_names))} people")

# Alarm sound: the mixer is always initialised, the track only when it exists.
pygame.mixer.init()
alarm_file = "pols-aagyi-pols.mp3"
if not os.path.exists(alarm_file):
    print(f"Warning: Alarm file {alarm_file} not found")
else:
    pygame.mixer.music.load(alarm_file)

# Directory that receives the daily security logs.
log_dir = "security_logs"
os.makedirs(log_dir, exist_ok=True)

def log_event(event_type, details=""):
    """Append one timestamped security event to today's log file.

    The file is `security_log_<YYYY-MM-DD>.txt` inside `log_dir`, and each
    line has the form `<YYYY-MM-DD HH:MM:SS> - <event_type>: <details>`.

    Args:
        event_type: Short event identifier, e.g. "SYSTEM_START".
        details: Optional free-text description of the event.
    """
    # Read the clock once so the date in the file name and the timestamp in
    # the line cannot disagree when the call straddles midnight.
    now = datetime.now()
    log_file = os.path.join(log_dir, f"security_log_{now.strftime('%Y-%m-%d')}.txt")
    # Explicit UTF-8: details may carry non-ASCII person names, and the
    # platform default encoding (e.g. cp1252 on Windows) would raise on them.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"{now.strftime('%Y-%m-%d %H:%M:%S')} - {event_type}: {details}\n")

# This function is not fully implemented for actual email sending, but logs the intent.
def send_email_alert(person_status, person_name="N/A", objects_detected=None):
    """Simulate an e-mail alert for a detected person.

    No mail is sent: the alert is printed and recorded through `log_event`.

    Args:
        person_status: "KNOWN" or "UNKNOWN"; any other value is ignored.
        person_name: Name used in the alert for a known person.
        objects_detected: Optional list of object labels seen with the person.
    """
    objects_str = ", ".join(objects_detected) if objects_detected else "None"
    detected_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    if person_status == "KNOWN":
        subject = f"Security Alert: Known Person Detected - {person_name}"
        intro = f"A known person, {person_name}, was detected at {detected_at}."
        console_message = f"Simulating email alert for KNOWN person: {person_name} with objects: {objects_str}"
        event_type = "EMAIL_ALERT_KNOWN"
    elif person_status == "UNKNOWN":
        subject = "URGENT Security Alert: Unknown Person Detected!"
        intro = f"An UNKNOWN person was detected at {detected_at}."
        console_message = f"Simulating email alert for UNKNOWN person with objects: {objects_str}"
        event_type = "EMAIL_ALERT_UNKNOWN"
    else:
        # Any other status produces no alert at all.
        return

    # Only consumed by the SMTP sketch below; the simulation never sends it.
    body = f"{intro}\nObjects detected: {objects_str}"

    print(console_message)
    log_event(event_type, f"To: security_team@example.com, Subject: {subject}")

    # In a real application, the message would be sent with smtplib here.
    # For example:
    # try:
    #     server = smtplib.SMTP('smtp.your_email_provider.com', 587)
    #     server.starttls()
    #     server.login('your_email@example.com', 'your_password')
    #     msg = f"Subject: {subject}\n\n{body}"
    #     server.sendmail('your_email@example.com', 'security_team@example.com', msg)
    #     server.quit()
    #     log_event("EMAIL_SENT", f"Subject: {subject}")
    # except Exception as e:
    #     log_event("EMAIL_ERROR", f"Failed to send email: {e}")


# Start Video Capture
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture("./media_files/WIN_20251103_14_11_20_Pro.mp4")
if not cap.isOpened():
    # Raise instead of calling exit(): exit() is the interactive-shell helper
    # and, inside a notebook, shuts the whole kernel down instead of just
    # stopping this cell.
    raise RuntimeError("Error: Could not open video capture device")

# Performance optimization variables
frame_count = 0
face_recognition_interval = 5  # Process face recognition every 5 frames
last_alert_time = 0
alert_cooldown = 10  # Seconds between alerts for the same type of event

# Define objects of interest (subset of COCO classes that YOLO can detect)
# NOTE(review): "face" does not look like a label of the stock COCO checkpoint;
# presumably it only matters with a custom-trained model - confirm.
objects_of_interest = [
    "person", "bicycle", "car", "motorcycle", "bus", "truck", "mouse",
    "backpack", "umbrella", "handbag", "tie", "suitcase",
    "cell phone", "laptop", "book", "scissors", "knife", "face"
]

print("Security monitoring started. Press 'q' to quit.")
log_event("SYSTEM_START")

def _identify_known_person(person_roi):
    """Return the name of the first known face found in `person_roi`, or None.

    The crop is downscaled to a quarter of its size before face_recognition
    locates and encodes faces; each encoding is compared with the known-face
    gallery and the closest gallery entry wins if it counts as a match.
    """
    if not known_face_encodings:
        return None

    rgb_small_frame = cv2.cvtColor(cv2.resize(person_roi, (0, 0), fx=0.25, fy=0.25), cv2.COLOR_BGR2RGB)
    face_locations_small = face_recognition.face_locations(rgb_small_frame)
    if not face_locations_small:
        return None

    for face_encoding in face_recognition.face_encodings(rgb_small_frame, face_locations_small):
        matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
        if not any(matches):
            continue
        face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
        best_match_index = np.argmin(face_distances)
        if matches[best_match_index]:
            return known_face_names[best_match_index]
    return None


# One cooldown clock per alert type. With a single shared timestamp a KNOWN
# detection restarted the cooldown and silenced the UNKNOWN alarm.
last_alert_times = {"KNOWN": last_alert_time, "UNKNOWN": last_alert_time}

try:
    while True:
        timer = cv2.getTickCount()
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        frame_count += 1
        # Face recognition is expensive, so identity is only evaluated on
        # every `face_recognition_interval`-th frame.
        process_faces = frame_count % face_recognition_interval == 0
        current_time = time.time()

        results = model(frame, conf=0.5, verbose=False)

        # Pass 1: collect every usable detection, clamped to the frame, so the
        # object list is complete before any person is logged.
        detections = []
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(frame.shape[1], x2), min(frame.shape[0], y2)

                if x2 <= x1 or y2 <= y1:
                    continue

                class_name = result.names[int(box.cls[0])]
                detections.append((class_name, float(box.conf[0]), x1, y1, x2, y2))

        detected_objects = [
            name for name, *_ in detections
            if name != "person" and name in objects_of_interest
        ]
        objects_str = ", ".join(detected_objects) if detected_objects else "None"

        # Draw on a copy: person crops are taken from the untouched `frame`, so
        # boxes and labels drawn earlier never leak into the face analysis.
        annotated = frame.copy()

        # Pass 2: annotate objects, classify and annotate persons.
        for class_name, conf, x1, y1, x2, y2 in detections:
            if class_name != "person":
                if class_name in objects_of_interest:
                    cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 255, 255), 2)
                    label = f"{class_name}: {conf:.2f}"
                    cv2.putText(annotated, label, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)
                continue

            person_roi = frame[y1:y2, x1:x2]

            person_status = "UNKNOWN"  # Default to unknown
            person_name = "UNKNOWN"

            face_boxes = detect_faces_mediapipe(person_roi) if person_roi.size > 0 else []

            for fx, fy, fw, fh in face_boxes:
                # Face boxes are relative to the person crop; shift to frame coordinates.
                face_x1, face_y1 = x1 + fx, y1 + fy
                cv2.rectangle(annotated, (face_x1, face_y1), (face_x1 + fw, face_y1 + fh), (0, 0, 255), 2)
                cv2.putText(annotated, "Face", (face_x1 + 5, face_y1 - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            # Only run recognition when MediaPipe actually found a face.
            if process_faces and face_boxes:
                recognized_name = _identify_known_person(person_roi)
                if recognized_name is not None:
                    person_name = recognized_name
                    person_status = "KNOWN"

            # Draw person box based on status
            if person_status == "KNOWN":
                box_color = (0, 255, 0)  # Green for known
                label = f"KNOWN: {person_name}"
            else:
                box_color = (0, 165, 255)  # Orange for unknown
                label = "UNKNOWN"
            cv2.rectangle(annotated, (x1, y1), (x2, y2), box_color, 2)
            cv2.putText(annotated, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, box_color, 2)

            if not process_faces:
                # Identity was not evaluated on this frame, so the default
                # "UNKNOWN" is not a finding: do not log it or raise the alarm.
                # (The drawn label still shows the default on such frames.)
                continue

            cooldown_over = current_time - last_alert_times[person_status] > alert_cooldown
            if person_status == "KNOWN":
                print(f"✅ Known Person Detected: {person_name}")
                log_event("KNOWN_PERSON", f"Detected: {person_name} with objects: {objects_str}")
                if cooldown_over:
                    # send_email_alert("KNOWN", person_name, detected_objects)
                    last_alert_times["KNOWN"] = current_time
            else:
                print("⚠️ Unknown Person Detected!")
                log_event("UNKNOWN_PERSON", f"With objects: {objects_str}")
                if cooldown_over:
                    if os.path.exists(alarm_file) and not pygame.mixer.music.get_busy():
                        pygame.mixer.music.play()
                    # send_email_alert("UNKNOWN", objects_detected=detected_objects)
                    last_alert_times["UNKNOWN"] = current_time

        # Display detected objects summary (sorted so the text does not reshuffle between frames)
        if detected_objects:
            objects_text = f"Objects: {', '.join(sorted(set(detected_objects)))}"
            cv2.putText(annotated, objects_text, (20, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)

        # Calculate and display FPS
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)
        cv2.putText(annotated, f"FPS: {int(fps)}", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        cv2.imshow('Security Monitoring', annotated)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

except Exception as e:
    print(f"An error occurred: {e}")
    log_event("SYSTEM_ERROR", str(e))
finally:
    # Always release the capture, windows, pose estimator and audio mixer.
    cap.release()
    cv2.destroyAllWindows()
    pose.close()
    pygame.mixer.quit()
    log_event("SYSTEM_SHUTDOWN")
    print("Security monitoring stopped.")