In [1]:
import cv2
import dlib
import numpy as np
import os
import threading
from queue import Queue
from datetime import datetime
from pymongo import MongoClient
import time
from fer import FER
from gtts import gTTS
import playsound

# MongoDB setup
client = MongoClient("mongodb+srv://admin-aesha:zaP6CjyjK8WpnG7@cluster0.ka7ha.mongodb.net/?retryWrites=true&w=majority")


db = client["CCTV"]
collection = db["activity_log"]

# Paths to models
known_faces_dir = "https://res.cloudinary.com/ddoeialig/image/upload/v1739511233/uploads/"
shape_predictor_path = "C:/Users/aesha/Downloads/CCTV VIDEO DETECTION/shape_predictor_68_face_landmarks.dat"
face_rec_model_path = "C:/Users/aesha/Downloads/CCTV VIDEO DETECTION/dlib_face_recognition_resnet_model_v1.dat"

# Load models
print("🔄 Loading models...")
detector = dlib.get_frontal_face_detector()  
shape_predictor = dlib.shape_predictor(shape_predictor_path)
face_rec_model = dlib.face_recognition_model_v1(face_rec_model_path)

# Load emotion detector
emotion_detector = FER()

# Load known faces
print("🔄 Loading known faces...")
known_face_encodings, known_face_names = [], []
for filename in os.listdir(known_faces_dir):
    filepath = os.path.join(known_faces_dir, filename)
    image = cv2.imread(filepath)
    if image is None:
        continue
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    faces = detector(rgb_image, 1)  
    if len(faces) == 0:
        continue
    shape = shape_predictor(rgb_image, faces[0])
    face_encoding = np.array(face_rec_model.compute_face_descriptor(rgb_image, shape))
    known_face_encodings.append(face_encoding)
    known_face_names.append(os.path.splitext(filename)[0])

known_face_encodings = np.array(known_face_encodings)

# ** RTSP Camera Setup **
rtsp_url = "rtsp://admin:RoadRacer%401.@117.99.109.118:554/cam/realmonitor?channel=1&subtype=0"
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)

cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)  
cap.set(cv2.CAP_PROP_FPS, 50)  
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)  
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)  

if not cap.isOpened():
    print("❌ Error: Could not connect to RTSP stream.")
    exit()

print("✅ RTSP stream connected successfully.")

# ** Multi-threading Setup **
frame_queue = Queue(maxsize=5)  
detection_count = {}  # Tracks number of times each person is detected
frame_skip = 2  
frame_count = 0

# ** Function to Play Welcome Message **
def play_welcome_message(name, activity):
    message = f"{name} is {activity}"
    file_path = "C:/Users/Aesha/Downloads/welcome.mp3"
    
    try:
        tts = gTTS(text=message, lang='en')
        tts.save(file_path)
        playsound.playsound(file_path)
    except Exception as e:
        print(f"Error during speech generation or playback: {e}")
    finally:
        if os.path.exists(file_path):
            os.remove(file_path)  

# ** Function to Save to Database **
def save_to_database(name, emotion):
    global detection_count
    
    if name not in detection_count:
        detection_count[name] = 1
    else:
        detection_count[name] += 1

    # Alternate activity based on detection count
    activity = "entering" if detection_count[name] % 2 == 1 else "exiting"

    data = {"name": name, "activity": activity, "emotion": emotion, "timestamp": datetime.now()}
    collection.insert_one(data)
    print(f"✅ Stored: {name} | Activity: '{activity}' | Emotion: '{emotion}' | Time: {data['timestamp']}")

    play_welcome_message(name, activity)

# ** Function to Process Frames (Separate Thread) **
def process_frame():
    while True:
        if not frame_queue.empty():
            frame = frame_queue.get()
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces = detector(rgb_frame, 0)  

            for face in faces:
                shape = shape_predictor(rgb_frame, face)
                face_encoding = np.array(face_rec_model.compute_face_descriptor(rgb_frame, shape))

                if known_face_encodings.size > 0:
                    distances = np.linalg.norm(known_face_encodings - face_encoding, axis=1)
                    min_distance_index = np.argmin(distances)
                    if distances[min_distance_index] < 0.45:
                        name = known_face_names[min_distance_index]
                    else:
                        name = "Unknown"
                else:
                    name = "Unknown"

                x, y, w, h = face.left(), face.top(), face.width(), face.height()
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(frame, name, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                # ** Emotion Detection **
                face_roi = frame[y:y+h, x:x+w]
                if face_roi.size > 0:
                    emotion, score = emotion_detector.top_emotion(face_roi)
                    if emotion is None:
                        emotion = "Neutral"
                    print(f"🧠 Emotion detected: {emotion} (Score: {score})")
                else:
                    emotion = "Neutral"

                # ** Store Data & Play Welcome Message **
                if name != "Unknown":
                    save_to_database(name, emotion)

            cv2.imshow("📷 Real-Time Face Recognition", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

# ** Start Processing Thread **
threading.Thread(target=process_frame, daemon=True).start()

# ** Video Streaming Loop (Optimized) **
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        print("⚠ Warning: Failed to read frame from RTSP. Retrying...")
        continue

    frame_count += 1
    if frame_count % frame_skip != 0:
        continue  

    if not frame_queue.full():
        frame_queue.put(frame)  

cap.release()
cv2.destroyAllWindows()

🔄 Loading models...
🔄 Loading known faces...


OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'https://res.cloudinary.com/ddoeialig/image/upload/v1739511233/uploads/'