In [3]:
pip install opencv-python

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip show kafka-python

Note: you may need to restart the kernel to use updated packages.




In [None]:
pip install numpy==1.26.4
#   This command specifically asks `pip` to find a pre-built wheel for `numpy` version 1.26.4. If a compatible wheel exists on the official package index, it will download and install it, completely avoiding the need for a compiler.

In [4]:
pip install facenet-pytorch

Collecting numpy<2.0.0,>=1.24.0 (from facenet-pytorch)
  Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl (15.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
Successfully installed numpy-1.26.4
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.


In [5]:
pip install numpy torch torchvision

Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install kafka-python

Note: you may need to restart the kernel to use updated packages.


In [7]:
pip install confluent_kafka

Note: you may need to restart the kernel to use updated packages.


In [8]:
pip install yt-dlp

Note: you may need to restart the kernel to use updated packages.


In [9]:
# Cell 1: Imports and Setup
import torch
import cv2
import os
import time
from PIL import Image
from facenet_pytorch import MTCNN, InceptionResnetV1

# --- Imports (ensure these are at the top of your cell) ---
from collections import deque
import time
import csv
from datetime import datetime

import json # (NEW) For creating JSON strings

# --- (NEW) KAFKA INTEGRATION: Import the Kafka Producer ---
# You'll need to run: pip install kafka-python
from kafka import KafkaProducer



print("All libraries imported successfully.")

All libraries imported successfully.


In [10]:
# Cell 2: Model and Device Configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f'Running on device: {device}')

# Initialize MTCNN for face detection
mtcnn = MTCNN(image_size=160, margin=14, keep_all=True, device=device)

# Initialize InceptionResnetV1 for face embedding extraction
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

print("Models initialized successfully.")

Running on device: cpu
Models initialized successfully.


In [11]:
# Cell 3: Function to Build the Initial Database
def generate_known_embeddings(database_path):
    """
    Processes a directory of images to create a database of known face embeddings.
    This function will run only if a pre-saved embeddings file is not found.
    """
    known_embeddings = {}
    for person_name in os.listdir(database_path):
        person_dir = os.path.join(database_path, person_name)
        if not os.path.isdir(person_dir):
            continue
        embeddings_list = []
        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            if not os.path.isfile(image_path):
                continue
            try:
                img = Image.open(image_path).convert('RGB')
                face_tensors = mtcnn(img)
                if face_tensors is not None:
                    for face_tensor in face_tensors:
                        embedding = resnet(face_tensor.unsqueeze(0).to(device))
                        embeddings_list.append(embedding.detach().cpu())
            except Exception as e:
                print(f"Could not process image {image_path}: {e}")
        if embeddings_list:
            known_embeddings[person_name] = torch.cat(embeddings_list).mean(0, keepdim=True)
            print(f"Generated initial embeddings for {person_name}")
    return known_embeddings

print("Database generation function is defined.")

Database generation function is defined.


In [12]:
# Cell 4: Load or Create the Face Database
print("Initializing face database...")

# Define paths
database_folder = 'face_database'
embeddings_file = 'known_faces_embeddings.pt'

# Create database folder if it doesn't exist
os.makedirs(database_folder, exist_ok=True)

# Logic to load existing database or create a new one
if os.path.exists(embeddings_file):
    print("Loading known faces from saved file.")
    known_faces_embeddings = torch.load(embeddings_file)
else:
    print("No saved database found. Generating new embeddings from folder...")
    known_faces_embeddings = generate_known_embeddings(database_folder)
    # Save the newly generated embeddings for future runs
    torch.save(known_faces_embeddings, embeddings_file)

print("\n---------------------------------")
print("Known faces database is ready.")

if known_faces_embeddings:
    print("The following people are in the database:")
    print(list(known_faces_embeddings.keys()))
else:
    print("The database is empty. You can start with an empty 'face_database' folder.")

Initializing face database...
Loading known faces from saved file.

---------------------------------
Known faces database is ready.
The following people are in the database:
['access group employee', 'theif']


In [None]:
# =========================================================================================
# --- CELL 5 (LIVE STREAM FINAL V2): With PERSISTENT, VISIBLE LABELS ---
# =========================================================================================

# --- Imports ---
import time
from datetime import datetime, timezone
import cv2
from PIL import Image
import json
import socket
import os
from confluent_kafka import Producer
import yt_dlp

# --- General Configuration ---
DISTANCE_THRESHOLD = 0.75
FRAME_SKIP = 5 # Process 1 in every 5 frames, but display boxes on all frames

# --- PASTE THE LIVE STREAM URL HERE ---
YOUTUBE_LIVE_URL = "https://www.youtube.com/watch?v=qKB-hXcztfg" # <--- REPLACE THIS URL

# --- All captures will happen inside the main database folder ---
DATABASE_FOLDER = 'face_database'
UNASSIGNED_FOLDER = os.path.join(DATABASE_FOLDER, '_unassigned_faces')
os.makedirs(UNASSIGNED_FOLDER, exist_ok=True)
print(f"Unassigned faces will be saved to: '{UNASSIGNED_FOLDER}'")

# --- System & Message Configuration ---
KAFKA_TOPIC = 'presence_events'
CAMERA_ID = "LiveYouTubeCam"

# --- CONFLUENT CLOUD CREDENTIALS ---
BOOTSTRAP_SERVERS = "pkc-oxqxx9.us-east-1.aws.confluent.cloud:9092" # Replace with your actual server
API_KEY = "MHIVSVMGMGGDQTUT"
API_SECRET = "cfltEV69I/NI2uHNBb5aooJFyKwYTJH9EuWP8hv+dtJ+GLd6sbuYW/dzRyYmbiQg"

# --- Initialize Confluent Kafka Producer ---
producer = None
try:
    print("Attempting to connect Confluent Kafka producer...")
    conf = { 'bootstrap.servers': BOOTSTRAP_SERVERS, 'security.protocol': 'SASL_SSL', 'sasl.mechanisms': 'PLAIN', 'sasl.username': API_KEY, 'sasl.password': API_SECRET, 'client.id': socket.gethostname() }
    producer = Producer(conf)
    print("SUCCESS: Confluent Kafka producer created.")
    def delivery_report(err, msg):
        if err is not None: print(f"--- KAFKA ERROR: Message delivery failed: {err} ---")
except Exception as e:
    print(f"--- KAFKA WARNING: Could not create producer. Reason: {e} ---")

# --- Get Live Stream URL using yt-dlp ---
cap = None
try:
    print(f"Attempting to open live stream: {YOUTUBE_LIVE_URL}")
    ydl_opts = {'format': 'best[height<=720]', 'quiet': True}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(YOUTUBE_LIVE_URL, download=False)
        stream_url = info['url']   
    cap = cv2.VideoCapture(stream_url)
    if not cap.isOpened(): print("Error: Could not open the extracted live stream URL with OpenCV.")
except Exception as e:
    print(f"Error: Failed to get live stream URL. Details: {e}")

# --- Main Processing Loop ---
if cap and cap.isOpened():
    print(f"\nSUCCESS: Live stream opened. Starting face recognition...")
    frame_count = 0
    # (--- NEW ---) Create a list to store face data between processing frames
    last_known_faces = []
    print("\n--- Real-time JSON Event Log ---")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("\nStream ended or could not read frame. Exiting.")
            break

        frame_count += 1
        
        # --- Processing Logic (only runs on frames divisible by FRAME_SKIP) ---
        if frame_count % FRAME_SKIP == 0:
            try:
                # (--- NEW ---) Clear the list for this new set of detections
                last_known_faces.clear()
                
                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                boxes, _ = mtcnn.detect(pil_image)
                face_tensors = mtcnn(pil_image)

                if face_tensors is not None:
                    for face_tensor, box in zip(face_tensors, boxes):
                        unknown_embedding = resnet(face_tensor.to(device).unsqueeze(0)).detach().cpu()
                        min_dist = float('inf')
                        recognized_name = "Unknown"  
                        
                        for name, known_emb in known_faces_embeddings.items():
                            distance = (known_emb - unknown_embedding).norm().item()
                            if distance < min_dist:
                                min_dist = distance
                                if min_dist < DISTANCE_THRESHOLD:
                                    recognized_name = name
                        
                        # (--- NEW ---) Store the results instead of drawing immediately
                        last_known_faces.append((box, recognized_name))
                        
                        # The auto-capture and Kafka logic still runs here
                        if recognized_name == "Unknown":
                            timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
                            filename = f"unknown_{timestamp_str}.jpg"
                            save_path = os.path.join(UNASSIGNED_FOLDER, filename)
                            face_image_to_save = pil_image.crop(box)
                            face_image_to_save.save(save_path)
                            print(f"  -> Saved unassigned face to: {save_path}")

                        event_message = { "event_type": "FACE_RECOGNIZED", "timestamp": datetime.now(timezone.utc).isoformat(), "presence": 1 if recognized_name != "Unknown" else 0, "personName": recognized_name, "cameraId": CAMERA_ID }
                        encoded_message = json.dumps(event_message).encode('utf-8')
                        print(json.dumps(event_message))

                        if producer is not None:
                            producer.poll(0)
                            producer.produce(KAFKA_TOPIC, value=encoded_message, callback=delivery_report)
            except Exception as e:
                print(f"An error occurred during frame processing: {e}")

        # #######################################################################
        # ### (--- NEW ---) Persistent Drawing Logic (runs on EVERY frame) ###
        # #######################################################################
        # Redraw the last known faces on the current frame
        for box, name in last_known_faces:
            x1, y1, x2, y2 = [int(b) for b in box]
            color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)

            # Draw the main bounding box
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            # Draw the background for the text
            cv2.rectangle(frame, (x1, y2 - 25), (x2, y2), color, cv2.FILLED)
            # Draw the name
            cv2.putText(frame, name, (x1 + 6, y2 - 6), cv2.FONT_HERSHEY_DUPLEX, 0.7, (255, 255, 255), 1)

        # Show the final frame (with or without new drawings)
        cv2.imshow('Live Stream Face Recognition', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    # --- Cleanup ---
    cap.release()
    cv2.destroyAllWindows()
    if producer is not None:
        print("\nFlushing remaining messages to Kafka...")
        producer.flush()
        print("Kafka producer connection closed.")
    for i in range(5): cv2.waitKey(1)
    print("Live stream processing stopped.")

Unassigned faces will be saved to: 'face_database\_unassigned_faces'
Attempting to connect Confluent Kafka producer...
SUCCESS: Confluent Kafka producer created.
Attempting to open live stream: https://www.youtube.com/watch?v=qKB-hXcztfg

SUCCESS: Live stream opened. Starting face recognition...

--- Real-time JSON Event Log ---
{"event_type": "FACE_RECOGNIZED", "timestamp": "2025-10-10T06:59:55.031931+00:00", "presence": 1, "personName": "access group employee", "cameraId": "LiveYouTubeCam"}
{"event_type": "FACE_RECOGNIZED", "timestamp": "2025-10-10T06:59:56.312555+00:00", "presence": 1, "personName": "access group employee", "cameraId": "LiveYouTubeCam"}
{"event_type": "FACE_RECOGNIZED", "timestamp": "2025-10-10T06:59:57.407116+00:00", "presence": 1, "personName": "access group employee", "cameraId": "LiveYouTubeCam"}
{"event_type": "FACE_RECOGNIZED", "timestamp": "2025-10-10T06:59:58.889551+00:00", "presence": 1, "personName": "access group employee", "cameraId": "LiveYouTubeCam"}
{

In [40]:
# Cell 6: Final Save of the Database
print("\nSaving updated face database to file...")
torch.save(known_faces_embeddings, embeddings_file)
print(f"Database saved successfully to '{embeddings_file}'.")
print("The following people are now in the database:")
print(list(known_faces_embeddings.keys()))


Saving updated face database to file...
Database saved successfully to 'known_faces_embeddings.pt'.
The following people are now in the database:
[]
