In [2]:
!pip install -U transformers
!pip install opencv-python



# Task
Perform real-time human/non-human detection from an HPW100 camera feed using the `Human-vs-NonHuman-Detection` model and continuously print the detection results.

In [30]:
# load libraries
from huggingface_hub import hf_hub_download
from ultralytics import YOLO
from supervision import Detections
from PIL import Image

# Fetch the face-detection weights file from the Hugging Face Hub.
model_path = hf_hub_download(
    repo_id="arnabdhar/YOLOv8-Face-Detection",
    filename="model.pt",
)

# Build the YOLO detector from the downloaded weights.
model = YOLO(model_path)

# Run the detector once on a still image and wrap the first result
# as a supervision Detections object.
image_path = "/content/WhatsApp Image 2024-12-04 at 16.34.57_8aaccd3f.jpg"
output = model(Image.open(image_path))
results = Detections.from_ultralytics(output[0])



0: 640x608 1 FACE, 13.3ms
Speed: 5.6ms preprocess, 13.3ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 608)


In [28]:
!pip install supervision

Collecting supervision
  Downloading supervision-0.27.0-py3-none-any.whl.metadata (13 kB)
Downloading supervision-0.27.0-py3-none-any.whl (212 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.4/212.4 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: supervision
Successfully installed supervision-0.27.0


In [35]:
import time
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
import io
from PIL import Image
import numpy as np
import cv2

# 2. Define a JavaScript function for webcam control within a Python function
def video_stream_js():
  """Display the JavaScript that defines the browser-side camera helpers.

  The script declares shared variables (video, div, stream, captureCanvas,
  img, dataURL) and three functions:

  * startCamera(): appends a <div> with a <video> element to the page,
    requests a camera stream via getUserMedia({video: true}), starts
    playback, resizes the output iframe, creates a canvas sized to the video
    and an <img> preview, and returns true. On failure it logs the error,
    removes the <div>, and re-throws.
  * captureFrame(): returns '' if the <div> is not set; otherwise draws the
    current video frame on the canvas, encodes it as a JPEG data URL, shows
    it in the <img> preview, and returns the data URL.
  * stopCamera(): stops every track of the stream and removes the <div>.

  Each function is then passed to google.colab.output.expose().
  NOTE(review): confirm that google.colab.output.expose exists in the Colab
  front end; a later cell in this notebook assigns the functions to `window`
  instead.

  Returns:
    None. The only effect is displaying the Javascript object.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var img;
    var dataURL;

    async function startCamera() {
      div = document.createElement('div');
      document.body.appendChild(div);

      video = document.createElement('video');
      video.style.display = 'block';
      div.appendChild(video);

      try {
        stream = await navigator.mediaDevices.getUserMedia({video: true});
        video.srcObject = stream;
        await video.play();

        google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

        captureCanvas = document.createElement('canvas');
        captureCanvas.width = video.videoWidth;
        captureCanvas.height = video.videoHeight;
        img = document.createElement('img');
        img.src = '';
        div.appendChild(img);
        return true; // Indicate success
      } catch (error) {
        console.error('Failed to start camera:', error);
        if (div) {
          document.body.removeChild(div);
          div = null;
        }
        throw error; // Re-throw to ensure Python receives the error type
      }
    }

    async function captureFrame() {
      if (!div) {
        console.error('Camera not started. Call startCamera() first.');
        return '';
      }
      captureCanvas.getContext('2d').drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
      dataURL = captureCanvas.toDataURL('image/jpeg');
      img.src = dataURL;

      return dataURL;
    }

    function stopCamera() {
      if (stream) {
        stream.getTracks().forEach(track => track.stop());
      }
      if (div) {
        document.body.removeChild(div);
        div = null;
      }
    }

    google.colab.output.expose(startCamera);
    google.colab.output.expose(captureFrame);
    google.colab.output.expose(stopCamera);
  ''')
  # Displaying the object is what sends the script to the browser.
  display(js)

# 3. Create a Python function `video_stream()`
def video_stream():
  """Tell the user to grant camera access, then invoke startCamera() in the browser.

  The call is wrapped in an async IIFE; per the original author's note this
  is meant to make eval_js wait for startCamera() to finish.

  Returns:
    None.
  """
  permission_notice = "\n*** IMPORTANT: Please grant camera access in your browser pop-up to proceed. ***\n"
  print(permission_notice)
  start_expression = '(async () => { await startCamera(); })();'
  eval_js(start_expression)

# 4. Create another Python function `take_photo()`
def take_photo(quality=0.8):
  """Capture one frame from the browser camera and return it as an OpenCV image.

  Args:
    quality: Kept for backward compatibility. It is currently unused: the
      JavaScript captureFrame() takes no arguments and encodes with the
      browser's default JPEG quality.

  Returns:
    A numpy array produced by converting the decoded frame from RGB to BGR
    channel order with cv2.cvtColor.

  Raises:
    RuntimeError: if captureFrame() returned nothing usable. The JavaScript
      side returns '' when the camera has not been started.
  """
  # Call the JavaScript captureFrame function
  data = eval_js('captureFrame();')
  # An empty or malformed reply would otherwise fail below with an opaque
  # IndexError/AttributeError from the split.
  if not data or ',' not in data:
    raise RuntimeError("Failed to capture frame. Camera may not be ready.")
  # A data URL looks like "data:image/jpeg;base64,<payload>"; keep the payload.
  binary = b64decode(data.split(',', 1)[1])
  # Decode the JPEG and force 3-channel RGB so the colour conversion below is valid.
  image_pil = Image.open(io.BytesIO(binary)).convert('RGB')
  # Convert to OpenCV format (numpy array, BGR channel order)
  image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
  return image_cv

# 5. Create a Python function to stop the video stream
def stop_video_stream():
  """Send a stopCamera() call to the browser by displaying it as Javascript."""
  stop_call = Javascript('stopCamera();')
  display(stop_call)

# 6. Send the JavaScript camera helpers (startCamera / captureFrame / stopCamera) to the browser
print("Initializing webcam functions...")
video_stream_js()

# 7. Ask the browser to start the camera; the user must accept the permission prompt
video_stream()
# NOTE(review): this pause runs after video_stream() has already been called, so it
# cannot help the JS functions get registered before that call; it only delays the
# message below.
time.sleep(2) # Fixed 2-second pause before reporting readiness
print("Webcam initialized. Waiting for user input...")

Initializing webcam functions...


<IPython.core.display.Javascript object>


*** IMPORTANT: Please grant camera access in your browser pop-up to proceed. ***

Webcam initialized. Waiting for user input...


In [55]:
# ====================================
# CELL 1: Import Libraries
# ====================================
import time
import cv2
from PIL import Image
from ultralytics import YOLO
from supervision import Detections
from IPython.display import Javascript, display
from google.colab.output import eval_js
from base64 import b64decode
import io
import numpy as np

# ====================================
# CELL 2: Load YOLO Model
# ====================================
from huggingface_hub import hf_hub_download

# Announce, download the face-detection weights from the Hugging Face Hub,
# build the YOLO detector from them, then confirm.
print("Loading model...")
model_path = hf_hub_download(
    repo_id="arnabdhar/YOLOv8-Face-Detection",
    filename="model.pt",
)
model = YOLO(model_path)
print("✓ Model loaded!")

# ====================================
# CELL 3: Setup Webcam Functions
# ====================================
def video_stream_js():
    """Display the JavaScript that defines the browser-side camera helpers.

    The script declares shared variables (video, div, stream, captureCanvas)
    and three functions, each assigned onto `window`:

    * startCamera(): appends a <div> with a <video> element to the page,
      requests a camera stream via getUserMedia({video: true}), starts
      playback, resizes the output iframe, creates a canvas sized to the
      video, and returns true. On failure it logs the error, removes the
      <div>, and re-throws.
    * captureFrame(): returns '' if the <div> or <video> is not set;
      otherwise draws the current video frame on the canvas and returns it
      as a JPEG data URL.
    * stopCamera(): stops every track of the stream and removes the <div>.

    Returns:
        None. The only effect is displaying the Javascript object.
    """
    js = Javascript('''
        var video;
        var div = null;
        var stream;
        var captureCanvas;

        async function startCamera() {
            div = document.createElement('div');
            document.body.appendChild(div);

            video = document.createElement('video');
            video.style.display = 'block';
            div.appendChild(video);

            try {
                stream = await navigator.mediaDevices.getUserMedia({video: true});
                video.srcObject = stream;
                await video.play();

                google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

                captureCanvas = document.createElement('canvas');
                captureCanvas.width = video.videoWidth;
                captureCanvas.height = video.videoHeight;
                return true;
            } catch (error) {
                console.error('Failed to start camera:', error);
                if (div) {
                    document.body.removeChild(div);
                    div = null;
                }
                throw error;
            }
        }

        async function captureFrame() {
            if (!div || !video) {
                console.error('Camera not started. Call startCamera() first.');
                return '';
            }
            var context = captureCanvas.getContext('2d');
            context.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);
            return captureCanvas.toDataURL('image/jpeg');
        }

        function stopCamera() {
            if (stream) {
                stream.getTracks().forEach(track => track.stop());
            }
            if (div) {
                document.body.removeChild(div);
                div = null;
            }
        }

        window.startCamera = startCamera;
        window.captureFrame = captureFrame;
        window.stopCamera = stopCamera;
    ''')
    # Displaying the object is what sends the script to the browser.
    display(js)

def video_stream():
    """Announce the permission prompt, then evaluate startCamera() in the browser.

    Returns:
        Whatever eval_js hands back for the startCamera() call.
    """
    print("Grant camera access in browser popup")
    start_result = eval_js('startCamera()')
    return start_result

def take_photo():
    """Grab one frame through captureFrame() and return it as a BGR numpy array.

    Raises:
        Exception: when captureFrame() yields None or an empty string.
    """
    data = eval_js('captureFrame()')
    captured_nothing = data is None or data == ''
    if captured_nothing:
        raise Exception("Failed to capture frame. Camera may not be ready.")
    # The data URL carries the base64 payload after the first comma.
    encoded_payload = data.split(',')[1]
    jpeg_stream = io.BytesIO(b64decode(encoded_payload))
    rgb_pixels = np.array(Image.open(jpeg_stream))
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)

def stop_video_stream():
    """Send a stopCamera() call to the browser by displaying it as Javascript."""
    stop_call = Javascript('stopCamera();')
    display(stop_call)

print("✓ Functions ready!")

# ====================================
# CELL 4: Initialize Camera (RUN THIS ONCE)
# ====================================
# Run once per session: each startCamera() call appends a new <div>/<video> to the
# page and overwrites the shared `stream` variable without stopping the previous one.
# The sequence is: send the JS helpers, pause, start the camera, pause again.
print("Starting camera...")
video_stream_js()
time.sleep(2)  # Fixed pause intended to let the browser load the script first
video_stream()
time.sleep(4)  # Fixed pause intended to give the camera time to start delivering frames
print("✓ Camera ready!")

# ====================================
# CELL 5: Run Face Detection
# ====================================
# Tunable settings for the capture loop (previously inline magic numbers).
CAPTURE_INTERVAL_SECONDS = 3  # pause between successful captures
RETRY_DELAY_SECONDS = 2       # pause before retrying after a failed capture/inference
FACE_CLASS_ID = 0             # class index kept when filtering detections

print("Starting detection... Press STOP to end")
print("=" * 50)

# Counts every loop iteration, including attempts that ended in an error.
frame_count = 0

try:
    while True:
        frame_count += 1

        print(f"\n[Frame #{frame_count}] Time: {time.strftime('%H:%M:%S')}")
        print("-" * 50)

        try:
            # Capture a BGR frame and hand the model an RGB PIL image.
            frame_cv = take_photo()
            frame_rgb = cv2.cvtColor(frame_cv, cv2.COLOR_BGR2RGB)
            image_pil = Image.fromarray(frame_rgb)

            # Detect faces and keep only detections of the face class.
            results = model(image_pil)
            detections = Detections.from_ultralytics(results[0])
            face_detections = detections[detections.class_id == FACE_CLASS_ID]

            # Report one line per detected face, numbered from 1.
            if len(face_detections) > 0:
                print(f"✅ {len(face_detections)} Face(s) Detected")
                for face_number, conf in enumerate(face_detections.confidence, start=1):
                    print(f"   Face {face_number}: Confidence {conf:.2%}")
            else:
                print("❌ No face detected")

        except Exception as e:
            # Best-effort loop: report the failure and try again after a short pause.
            # KeyboardInterrupt is not an Exception subclass, so STOP still exits.
            print(f"⚠️ Error: {e}")
            print(f"Retrying in {RETRY_DELAY_SECONDS} seconds...")
            time.sleep(RETRY_DELAY_SECONDS)
            continue

        # Wait before the next capture.
        time.sleep(CAPTURE_INTERVAL_SECONDS)

except KeyboardInterrupt:
    print("\n" + "=" * 50)
    print("Detection stopped!")
finally:
    # Always release the camera, whether the loop ended by interrupt or error.
    stop_video_stream()
    print(f"Camera closed. Total frames: {frame_count}")

Loading model...
✓ Model loaded!
✓ Functions ready!
Starting camera...


<IPython.core.display.Javascript object>

Grant camera access in browser popup
✓ Camera ready!
Starting detection... Press STOP to end

[Frame #1] Time: 07:24:58
--------------------------------------------------

0: 480x640 (no detections), 9.6ms
Speed: 1.6ms preprocess, 9.6ms inference, 1.2ms postprocess per image at shape (1, 3, 480, 640)
❌ No face detected

[Frame #2] Time: 07:25:02
--------------------------------------------------

0: 480x640 1 FACE, 7.0ms
Speed: 1.3ms preprocess, 7.0ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)
✅ 1 Face(s) Detected
   Face 1: Confidence 50.13%

[Frame #3] Time: 07:25:05
--------------------------------------------------

0: 480x640 (no detections), 6.9ms
Speed: 1.3ms preprocess, 6.9ms inference, 0.6ms postprocess per image at shape (1, 3, 480, 640)
❌ No face detected

[Frame #4] Time: 07:25:08
--------------------------------------------------

0: 480x640 (no detections), 7.7ms
Speed: 1.3ms preprocess, 7.7ms inference, 0.6ms postprocess per image at shape (1, 3, 48

<IPython.core.display.Javascript object>

Camera closed. Total frames: 24
