#**Google Colab: Access Webcam for Video**


In [None]:
# import dependencies
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import numpy as np
import PIL
import io
import html
import time

In [None]:
# Notebook-only command (not plain Python): installs the OpenCV package that provides `cv2`.
# NOTE(review): this cell comes after the cell that already does `import cv2` — if cv2 is missing, run this one first.
pip install opencv-python



## Helper Functions
Below are a few helper functions that make it easier to convert between different image data types and formats.

In [None]:
# decode a webcam frame received from JavaScript into an OpenCV image
def js_to_image(js_reply):
  """
  Turn the comma-separated, base64-encoded string sent by the browser into an image.

  Params:
          js_reply: string whose second comma-separated field is the
                    base64-encoded image (e.g. 'data:image/jpeg;base64,<payload>')
  Returns:
          img: result of cv2.imdecode in colour mode (OpenCV BGR image)
  """
  # the field right after the first comma carries the base64 payload
  encoded_frame = js_reply.split(',')[1]
  raw_bytes = b64decode(encoded_frame)
  # expose the raw bytes as a flat uint8 buffer, which is what imdecode expects
  frame_buffer = np.frombuffer(raw_bytes, dtype=np.uint8)
  # IMREAD_COLOR asks OpenCV for a 3-channel colour image
  return cv2.imdecode(frame_buffer, flags=cv2.IMREAD_COLOR)

# function to convert an OpenCV rectangle bounding-box overlay into a base64 data URL to be overlaid on the video stream
def bbox_to_bytes(bbox_array):
  """
  Encode an RGBA overlay array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream;
                      it is interpreted as RGBA.
  Returns:
        bytes: string of the form 'data:image/png;base64,<payload>'
  """
  # `import PIL` on its own does not guarantee that the `PIL.Image` submodule
  # is loaded, so import it explicitly. The alias avoids confusion with the
  # `Image` class imported from IPython.display at the top of the file.
  from PIL import Image as PILImage

  # convert array into PIL image
  overlay = PILImage.fromarray(bbox_array, 'RGBA')
  # encode as PNG in memory; the context manager releases the buffer afterwards
  with io.BytesIO() as iobuf:
    overlay.save(iobuf, format='png')
    png_bytes = iobuf.getvalue()
  # base64 output is pure ASCII, so it can be decoded straight into the data URL
  return 'data:image/png;base64,' + b64encode(png_bytes).decode('ascii')

## Haar Cascade Classifier
For this tutorial we will run a simple object detection algorithm called Haar Cascade on our images and video fetched from our webcam. OpenCV has a pre-trained Haar Cascade face detection model.

In [None]:
# build the Haar Cascade face detector from the frontal-face model file
# located under OpenCV's haarcascades data directory
face_cascade = cv2.CascadeClassifier(
    cv2.samples.findFile(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )
)

In [None]:
def bbox_to_bytes(bbox_array):
    """
    Encode an RGBA overlay array as a PNG data URL.

    Params:
            bbox_array: Numpy array of pixels, interpreted as RGBA.
    Returns:
            string of the form 'data:image/png;base64,<payload>'
    """
    # `import PIL` on its own does not guarantee that the `PIL.Image` submodule
    # is loaded, so import it explicitly. The alias avoids confusion with the
    # `Image` class imported from IPython.display at the top of the file.
    from PIL import Image as PILImage

    overlay = PILImage.fromarray(bbox_array, 'RGBA')
    # encode as PNG in memory; the context manager releases the buffer afterwards
    with io.BytesIO() as iobuf:
        overlay.save(iobuf, format='png')
        png_bytes = iobuf.getvalue()
    # base64 output is pure ASCII, so it can be decoded straight into the data URL
    return 'data:image/png;base64,' + b64encode(png_bytes).decode('ascii')

# Create the Haar Cascade detectors for faces and eyes from the model files
# located under OpenCV's haarcascades data directory
face_cascade = cv2.CascadeClassifier(
    cv2.samples.findFile(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
)
eye_cascade = cv2.CascadeClassifier(
    cv2.samples.findFile(cv2.data.haarcascades + 'haarcascade_eye.xml')
)


#**Video capturing**

In [None]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Display the JavaScript that drives the live webcam stream in the cell output.

  The script defines a `stream_frame(label, imgData)` function plus its helpers:
    * `createDom` builds (once) a bordered container holding a status label,
      a <video> element fed by `navigator.mediaDevices.getUserMedia`, an
      absolutely positioned <img> used as overlay, and a red instruction line.
    * `onAnimationFrame` draws the current video frame onto a 640x480 canvas
      and resolves the pending request with a JPEG data URL (quality 0.8).
    * Clicking the video, the overlay or the instruction text sets the
      `shutdown` flag; a later `stream_frame` call that sees the flag set
      calls `removeDom` and returns ''.
    * Otherwise `stream_frame` returns an object with the keys 'create',
      'show', 'capture' (differences of `Date.now()` readings) and 'img'.

  Returns:
          None. The function only displays the script.
  """
  # The string below is JavaScript executed in the browser, not Python.
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Displaying the Javascript object is what hands the script to the notebook front end.
  display(js)

def video_frame(label, bbox):
  """
  Push the current label and overlay to the browser and fetch the next webcam frame.

  Params:
          label: text/HTML for the status line; '' leaves the current label unchanged
          bbox: data-URL string of the overlay image; '' leaves the current overlay unchanged
  Returns:
          data: value the JavaScript `stream_frame` call evaluates to. Per the
                script in `video_stream`, that is '' once the stream has been
                shut down, otherwise an object with the keys 'create', 'show',
                'capture' and 'img'.
  """
  # json is only needed for quoting here, so keep the import local
  import json

  # json.dumps emits a correctly quoted and escaped string literal, so quotes,
  # backslashes or newlines in the arguments can no longer break (or inject
  # code into) the evaluated expression. str() keeps the old behaviour of
  # always passing strings to JavaScript.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(js_call)
  return data

#**Detection in Real time**

In [None]:
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box overlay to empty (no overlay on the first frame)
bbox = ''
count = 0
while True:
    js_reply = video_frame(label_html, bbox)
    # '' means the stream was shut down; an empty "img" is what the script
    # returns when the stop click arrives while a frame capture is pending.
    # Either way there is no frame to process.
    if not js_reply or not js_reply["img"]:
        break
    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])
    # create transparent overlay for bounding box
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)
    # grayscale image for face detection; js_to_image yields a BGR image,
    # so the BGR conversion code is the correct one
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # get face region coordinates
    faces = face_cascade.detectMultiScale(gray)
    # draw each face bounding box onto the overlay
    for (x,y,w,h) in faces:
      bbox_array = cv2.rectangle(bbox_array,(x,y),(x+w,y+h),(255,0,0),2)
    # make every drawn pixel opaque and leave the rest fully transparent
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes

import cv2
import time
from base64 import b64encode

# --- Detection state; persists across frames ---
blink_count = 0
blink_status = False  # True while the eyes are currently considered closed
eye_closed_start = None  # Time at which the eyes were first seen closed
blink_detected = False  # True once the eyes have stayed closed for more than 0.07 s
frame_width = 640  # Frame width
frame_height = 480  # Frame height
middle_threshold = (frame_width // 3, 2 * frame_width // 3)  # x-boundaries of the middle zone
alert_triggered = False
alert_display_start = None  # Start time for displaying the alert button
left_or_right_start = None  # Time at which the face first left the middle zone
start_time = time.time()  # Last time a face was seen (initially: loop start)

# Main loop; relies on `label_html` and `bbox` left over from the previous cell
while True:
    js_reply = video_frame(label_html, bbox)
    # '' means the stream was shut down; an empty "img" is what the script
    # returns when the stop click arrives while a frame capture is pending.
    if not js_reply or not js_reply["img"]:
        break
    img = js_to_image(js_reply["img"])
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # One timestamp per frame so that all timers below agree with each other
    current_time = time.time()

    # Detect faces
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    position = "Unknown"
    face_detected = len(faces) > 0
    if face_detected:
        start_time = current_time  # Reset the no-face timer
        for (x, y, w, h) in faces:
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = img[y:y+h, x:x+w]
            # Detect eyes within the face region
            eyes = eye_cascade.detectMultiScale(roi_gray)

            # Fewer than 2 detected eyes is treated as "eyes closed"
            if len(eyes) < 2:
                blink_status = True
                if eye_closed_start is None:
                    eye_closed_start = current_time  # Start eye-closed timer
                if current_time - eye_closed_start > 0.07:
                    blink_detected = True
            else:
                if blink_status:
                    blink_count += 1  # Count a blink when eyes reopen
                    blink_status = False
                blink_detected = False
                eye_closed_start = None  # Reset eye-closed timer

            # Classify the horizontal face position into three equal zones
            face_center_x = x + w // 2
            if face_center_x < middle_threshold[0]:
                position = "Left"
            elif face_center_x > middle_threshold[1]:
                position = "Right"
            else:
                position = "Middle"

            # Draw rectangles around the face and eyes
            cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
            for (ex, ey, ew, eh) in eyes:
                cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

        # Track how long the face has been outside the middle zone
        if position == "Middle":
            left_or_right_start = None
        elif left_or_right_start is None:
            left_or_right_start = current_time

    # Alert immediately while no face is detected
    alert_triggered = not face_detected
    # Alert if eyes have been closed for 4 seconds or more
    if eye_closed_start is not None and current_time - eye_closed_start >= 4:
        alert_triggered = True
    # Alert if in left/right position for 10 seconds or more (set to 120 for 2 minutes)
    if left_or_right_start is not None and current_time - left_or_right_start >= 10:
        alert_triggered = True
    # Alert if no face has been detected for 10 seconds or more (set to 120 for 2 minutes).
    # NOTE(review): currently subsumed by the immediate no-face alert above;
    # kept so that the delay takes effect if the immediate alert is removed.
    if not face_detected and current_time - start_time >= 10:
        alert_triggered = True

    # Handle alert display timing
    if alert_triggered:
        if alert_display_start is None:
            alert_display_start = current_time  # Start alert display timer
    else:
        alert_display_start = None  # Reset timer when no alert is triggered
    # Display the alert button during the first 30 seconds of an alert
    if alert_display_start is not None and current_time - alert_display_start <= 30:
        # Draw the alert button
        button_x1, button_y1 = frame_width // 3, frame_height // 3
        button_x2, button_y2 = 2 * frame_width // 3, 2 * frame_height // 3
        cv2.rectangle(img, (button_x1, button_y1), (button_x2, button_y2), (0, 0, 255), -1)  # Red-filled rectangle
        cv2.putText(img, "ALERT", (button_x1 + 50, button_y1 + 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 4)
    # Overlay box based on position
    if position == "Left" or position == "Right":
        cv2.rectangle(img, (0, 0), (frame_width, frame_height), (0, 165, 255), 5)  # Orange box
    elif position == "Middle":
        cv2.rectangle(img, (0, 0), (frame_width, frame_height), (255, 0, 0), 5)  # Blue box
    # Overlay blink count and position text in green
    overlay_text = f"Blinks: {blink_count} | Position: {position}"
    cv2.putText(img, overlay_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    # Convert the annotated frame to a JPEG data URL; it becomes the overlay of the next frame
    _, img_encoded = cv2.imencode('.jpg', img)
    bbox_bytes = 'data:image/jpeg;base64,' + str(b64encode(img_encoded), 'utf-8')
    bbox = bbox_bytes


<IPython.core.display.Javascript object>

In [None]:
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box overlay to empty (no overlay on the first frame)
bbox = ''
count = 0
while True:
    js_reply = video_frame(label_html, bbox)
    # '' means the stream was shut down; an empty "img" is what the script
    # returns when the stop click arrives while a frame capture is pending.
    # Either way there is no frame to process.
    if not js_reply or not js_reply["img"]:
        break
    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])
    # create transparent overlay for bounding box
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)
    # grayscale image for face detection; js_to_image yields a BGR image,
    # so the BGR conversion code is the correct one
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # get face region coordinates
    faces = face_cascade.detectMultiScale(gray)
    # draw each face bounding box onto the overlay
    for (x,y,w,h) in faces:
      bbox_array = cv2.rectangle(bbox_array,(x,y),(x+w,y+h),(255,0,0),2)
    # make every drawn pixel opaque and leave the rest fully transparent
    bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes
import cv2
import time
from base64 import b64encode
import numpy as np

# --- Per-session state, carried from one frame to the next ---
blink_count = 0
eye_closed_start = None  # when the eyes were first seen closed
blink_status = False  # True while a blink is in progress
frame_width = 640
frame_height = 480
alert_triggered = False
alert_display_start = None  # when the current alert started being shown
position = "Unknown"
left_or_right_start = None  # when the face first left the middle zone

# --- Alert thresholds, all in seconds ---
eye_close_threshold = 15  # eyes closed this long -> alert
left_right_threshold = 10  # face off-centre this long -> alert
face_absence_threshold = 10  # no face this long -> alert
alert_display_duration = 30  # how long the alert banner stays visible

# --- No-face tracking ---
face_detected = False
start_time = time.time()

while True:
    js_reply = video_frame(label_html, bbox)
    if not js_reply:
        break
    img = js_to_image(js_reply["img"])
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Run the face detector and take one timestamp for the whole frame
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    current_time = time.time()

    # No detections: the loop below is skipped, so record the absence here
    if len(faces) == 0:
        face_detected = False
        position = "Unknown"

    for (x, y, w, h) in faces:
        face_detected = True
        start_time = current_time  # a face is present, restart the no-face timer
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = img[y:y + h, x:x + w]

        # Horizontal zone of the face centre: left third, middle third, right third
        face_center_x = x + w // 2
        if frame_width // 3 <= face_center_x <= 2 * frame_width // 3:
            position = "Middle"
            left_or_right_start = None  # back in the middle, clear the off-centre timer
        else:
            position = "Left" if face_center_x < frame_width // 3 else "Right"
            if not left_or_right_start:
                left_or_right_start = current_time

        # Look for eyes inside the face region only
        eyes = eye_cascade.detectMultiScale(roi_gray)

        if len(eyes) >= 2:  # eyes open
            if blink_status:
                # eyes were closed on an earlier frame and are open again: one blink
                blink_count += 1
                blink_status = False
            eye_closed_start = None
        else:  # fewer than two eyes found: treat as closed
            if not eye_closed_start:
                eye_closed_start = current_time
            blink_status = True

        # Outline the face (blue) and each detected eye (green)
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
        for (ex, ey, ew, eh) in eyes:
            cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)

    # An alert is active when any one of the three timers has run past its threshold
    alert_triggered = bool(
        (eye_closed_start and current_time - eye_closed_start >= eye_close_threshold)
        or (left_or_right_start and current_time - left_or_right_start >= left_right_threshold)
        or (not face_detected and current_time - start_time >= face_absence_threshold)
    )

    # Remember when the current alert began; forget it once the alert clears
    if not alert_triggered:
        alert_display_start = None
    elif not alert_display_start:
        alert_display_start = current_time

    # Show the banner only during the first `alert_display_duration` seconds of an alert
    if alert_display_start and current_time - alert_display_start <= alert_display_duration:
        button_x1, button_y1 = frame_width // 3, frame_height // 3
        button_x2, button_y2 = 2 * frame_width // 3, 2 * frame_height // 3
        cv2.rectangle(img, (button_x1, button_y1), (button_x2, button_y2), (0, 0, 255), -1)  # filled red rectangle
        cv2.putText(img, "ALERT", (button_x1 + 50, button_y1 + 50), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 4)

    # Frame border: orange when off-centre, blue when centred, none when unknown
    if position in ("Left", "Right"):
        cv2.rectangle(img, (0, 0), (frame_width, frame_height), (0, 165, 255), 5)
    elif position == "Middle":
        cv2.rectangle(img, (0, 0), (frame_width, frame_height), (255, 0, 0), 5)

    # Status text in the top-left corner
    overlay_text = f"Blinks: {blink_count} | Position: {position}"
    cv2.putText(img, overlay_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Send the annotated frame back as a JPEG data URL; it is the overlay for the next frame
    _, img_encoded = cv2.imencode('.jpg', img)
    bbox_bytes = 'data:image/jpeg;base64,' + b64encode(img_encoded).decode('utf-8')
    bbox = bbox_bytes


<IPython.core.display.Javascript object>