In [1]:
!pip install face-recognition

Collecting face-recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Collecting face-recognition-models>=0.3.0 (from face-recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566170 sha256=d720ec99c2a035690c80113f235c9b8cc8e9435647525eb28cb14aa7f78306f4
  Stored in directory: /root/.cache/pip/wheels/7a/eb/cf/e9eced74122b679557f597bb7c8e4c739cfcac526db1fd523d
Successfully built face-recognition-models
Installing collected packages: face-recognition-models, face-recognition
Successfully installed face-recognition-1

In [2]:
# import the necessary packages
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import face_recognition
import argparse
import imutils
import pickle
import time
import cv2
import numpy as np
import PIL
import io
import html
from google.colab.patches import cv2_imshow

In [3]:
# function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """
  Turn a frame string received from JavaScript into an OpenCV image.

  Params:
          js_reply: comma-separated string (a data URL as produced by the
                    browser canvas); the base64 payload is the segment
                    right after the first comma
  Returns:
          img: value returned by cv2.imdecode for the decoded bytes
               (flag 1 requests a 3-channel BGR color image)
  """
  # keep only the base64 payload that follows the first comma
  encoded_payload = js_reply.split(',')[1]
  # expose the decoded bytes as a flat uint8 buffer and hand it to OpenCV
  raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  return cv2.imdecode(raw_buffer, flags=1)

# function to convert an OpenCV rectangle bounding box image into a base64 byte string to be overlaid on the video stream
def bbox_to_bytes(bbox_array):
  """
  Encode an overlay pixel array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels), read as RGBA, containing the
                      drawing to overlay on the video stream.
  Returns:
        bytes: string of the form 'data:image/png;base64,<base64 PNG>'
  """
  # write the array out as a PNG into an in-memory buffer
  png_buffer = io.BytesIO()
  PIL.Image.fromarray(bbox_array, 'RGBA').save(png_buffer, format='png')
  # base64-encode the PNG bytes and prepend the data-URL header
  encoded_png = b64encode(png_buffer.getvalue()).decode('utf-8')
  return 'data:image/png;base64,' + encoded_png


# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Emit the JavaScript that implements the live webcam stream into the cell output.

  The script only declares state and functions; none of them is called here:
    - createDom(): builds a bordered container holding a status label, a
      <video> element fed by getUserMedia (facingMode "environment"), an
      absolutely positioned <img> used as overlay, and an instruction line,
      then creates a 640x480 capture canvas and starts the animation-frame
      loop. It returns the existing stream right away if the container is
      already present.
    - onAnimationFrame(): re-schedules itself unless `shutdown` is set; when a
      frame request is pending it resolves it with the current video frame as
      a JPEG data URL (quality 0.8), or with "" if `shutdown` is set.
    - stream_frame(label, imgData): if `shutdown` is set, tears the DOM down
      and returns ''. Otherwise it updates the status label and the overlay
      image (each only when non-empty), waits for the next frame and returns
      an object with the timings 'create', 'show', 'capture' and the frame
      under 'img'.
    - removeDom(): stops the video track and removes the created elements.
  Clicking the video, the overlay image or the instruction text sets `shutdown`.

  Returns:
          None
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "Status:";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '' +
          'When finished, click here or on the video to stop this demo';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # emit the script into the cell output; the camera is only requested later,
  # when stream_frame is invoked (createDom is called from there)
  display(js)

def video_frame(label, bbox):
  """
  Push the current label/overlay to the browser and fetch the next webcam frame.

  Params:
          label: text for the status label ('' leaves the label unchanged)
          bbox: data-URL string of the overlay image ('' leaves the overlay unchanged)
  Returns:
          data: result of the JavaScript stream_frame call as returned by
                eval_js: '' once the stream has been shut down, otherwise an
                object with the keys 'create', 'show', 'capture' and 'img'
  """
  import json

  # encode both arguments as JavaScript string literals so that quotes,
  # backslashes or newlines in them cannot break (or inject into) the script;
  # for plain text this yields exactly the same call string as before
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)),
                                          json.dumps(str(bbox)))
  return eval_js(js_call)

In [4]:
# load the known faces and embeddings
print("[INFO] loading encodings...")
# NOTE: unpickling can execute arbitrary code; only load an encodings file
# that you created yourself or otherwise trust
with open("sample_data/encodings.pickle", "rb") as encodings_file:
  # the context manager closes the file handle once the data has been read
  data = pickle.load(encodings_file)

[INFO] loading encodings...


In [5]:
# start the webcam stream in the cell output, then run face recognition on
# every captured frame until the user clicks to stop the demo
print("[INFO] starting video stream...")
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box overlay to empty
bbox = ''
# writing the output video is disabled in this notebook; `writer` and `count`
# are not used by this cell and are kept only in case other cells refer to them
writer = None
count = 0

# width the frame is resized to before detection; the captured frame is 640px
# wide, so 750 enlarges the frame rather than shrinking it
DETECTION_WIDTH = 750

# loop over frames from the video stream
while True:
  js_reply = video_frame(label_html, bbox)

  # stream_frame returns '' after the user stopped the demo; a frame that was
  # resolved while shutting down carries an empty 'img', which cannot be
  # decoded, so treat it as a stop signal as well
  if not js_reply or not js_reply["img"]:
    break

  # convert JS response to OpenCV Image
  img = js_to_image(js_reply["img"])

  # create transparent overlay for bounding box (same size as the 640x480
  # capture canvas)
  bbox_array = np.zeros([480, 640, 4], dtype=np.uint8)

  # convert the input frame from BGR to RGB, then resize the *converted*
  # frame (resizing `img` here would silently discard the conversion)
  rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  rgb = imutils.resize(rgb, width=DETECTION_WIDTH)
  # factor that maps coordinates in the resized frame back to the original
  r = img.shape[1] / float(rgb.shape[1])

  # detect the (x, y)-coordinates of the bounding boxes corresponding to
  # each face in the input frame, then compute the facial embeddings
  boxes = face_recognition.face_locations(rgb, model="cnn")
  encodings = face_recognition.face_encodings(rgb, boxes)
  names = []

  # loop over the facial embeddings
  for encoding in encodings:
    # attempt to match each face in the input image to our known encodings
    matches = face_recognition.compare_faces(data["encodings"], encoding)
    name = "Unknown"

    # check to see if we have found a match
    if True in matches:
      # count how many known encodings matched for each person
      counts = {}
      for i, matched in enumerate(matches):
        if matched:
          candidate = data["names"][i]
          counts[candidate] = counts.get(candidate, 0) + 1

      # pick the person with the most votes (on a tie, the entry that was
      # inserted into the dictionary first wins)
      name = max(counts, key=counts.get)

    # update the list of names
    names.append(name)

  # loop over the recognized faces
  for ((top, right, bottom, left), name) in zip(boxes, names):
    # rescale the face coordinates back to the original frame size
    left, top, right, bottom = int(left * r), int(top * r), int(right * r), int(bottom * r)

    # draw the box and the predicted face name on the overlay; the name goes
    # above the box unless that would be too close to the top edge
    y = top - 15 if top - 15 > 15 else top + 15
    bbox_array = cv2.rectangle(bbox_array, (left, top), (right, bottom), (0, 255, 0), 2)
    bbox_array = cv2.putText(bbox_array, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

  # make every drawn pixel opaque and leave the rest fully transparent
  bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(int) * 255
  # convert overlay of bbox into bytes
  bbox_bytes = bbox_to_bytes(bbox_array)
  # update bbox so next frame gets new overlay
  bbox = bbox_bytes

# do a bit of cleanup
cv2.destroyAllWindows()

# check to see if the video writer pointer needs to be released
#if writer is not None:
#	writer.release()

[INFO] starting video stream...


<IPython.core.display.Javascript object>