# Install MediaPipe

In [1]:
# Install MediaPipe into the notebook runtime (the run recorded below resolved to mediapipe 0.8.10).
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.8.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (32.8 MB)
[K     |████████████████████████████████| 32.8 MB 408 kB/s 
Installing collected packages: mediapipe
Successfully installed mediapipe-0.8.10


# Import Dependencies

In [2]:
import IPython

from IPython.display import display, Javascript, Image, Audio
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import PIL
import io
import html
import time

import os
from time import sleep
import threading

# from IPython.display import clear_output

import cv2
import mediapipe as mp
import numpy as np


# Camera for Colab

In [3]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """Inject the webcam-streaming JavaScript into the notebook output.

  The displayed script defines `stream_frame(label, imgData)` in the browser,
  which `video_frame` later invokes through `eval_js`. On its first call the
  script builds the DOM (status label, <video>, overlay <img>, stop
  instruction), requests the camera via `navigator.mediaDevices.getUserMedia`
  and starts an animation-frame loop. Each `stream_frame` call updates the
  label/overlay and resolves with timing info plus the current frame drawn
  onto a 1024x1024 canvas and encoded as a JPEG data URL (quality 0.8).

  Clicking the video, the overlay image or the instruction text sets the
  script's `shutdown` flag; a `stream_frame` call that sees the flag set on
  entry tears the DOM down and returns an empty string.

  Returns:
          None. The only effect is displaying the Javascript object.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    
    var pendingResolve = null;
    var shutdown = false;
    
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }
    
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 1024, 1024);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }
    
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '1024px';
      document.body.appendChild(div);
      
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);
           
      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);
      
      const instruction = document.createElement('div');
      instruction.innerHTML = 
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };
      
      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 1024; //video.videoWidth;
      captureCanvas.height = 1024; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);
      
      return stream;
    }
    // async function stream_frame(label, imgData) {
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();
      
      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }
            
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }
      
      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;
      
      return {'create': preShow - preCreate, 
              'show': preCapture - preShow, 
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Displaying the object is what makes the browser execute the script.
  display(js)
  
def video_frame(label, bbox):
  """Ask the browser for the next webcam frame, updating label and overlay.

  Params:
          label: text/HTML shown next to "Status:" (an empty string leaves the
                 current label untouched on the JavaScript side).
          bbox: data URL of the overlay image drawn above the video (an empty
                string leaves the current overlay untouched).
  Returns:
          Whatever `stream_frame` resolves to in the browser: an object with
          the keys 'create', 'show', 'capture' and 'img', or an empty string
          once the user has asked to stop.
  """
  # Local import keeps this cell runnable without touching the import cell.
  import json

  # json.dumps produces a correctly escaped, double-quoted JavaScript string
  # literal, so quotes, backslashes or newlines in the arguments can no longer
  # break (or inject into) the evaluated expression. str() keeps the previous
  # str.format() coercion for non-string arguments.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)),
                                          json.dumps(str(bbox)))
  return eval_js(js_call)

In [4]:

# function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """
  Turn the data URL captured in the browser into an OpenCV image.

  Params:
          js_reply: data-URL string ("<header>,<base64 payload>") holding the
                    webcam frame
  Returns:
          img: image decoded by OpenCV (BGR channel order)
  """
  # the base64 payload is the second comma-separated field of the data URL
  encoded_payload = js_reply.split(',')[1]
  # view the decoded bytes as a flat uint8 buffer, which is what imdecode expects
  raw_jpeg = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  # flags=1 requests a 3-channel colour decode
  return cv2.imdecode(raw_jpeg, flags=1)

# function to convert an RGBA overlay array (e.g. an OpenCV-drawn bounding box) into a base64 PNG data URL to be overlaid on the video stream
def bbox_to_bytes(bbox_array):
  """
  Encode an RGBA overlay array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels, 4 channels interpreted as RGBA)
                      containing the rectangle to overlay on the video stream.
  Returns:
        bytes: 'data:image/png;base64,...' string holding the PNG-encoded overlay
  """
  # `import PIL` on its own does not load the Image submodule; without this
  # explicit import `PIL.Image` only resolves if some other package happened
  # to import it first.
  import PIL.Image

  overlay = PIL.Image.fromarray(bbox_array, 'RGBA')
  # the context manager releases the in-memory buffer as soon as it is encoded
  with io.BytesIO() as png_buffer:
    overlay.save(png_buffer, format='png')
    encoded = b64encode(png_buffer.getvalue()).decode('ascii')

  return 'data:image/png;base64,{}'.format(encoded)

# Threading for Sound

In [5]:
def play_sound(path_mp3,aud_start=0, aud_stop=3):
    """Cut a clip out of an audio file with ffmpeg and autoplay it in the notebook.

    Params:
            path_mp3: path of the source audio file handed to ffmpeg.
            aud_start: clip start position, passed to ffmpeg's -ss option.
            aud_stop: clip end position, passed to ffmpeg's -to option.
    Returns:
            None. The clip is written to 'new.mp3' in the working directory
            (overwriting any previous clip), displayed as an autoplaying Audio
            widget, and the call then sleeps for the whole-second clip length
            so the caller can tell when playback should have finished.
    """
    # Local import keeps this cell runnable without touching the import cell.
    import subprocess

    clip_path = "new.mp3"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact. As before, ffmpeg's exit status is not checked.
    subprocess.run(
        ["ffmpeg", "-y", "-i", str(path_mp3),
         "-ss", str(aud_start), "-to", str(aud_stop), clip_path],
        check=False,
    )
    IPython.display.display(IPython.display.Audio(clip_path, autoplay=True))
    sleep(int(abs(aud_stop - aud_start)))


In [6]:
# Shorthand for MediaPipe's drawing utilities; run_music_camera uses it to map normalized landmarks to pixels.
mp_drawing = mp.solutions.drawing_utils
# Shorthand for MediaPipe's pose solution; run_music_camera builds mp_pose.Pose from it.
mp_pose = mp.solutions.pose

# Set path here

In [7]:
from IPython.display import clear_output

# Audio file that run_music_camera hands to play_sound; edit this to point at your own file.
path_mp3 = '/content/Familymart.mp3'

# Start/stop positions of the clip played on detection (play_sound sleeps for their difference in seconds).
aud_start = 0
aud_stop = 4
# Module-level placeholder; run_music_camera assigns its own local variable of the same name.
music_thread = None
# music_thread = threading.Thread(target=play_sound(path_mp3,0, 1))

In [8]:
# for i in range(0,4):
#     music_thread = threading.Thread(target=play_sound(path_mp3,0, 2))

# Run Video

In [9]:
def _face_landmarks_in_frame(pose_landmarks, landmark_ids, img_w, img_h):
    """Return True when every listed pose landmark maps to a pixel inside the image."""
    if pose_landmarks is None or not landmark_ids:
        return False
    for landmark_id in landmark_ids:
        point = pose_landmarks.landmark[landmark_id]
        # The converter returns None for coordinates outside the normalized [0, 1] range.
        if mp_drawing._normalized_to_pixel_coordinates(point.x, point.y, img_w, img_h) is None:
            return False
    return True


def _face_box_overlay(pose_landmarks, img_w, img_h):
    """Build an RGBA overlay with a rectangle anchored on landmarks 8 and 7."""
    # Landmarks 8 and 7 are the right and left ear in MediaPipe's pose topology.
    right = pose_landmarks.landmark[8]
    left = pose_landmarks.landmark[7]
    x, y = mp_drawing._normalized_to_pixel_coordinates(right.x, right.y, img_w, img_h)
    left_x, _ = mp_drawing._normalized_to_pixel_coordinates(left.x, left.y, img_w, img_h)
    # Horizontal distance between the two landmarks sets the box size.
    side = abs(left_x - x)

    overlay = np.zeros([img_h, img_w, 4], dtype=np.uint8)
    overlay = cv2.rectangle(overlay, (x, y - side), (x + side, y + side), [0, 0, 255], 2)
    # Make exactly the drawn pixels opaque; everything else stays transparent.
    overlay[:, :, 3] = (overlay.max(axis=2) > 0).astype(np.uint8) * 255
    return overlay


def run_music_camera():
    """Stream the webcam, detect a face with MediaPipe Pose and play a clip on arrival.

    Each loop iteration fetches a frame from the browser via `video_frame`,
    runs pose estimation on it and checks whether landmarks 0-10 all fall
    inside the image. When that becomes true after a frame where it was not,
    and no clip is currently playing, `play_sound(path_mp3, aud_start,
    aud_stop)` is started on a background thread so the video keeps running.
    A rectangle around the face is sent back as the overlay for the next
    frame; a fully transparent overlay is sent when no face is visible.

    The loop ends when the browser side reports that the user asked to stop
    (empty reply or empty frame).

    Reads the module-level names `path_mp3`, `aud_start`, `aud_stop`,
    `mp_pose` and `mp_drawing`.
    """
    # No clip has been started yet. (The earlier
    # `Thread(target=play_sound(...))` construct called play_sound
    # synchronously and gave the thread a None target.)
    music_thread = None
    # start streaming video from webcam
    video_stream()
    # label for video
    label_html = 'Detecting People...'
    # initialize bounding box to empty
    bbox = ''

    # Pose landmark indices that must all be inside the frame.
    face_landmarks = list(range(11))

    all_visible_previous = False

    # One Pose instance for the whole session: rebuilding the model per frame
    # is slow and throws away the tracking state between frames.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while True:
            js_reply = video_frame(label_html, bbox)
            # An empty reply, or a reply whose frame is empty, means the user
            # clicked to stop; decoding an empty frame would raise IndexError.
            if not js_reply or not js_reply["img"]:
                break

            # convert JS response to OpenCV Image
            img = js_to_image(js_reply["img"])
            if img is None:
                # Undecodable frame: skip it and keep the current overlay.
                continue

            # js_to_image yields BGR, MediaPipe expects RGB.
            image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Read-only input lets MediaPipe avoid copying the frame.
            image.flags.writeable = False
            img_h, img_w = image.shape[:2]

            # Make detection
            results = pose.process(image)
            all_visible = _face_landmarks_in_frame(
                results.pose_landmarks, face_landmarks, img_w, img_h)

            music_idle = music_thread is None or not music_thread.is_alive()
            # Trigger only on the transition "not visible" -> "visible".
            if music_idle and all_visible and not all_visible_previous:
                music_thread = threading.Thread(
                    target=play_sound, args=(path_mp3, aud_start, aud_stop))
                music_thread.start()
                # Drop earlier output (old audio widgets) and re-inject the
                # stream script that was cleared along with it.
                clear_output()
                video_stream()

            all_visible_previous = all_visible

            if all_visible:
                bbox_array = _face_box_overlay(results.pose_landmarks, img_w, img_h)
            else:
                # Fully transparent overlay removes any previously drawn box.
                bbox_array = np.zeros([img_h, img_w, 4], dtype=np.uint8)

            # convert overlay of bbox into bytes so the next frame gets the new overlay
            bbox = bbox_to_bytes(bbox_array)


In [10]:
# Start the demo; it loops until the user clicks the video or the red instruction text.
run_music_camera()

<IPython.core.display.Javascript object>