In [None]:
%%writefile read_and_load.py

from openvino.inference_engine import IENetwork, IECore

import cv2
import logging
# Grab the root logger (no name given) and let records from DEBUG upwards through.
logger = logging.getLogger(None)
logger.setLevel('DEBUG')

class read_and_load:
    
    '''
    Class for reading and loading the model

    Derives the `.xml` (structure) and `.bin` (weights) file names from a
    shared path prefix, reads the network and loads it onto a device.
    `load_model` must be called before `preprocess_input`, because the latter
    relies on `input_name` / `input_shape` set during loading.
    '''
    
    def __init__(self, model_name, model_path, device, extensions=None):
        '''
        Args:
            model_name: label used only in log messages.
            model_path: path prefix without extension; '.xml' and '.bin' are appended.
            device: device name handed to `IECore.load_network`.
            extensions: accepted for callers that pass it; stored but not
                consumed anywhere in this class.
        '''
        self.model_weights = model_path + '.bin'
        self.model_structure = model_path + '.xml'
        self.device = device
        self.model_name = model_name
        self.extensions = extensions

    def load_model(self):
        '''
        Read the network files and load the network onto `self.device`.

        Sets `model`, `input_name`, `input_shape`, `output_name`,
        `output_shape` and `network`. Only the first input and first output
        of the network are recorded.

        Raises:
            Exception: whatever the network reader raised; it is logged and
                re-raised so the caller sees the real cause instead of a
                follow-up AttributeError on the missing `self.model`.
        '''
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            logging.error(f'{e}: check the path of the input model')
            raise

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

        core = IECore()
        self.network = core.load_network(network=self.model, device_name=self.device, num_requests=1)
        logging.info(f'{self.model_name} model loaded successfully')

    def preprocess_input(self, image):
        '''
        Resize `image` to the network input size and add a batch axis.

        Assumes `image` is height x width x channels and that `input_shape`
        is ordered (batch, channels, height, width) -- TODO confirm for any
        model that is not a plain image network.

        Returns:
            dict mapping `self.input_name` to the prepared array.
        '''
        # cv2.resize takes (width, height), hence index 3 before index 2.
        input_image = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        # Move the channel axis to the front, then prepend the batch axis.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape(1, *input_image.shape)
        return {self.input_name: input_image}


    

In [None]:
%%writefile face_detect.py

import cv2
import numpy as np
import time
#from openvino.inference_engine import IENetwork, IECore
import sys
import os
import argparse
from read_and_load import read_and_load

class face_detect(read_and_load):
    '''
    Class for the Face Detection Model.

    Runs the detector on a frame, keeps detections above `threshold`, draws
    them on the frame and returns a crop of the last detection.
    '''

    # Minimum confidence for a detection to be kept. Declared on the class so
    # it can be overridden per instance instead of being reset on every call.
    threshold = 0.5

    def predict(self, image_inp):
        '''
        Run one inference on `image_inp`.

        Returns:
            (boxes, image, crp_img) as produced by `draw_outputs`.

        Raises:
            RuntimeError: if the inference request finishes with a non-zero status.
        '''
        net_input = self.preprocess_input(image_inp)
        infer_request_handle = self.network.start_async(request_id=0, inputs=net_input)
        status = infer_request_handle.wait()
        if status != 0:
            raise RuntimeError(f'{self.model_name} inference failed with status {status}')
        net_output = infer_request_handle.outputs[self.output_name]
        boxes = self.preprocess_output(net_output)
        return self.draw_outputs(boxes, image_inp)

    def draw_outputs(self, coords, image):
        '''
        Draw every box in `coords` on `image` and crop the last one.

        Args:
            coords: iterable of boxes whose first four values are scaled by
                the image width (index 0, 2) and height (index 1, 3).
            image: frame to draw on; cv2.rectangle is applied to it directly.

        Returns:
            (boxes, image, crp_img): `boxes` holds pixel coordinates
            [x1, y1, x2, y2] clamped to the frame, `crp_img` is the region of
            the last box (a slice of `image`) or None when `coords` is empty.
        '''
        w = image.shape[1]
        h = image.shape[0]
        boxes = []
        crp_img = None
        for box in coords:
            # Clamp to the frame: a negative corner would otherwise be read as
            # a from-the-end index by the slice below.
            x1 = min(max(int(box[0] * w), 0), w)
            y1 = min(max(int(box[1] * h), 0), h)
            x2 = min(max(int(box[2] * w), 0), w)
            y2 = min(max(int(box[3] * h), 0), h)
            boxes.append([x1, y1, x2, y2])
            image = cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 3)
            crp_img = image[y1:y2, x1:x2]
        return boxes, image, crp_img

    def preprocess_output(self, outputs):
        '''
        Keep the detections whose confidence exceeds `self.threshold`.

        `outputs` is indexed as [0, 0, i, :], with column 2 used as the
        confidence and columns 3 onward as the box.

        Returns:
            list of per-detection box arrays (possibly empty).
        '''
        detections = outputs[0, 0]
        keep = detections[:, 2] > self.threshold
        return list(detections[keep, 3:])


In [None]:
%%writefile landmark.py

import cv2
import numpy as np
from read_and_load import read_and_load
#from openvino.inference_engine import IENetwork, IECore


class landmark(read_and_load):
    '''
    Landmark model wrapper: locates points on a face crop and cuts out two
    eye patches around the first two points.
    '''

    def predict(self, image):
        '''
        Run one inference on `image`.

        Returns:
            (image_with_boxes, right_eye, left_eye, x, y) on success, where
            `x` / `y` are pixel coordinates of every landmark; None when the
            inference request finishes with a non-zero status.
        '''
        self.img = image
        net_input = self.preprocess_input(self.img)
        infer_request_handle = self.network.start_async(request_id=0, inputs=net_input)
        if infer_request_handle.wait() != 0:
            return None

        self.net_output = infer_request_handle.outputs[self.output_name]
        self.x, self.y = self.find_points()
        # Crop before drawing so the eye patches are taken from undrawn pixels.
        right_eye, left_eye = self.crop_eyes(self.x, self.y)
        img_circle = self.draw_circle()
        return (img_circle, right_eye, left_eye, self.x, self.y)

    def find_points(self):
        '''
        Convert the flattened network output to pixel coordinates.

        Values at even positions are scaled by the image width (x), values at
        odd positions by the image height (y).
        '''
        h, w, _ = np.shape(self.img)
        flat = np.squeeze(self.net_output)
        x = [int(value * w) for value in flat[0::2]]
        y = [int(value * h) for value in flat[1::2]]
        return (x, y)

    def draw_circle(self):
        '''Draw a 60x60 box around each of the first two landmarks on `self.img`.'''
        for i in range(2):
            cv2.rectangle(self.img, (self.x[i] - 30, self.y[i] - 30), (self.x[i] + 30, self.y[i] + 30), (255, 0, 0), 1)
        return (self.img)

    def crop_eyes(self, x, y, half_size=15):
        '''
        Cut a square patch around each of the first two landmarks.

        Args:
            x, y: landmark pixel coordinates; index 0 is returned as the
                right eye and index 1 as the left eye.
            half_size: half the side length of each patch in pixels.

        Returns:
            (right_eye, left_eye) slices of `self.img`. The start of each
            slice is clamped at 0 because a negative start would be treated
            as a from-the-end index and yield an empty or wrong patch.
        '''
        def patch(cx, cy):
            top = max(cy - half_size, 0)
            left = max(cx - half_size, 0)
            return self.img[top:cy + half_size, left:cx + half_size]

        right_eye = patch(x[0], y[0])
        left_eye = patch(x[1], y[1])
        return (right_eye, left_eye)
        
        

In [None]:
%%writefile head_pose.py

from read_and_load import read_and_load
from openvino.inference_engine import IENetwork, IECore
import logging
# Grab the root logger (no name given) and let records from DEBUG upwards through.
logger = logging.getLogger(None)
logger.setLevel('DEBUG')

class head_pose(read_and_load):
    '''
    Head pose model wrapper. Unlike the base class it reads every network
    output rather than only the first one.
    '''

    # Preferred output order (yaw, pitch, roll). These names come from the
    # Open Model Zoo description of head-pose-estimation-adas-0001 -- confirm
    # before relying on them with a different model.
    _ANGLE_OUTPUTS = ('angle_y_fc', 'angle_p_fc', 'angle_r_fc')

    def load_model(self):
        '''
        Read the network, record its first input and all outputs, and load it
        onto `self.device`.

        `self.output` lists the output names in the order `predict` returns
        them: the fixed order above when all three names exist, otherwise the
        order in which the network reports them.

        Raises:
            Exception: whatever the network reader raised, after logging it.
        '''
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            logging.error(f'{e}: check the path of the input model')
            raise

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = self.model.outputs.keys()
        reported = list(self.output_name)
        # Do not depend on dict iteration order when the names are known.
        if all(name in reported for name in self._ANGLE_OUTPUTS):
            self.output = list(self._ANGLE_OUTPUTS)
        else:
            self.output = reported

        core = IECore()
        self.network = core.load_network(network=self.model, device_name=self.device, num_requests=1)
        logging.info('Head pose model loaded successfully')

    def predict(self, image):
        '''
        Run one inference on `image`.

        Returns:
            list with one output array per name in `self.output`.

        Raises:
            RuntimeError: if the inference request finishes with a non-zero
                status (previously a stale or missing result was returned).
        '''
        self.img = image
        net_input = self.preprocess_input(self.img)
        infer_request_handle = self.network.start_async(request_id=0, inputs=net_input)
        status = infer_request_handle.wait()
        if status != 0:
            raise RuntimeError(f'{self.model_name} inference failed with status {status}')
        self.net_output = [infer_request_handle.outputs[name] for name in self.output]
        return (self.net_output)
    

In [None]:
%%writefile gaze.py

import logging
from read_and_load import read_and_load
from openvino.inference_engine import IENetwork, IECore
import cv2

class gaze(read_and_load):
    '''
    Gaze model wrapper. The network takes three inputs (head pose angles and
    two eye images), so loading and preprocessing differ from the base class.
    '''

    # Input blob names as listed in the Open Model Zoo description of
    # gaze-estimation-adas-0002 -- confirm before using a different model.
    # Each is paired with the positional index the code relied on before, used
    # as a fallback when the name is absent.
    _HEAD_POSE_INPUT = ('head_pose_angles', 0)
    _LEFT_EYE_INPUT = ('left_eye_image', 1)
    _RIGHT_EYE_INPUT = ('right_eye_image', 2)

    def load_model(self):
        '''
        Read the network, record every input name and shape plus the first
        output name, and load the network onto `self.device`.
        '''
        self.model = IENetwork(self.model_structure, self.model_weights)
        self.input_names = [name for name in iter(self.model.inputs)]
        self.input_shapes = [self.model.inputs[name].shape for name in self.input_names]

        self.output_name = next(iter(self.model.outputs))

        core = IECore()
        self.network = core.load_network(network=self.model, device_name=self.device, num_requests=1)
        logging.info(f'{self.model_name} model loaded successfully')

    def _input_slot(self, slot):
        '''Return (name, shape) for an input, matched by name, else by position.'''
        wanted, fallback_index = slot
        if wanted in self.input_names:
            index = self.input_names.index(wanted)
        else:
            index = fallback_index
        return self.input_names[index], self.input_shapes[index]

    @staticmethod
    def _prepare_eye(eye_image, shape, label):
        '''Resize an eye patch to `shape`, move channels first, add a batch axis.'''
        if eye_image is None or eye_image.size == 0:
            raise ValueError(f'{label} eye image is empty')
        # cv2.resize takes (width, height), hence index 3 before index 2.
        resized = cv2.resize(eye_image, (shape[3], shape[2]))
        channels_first = resized.transpose((2, 0, 1))
        return channels_first.reshape(1, *channels_first.shape)

    def preprocess_input(self, left_eye_image, right_eye_image, head_pose_angles):
        '''
        Build the input dictionary for the network.

        Each eye image is resized using the shape of its own input, and
        `head_pose_angles` is passed through unchanged.

        Raises:
            ValueError: if either eye image is None or has no pixels.
        '''
        pose_name, _ = self._input_slot(self._HEAD_POSE_INPUT)
        left_name, left_shape = self._input_slot(self._LEFT_EYE_INPUT)
        right_name, right_shape = self._input_slot(self._RIGHT_EYE_INPUT)

        return {
            pose_name: head_pose_angles,
            left_name: self._prepare_eye(left_eye_image, left_shape, 'left'),
            right_name: self._prepare_eye(right_eye_image, right_shape, 'right'),
        }

    def predict(self, left_eye_image, right_eye_image, head_pose_angles):
        '''
        Run one inference and return the array of the first network output.

        Raises:
            RuntimeError: if the inference request finishes with a non-zero
                status (previously a stale or missing result was returned).
        '''
        net_input = self.preprocess_input(left_eye_image, right_eye_image, head_pose_angles)
        infer_request_handle = self.network.start_async(request_id=0, inputs=net_input)
        status = infer_request_handle.wait()
        if status != 0:
            raise RuntimeError(f'{self.model_name} inference failed with status {status}')
        self.net_output = infer_request_handle.outputs[self.output_name]
        return (self.net_output)

In [12]:
%%writefile inference.py

import cv2
import logging
import numpy as np
import argparse
from face_detect import face_detect
from head_pose import head_pose
from landmark import landmark
from gaze import gaze
from openvino.inference_engine import IENetwork, IECore
from movemouse import MouseController


# Grab the root logger (no name given) and let records from DEBUG upwards through.
logger = logging.getLogger(None)
logger.setLevel('DEBUG')

def _annotate_frame(frame, fd, ld, hp, gz, rvec, tvec, camera_matrix):
    '''
    Run the four models on one frame and draw the gaze arrows on it.

    Returns the frame as returned by the face detector. Frames where no
    usable face or eye crop is available are returned without gaze arrows.
    '''
    boxes, pre_img, crp_img = fd.predict(frame)
    if not boxes or crp_img is None or crp_img.size == 0:
        return pre_img

    landmarks = ld.predict(crp_img)
    if landmarks is None:
        return pre_img
    _, right_eye, left_eye, x_e, y_e = landmarks
    # An eye close to the crop border can produce an empty patch.
    if right_eye.size == 0 or left_eye.size == 0:
        return pre_img

    hp_vector = np.reshape(hp.predict(crp_img), (1, 3))
    mouse_points = gz.predict(left_eye, right_eye, hp_vector)

    result, _ = cv2.projectPoints(mouse_points, rvec, tvec, camera_matrix, None)
    result = result[0][0]
    res = (int(result[0] * 100), int(result[1] * 100))

    # The face crop is taken from the last detected box, so the landmark
    # offsets must be added to that same box rather than the first one.
    face_x, face_y = boxes[-1][0], boxes[-1][1]
    for i in range(2):
        eye = (face_x + x_e[i], face_y + y_e[i])
        cv2.arrowedLine(pre_img, eye, (eye[0] - res[0], eye[1] + res[1]), (0, 255, 0), 2)

    #move_mouse = MouseController('medium','medium')
    #move_mouse.move((e1[0] - res[0], e1[1] + res[1]))
    return pre_img


def main(args):
    '''
    Load the four models and run the gaze pipeline over a video or camera.

    Args:
        args: namespace with `input_type` ('video' or 'cam'), `face_path`,
            `landmark_path`, `headpose_path`, `gaze_path`, `device` and
            `inter_viz`. An optional `input_path` selects the video file and
            falls back to 'demo.mp4' when absent.

    When `inter_viz` is set, each annotated frame is shown in a window and
    appended to 'output1.mp4'.
    '''
    # Validate first so a bad value fails before the models are loaded.
    if args.input_type not in ('video', 'cam'):
        logging.error(f'Unsupported input type {args.input_type!r}. Use \'video\' or \'cam\'')
        return

    fd = face_detect('face detection', args.face_path, args.device)
    fd.load_model()

    ld = landmark('landmark', args.landmark_path, args.device)
    ld.load_model()

    hp = head_pose('head pose', args.headpose_path, args.device)
    hp.load_model()

    gz = gaze('Gaze', args.gaze_path, args.device)
    gz.load_model()

    if args.input_type == 'video':
        cap = cv2.VideoCapture(getattr(args, 'input_path', None) or 'demo.mp4')
    else:
        cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        logging.error('Video file not found. Check the path')
        cap.release()
        return

    # Loop-invariant projection parameters: no rotation, no translation,
    # identity camera matrix. np.float was removed from NumPy; use np.float64.
    rvec = np.zeros(3, dtype=np.float64)
    tvec = np.zeros(3, dtype=np.float64)
    camera_matrix = np.eye(3, dtype=np.float64)

    video_writer = None
    try:
        if args.inter_viz:
            # Size the writer from the source; fall back to the former
            # fixed size when the capture does not report one.
            frame_size = (
                int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 1920,
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 1080,
            )
            video_writer = cv2.VideoWriter('output1.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 10, frame_size)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            annotated = _annotate_frame(frame, fd, ld, hp, gz, rvec, tvec, camera_matrix)

            if args.inter_viz:
                cv2.imshow('frame', annotated)
                video_writer.write(annotated)
                cv2.waitKey(1)
    finally:
        # Release on every exit path so the output file is finalised.
        cap.release()
        if video_writer is not None:
            video_writer.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_type', required=True, choices=('video', 'cam'),
                   help = 'Enter \'video\' or \'cam\'')
    parser.add_argument('--input_path', default='demo.mp4',
                   help = 'Enter the path of the video file (used when input_type is \'video\')')
    parser.add_argument('--face_path', required=True,
                   help = 'Enter the path for face detection model')
    parser.add_argument('--headpose_path', required=True,
                   help = 'Enter the path for head pose detection model')
    parser.add_argument('--landmark_path', required=True,
                   help = 'Enter the path for landmark detection model')
    parser.add_argument('--gaze_path', required=True,
                   help = 'Enter the path for gaze estimation model')
    parser.add_argument('--device', default='CPU',
                   help = 'Enter the device to run model')
    parser.add_argument('--inter_viz', action = 'store_true',
                   help = 'Flag for visualization')

    args = parser.parse_args()
    main(args)



Overwriting inference.py


In [13]:
!python inference.py --input_type video --face_path ./intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001 --landmark_path ./intel/landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009 --headpose_path ./intel/head-pose-estimation-adas-0001/FP16/head-pose-estimation-adas-0001 --gaze_path ./intel/gaze-estimation-adas-0002/FP16/gaze-estimation-adas-0002 --device CPU --inter_viz

INFO:root:face detection model loaded successfully
INFO:root:landmark model loaded successfully
INFO:root:Head pose model loaded successfully


In [8]:
%%writefile movemouse.py

import pyautogui
import argparse

class MouseController:
    '''
    Moves the mouse pointer with pyautogui.

    `precision` and `speed` are given as names and translated to numbers via
    the tables below; an unknown name raises KeyError.
    '''

    _PRECISION_LEVELS = {'high': 100, 'low': 1000, 'medium': 500}
    _SPEED_LEVELS = {'fast': 1, 'slow': 10, 'medium': 5}

    def __init__(self, precision, speed):
        self.precision = self._PRECISION_LEVELS[precision]
        self.speed = self._SPEED_LEVELS[speed]

    def move(self, position):
        '''
        Move the pointer to `position` (an (x, y) pair) and double-click.

        `self.speed` is passed as the third positional argument of
        `pyautogui.moveTo` (presumably the duration -- confirm against the
        pyautogui documentation). `self.precision` is not used here.
        '''
        target_x, target_y = position
        pyautogui.moveTo(target_x, target_y, self.speed)
        pyautogui.doubleClick()
        
    

Overwriting movemouse.py
