In [1]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2
import time
import pyautogui

# 1. Load Model

In [2]:
# Build the TFLite interpreter from the model file on disk (MoveNet single-pose
# Lightning, going by the file name) and reserve its tensor buffers so it is
# ready for set_tensor / invoke calls later in the notebook.
interpreter = tf.lite.Interpreter(
    model_path='lite-model_movenet_singlepose_lightning_3.tflite'
)
interpreter.allocate_tensors()

# Previous / current frame timestamps; the capture loop updates these to
# derive the frames-per-second figure it overlays on the video.
pTime, cTime = 0, 0

# 2. Make Detections

In [3]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on `frame` for every skeleton edge whose two ends are confident.

    Args:
        frame: Image to draw on (modified in place); `frame.shape` must unpack
            into (height, width, channels).
        keypoints: Array-like whose rows, once squeezed, are (y, x, score)
            triples. y and x are multiplied by the frame height and width, so
            they are presumably normalised to [0, 1] -- TODO confirm.
        edges: Mapping of (index_a, index_b) pairs to a colour code. Only the
            keys are used; every line is drawn with the fixed colour (0, 0, 255).
        confidence_threshold: Both endpoint scores must be strictly greater
            than this value for the edge to be drawn.

    NOTE(review): the capture loop letterboxes the frame (resize_with_pad)
    before inference, so scaling by the raw frame size may be slightly off
    vertically on non-square frames -- confirm.
    """
    height, width, _ = frame.shape
    # Scale y by height and x by width; the score column is multiplied by 1.
    pixel_points = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (start_idx, end_idx), _color_code in edges.items():
        start_y, start_x, start_score = pixel_points[start_idx]
        end_y, end_x, end_score = pixel_points[end_idx]

        both_confident = (start_score > confidence_threshold
                          and end_score > confidence_threshold)
        if not both_confident:
            continue

        # OpenCV points are (x, y), hence the swap from the (y, x) row layout.
        cv2.line(frame, (int(start_x), int(start_y)), (int(end_x), int(end_y)), (0, 0, 255), 2)

In [4]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark every sufficiently confident keypoint on `frame` with a filled dot.

    Args:
        frame: Image to draw on (modified in place); `frame.shape` must unpack
            into (height, width, channels).
        keypoints: Array-like whose rows, once squeezed, are (y, x, score)
            triples. y and x are multiplied by the frame height and width, so
            they are presumably normalised to [0, 1] -- TODO confirm.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value.

    NOTE(review): the capture loop letterboxes the frame (resize_with_pad)
    before inference, so scaling by the raw frame size may be slightly off
    vertically on non-square frames -- confirm.
    """
    height, width, _ = frame.shape
    # Scale y by height and x by width; the score column is multiplied by 1.
    pixel_points = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for point_y, point_x, score in pixel_points:
        if not score > confidence_threshold:
            continue
        # Radius 4, colour (0, 255, 0), thickness -1 (filled circle).
        cv2.circle(frame, (int(point_x), int(point_y)), 4, (0, 255, 0), -1)

In [5]:
# Skeleton connectivity: each key is a pair of keypoint indices to join with a
# line, each value a one-letter colour code ('m', 'c' or 'y').
# Index 0 is the point the capture loop treats as the nose; the remaining
# indices presumably follow the pose model's keypoint order -- TODO confirm.
# Insertion order is preserved so edges are drawn in the same sequence.
EDGES = dict([
    ((0, 1), 'm'), ((0, 2), 'c'),
    ((1, 3), 'm'), ((2, 4), 'c'),
    ((0, 5), 'm'), ((0, 6), 'c'),
    ((5, 7), 'm'), ((7, 9), 'm'),
    ((6, 8), 'c'), ((8, 10), 'c'),
    ((5, 6), 'y'),
    ((5, 11), 'm'), ((6, 12), 'c'),
    ((11, 12), 'y'),
    ((11, 13), 'm'), ((13, 15), 'm'),
    ((12, 14), 'c'), ((14, 16), 'c'),
])

In [None]:
# Webcam head-tracking loop: runs pose estimation on every frame, records the
# nose position once as the "neutral" reference, then presses a / d / w / s
# (via pyautogui) each time the nose leaves the neutral zone in a direction.
# Press "q" in the preview window to stop.

# Nose travel, in frame pixels relative to the neutral position, that fires a key.
HORIZONTAL_THRESHOLD = 50   # sideways -> "a" (x increases) / "d" (x decreases)
UP_THRESHOLD = 40           # y decreases -> "w"
DOWN_THRESHOLD = 50         # y increases -> "s"
KEYPOINT_CONFIDENCE = 0.4   # minimum score for a keypoint / edge to be drawn
MODEL_INPUT_SIZE = 192      # side length the frame is resized-and-padded to
GUIDE_COLOR = (255, 0, 255)


def _countdown(message, seconds=5):
    """Print `message`, then count down from `seconds` to 1, pausing one second per line."""
    print(message)
    time.sleep(1)
    for remaining in range(seconds, 0, -1):
        print(remaining)
        time.sleep(1)


cap = cv2.VideoCapture(0)
neutral = 0
left_case, right_case, neutral_case, up_case, down_case = True, True, True, True, True

# Tensor metadata does not change between frames, so look it up once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

_countdown("taking neutral pos : ")
print("capturing...")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered (camera unplugged, stream ended, ...);
            # continuing would fail on a None frame.
            break

        frame_height, frame_width = frame.shape[:2]

        # Resize (with padding) to the model's square input and cast to float32.
        # NOTE(review): OpenCV delivers BGR frames -- confirm the channel order
        # the model expects.
        img = tf.image.resize_with_pad(
            np.expand_dims(frame, axis=0), MODEL_INPUT_SIZE, MODEL_INPUT_SIZE
        )
        input_image = tf.cast(img, dtype=tf.float32)

        # Make predictions
        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Rendering
        draw_connections(frame, keypoints_with_scores, EDGES, KEYPOINT_CONFIDENCE)
        draw_keypoints(frame, keypoints_with_scores, KEYPOINT_CONFIDENCE)

        # Keypoint 0 is used as the nose; its (y, x) is scaled by the actual
        # frame size, the same way the drawing helpers scale keypoints.
        # NOTE(review): the padding added by resize_with_pad is not compensated
        # for here -- confirm whether that matters for non-square frames.
        nose = keypoints_with_scores[0][0][0]
        nose = np.array(nose[:2] * [frame_height, frame_width]).astype(int)
        left_right_value = nose[1]
        up_down_value = nose[0]

        if neutral_case:
            # One-off calibration: the nose position from this frame becomes
            # the neutral reference once the countdown has finished.
            _countdown("tracker starts in : ")
            print("0")
            print("spacebar")
            pyautogui.press("space")
            neutral = nose
            neutral_case = False

        neutral_y, neutral_x = int(neutral[0]), int(neutral[1])

        # Re-arm a direction pair only while the nose is strictly inside that
        # axis's trigger thresholds. The vertical band uses the same -40 / +50
        # limits as the triggers; a wider band would overlap the "up" trigger
        # and fire "w" on every frame while the nose sits in the overlap.
        if neutral_x - HORIZONTAL_THRESHOLD < left_right_value < neutral_x + HORIZONTAL_THRESHOLD:
            left_case = True
            right_case = True

        if neutral_y - UP_THRESHOLD < up_down_value < neutral_y + DOWN_THRESHOLD:
            up_case = True
            down_case = True

        # Each flag is a one-shot latch: fire once, then wait for a re-arm.
        if left_case and left_right_value > neutral_x + HORIZONTAL_THRESHOLD:
            left_case = False
            print("left")
            pyautogui.press("a")

        if right_case and left_right_value < neutral_x - HORIZONTAL_THRESHOLD:
            right_case = False
            print("right")
            pyautogui.press("d")

        if up_case and up_down_value < neutral_y - UP_THRESHOLD:
            up_case = False
            print("up")
            pyautogui.press("w")

        if down_case and up_down_value > neutral_y + DOWN_THRESHOLD:
            down_case = False
            print("down")
            pyautogui.press("s")

        # pTime / cTime are the notebook-level timestamps set in the model cell.
        cTime = time.time()
        elapsed = cTime - pTime
        fps = 1 / elapsed if elapsed > 0 else 0  # guard against a zero interval
        pTime = cTime

        frame = cv2.flip(frame, 1)

        # The preview is mirrored, so the neutral x has to be mirrored as well
        # for the vertical guides to sit where the triggers actually are.
        mirrored_x = (frame_width - 1) - neutral_x
        for guide_x in (mirrored_x - HORIZONTAL_THRESHOLD, mirrored_x + HORIZONTAL_THRESHOLD):
            cv2.line(frame, (guide_x, 0), (guide_x, frame_height), GUIDE_COLOR, 2)
        for guide_y in (neutral_y - UP_THRESHOLD, neutral_y + DOWN_THRESHOLD):
            cv2.line(frame, (0, guide_y), (frame_width, guide_y), GUIDE_COLOR, 2)

        cv2.putText(frame, str(int(fps)), (10, 70), cv2.FONT_HERSHEY_PLAIN, 2, GUIDE_COLOR, 3)
        cv2.imshow('MoveNet Lightning', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an exception or
    # a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()

taking neutral pos : 
5
4
3
2
1
capturing...
tracker starts in : 
5
4
3
2
1
0
spacebar
right
left
left
left
left
left
up
up
right
left
left
left
right
right
right
right
right
right
right
right
right
right
right
right
right
left
left
left
left
right
left
left
right
right
right
right
right
up
down
up
up
up
right
right
left
right
left
up
right
left
left
right
down
up
left
left
down
up
left
left
down
right
up
right
right
right
up
up
right
left
left
left
left
left
left
left
left
left
left
up
up
up
up
up
up
up
up
up
up
up
up
down


# 3. Draw Keypoints

# 4. Draw Edges