### MoveNet: 매우 빠르고 정확한 포즈 감지
---

In [1]:
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.models import load_model


In [25]:
# TF Hub handle of the pose model used by this notebook.
MOVENET_HANDLE = 'https://tfhub.dev/google/movenet/multipose/lightning/1'

# Fetch the module, then keep its 'serving_default' signature as the
# callable that the main loop invokes on every frame.
hub_model = hub.load(MOVENET_HANDLE)
movenet = hub_model.signatures['serving_default']

In [31]:
# main
# Per-frame pipeline: read a frame, run MoveNet on a resized copy, draw the
# detected skeletons, then (when exactly 17 points were collected by
# draw_keypoints) classify the pose with the Keras CNN and overlay the label.
# NOTE(review): this cell uses EDGES and loop_through_people, which are
# defined in other cells of this notebook; they must be executed first.
cap = cv2.VideoCapture('./movenet/KakaoTalk_20221124_164253142.mp4')

get_state = ['get_on', 'get_off', 'nothing']
abnormal_state = ['stand','walk','sit down','lay down','grab handle','violence']

try:
    # Load the classifier once. It does not change between frames, so
    # reloading it from disk inside the loop is pure loop-invariant work.
    model = load_model('./movenet/18-0.3567(승하차CNN).hdf5')  # CNN, full dataset

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (end of the video or a read failure);
            # `frame` is None here, so stop instead of crashing on it.
            break

        # Module-level list that draw_keypoints() appends to; it must be
        # reset for every frame and must keep this exact name.
        point = []

        # Resize image
        #frame = cv2.flip(frame,0)
        img = frame.copy()
        img = tf.image.resize_with_pad(tf.expand_dims(img, axis=0), 192,256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section
        results = movenet(input_img)
        # Keep the first 51 values of each detection and view them as
        # 6 x 17 x 3 (the drawing helpers unpack each row as y, x, score).
        keypoints_with_scores = results['output_0'].numpy()[:,:,:51].reshape((6,17,3))

        # Render keypoints (this also fills `point` as a side effect)
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)

        if len(point) == 17:
            # Reorder the collected points into the 16-point order fed to the
            # CNN; position 3 is intentionally not part of this list.
            switch = [point[16], point[14], point[12], point[11], point[13], point[15], point[4], point[0], point[2], point[1],
                    point[10], point[8], point[6], point[5], point[7], point[9]]
            result = model.predict(np.array(switch).reshape(-1,16,1,3))

            # Label is drawn 100 px up and to the left of the first point.
            cv2.putText(frame, get_state[result.argmax()], (point[0][0]-100, point[0][1]-100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 1, lineType=cv2.LINE_AA)
        cv2.imshow('Movenet Multipose', frame)

        # Press 'q' to stop playback early.
        if cv2.waitKey(1) & 0xFF==ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()



In [3]:
# Skeleton connectivity: each key is a pair of keypoint indices joined by a
# line, each value is a one-letter code.
# NOTE(review): the indices presumably follow MoveNet's 17-keypoint ordering
# and 'm'/'c' look like magenta/cyan colour codes - confirm against the model
# documentation. draw_connections() in this file ignores the value and always
# draws with one fixed colour.
_EDGE_ENTRIES = (
    ((0, 1), 'm'),
    ((0, 2), 'c'),
    ((1, 3), 'm'),
    ((2, 4), 'c'),
    ((0, 5), 'm'),
    ((0, 6), 'c'),
    ((5, 7), 'm'),
    ((7, 9), 'm'),
    ((6, 8), 'c'),
    ((8, 10), 'c'),
    ((5, 11), 'm'),
    ((6, 12), 'c'),
    ((11, 13), 'm'),
    ((13, 15), 'm'),
    ((12, 14), 'c'),
    ((14, 16), 'c'),
)

# Built from the ordered entries above so iteration order stays the same.
EDGES = dict(_EDGE_ENTRIES)

In [4]:
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Render every detected person onto ``frame``.

    For each entry of ``keypoints_with_scores`` the skeleton lines are drawn
    first and the keypoint markers second, so the markers end up on top.

    Args:
        frame: Image that is drawn on in place.
        keypoints_with_scores: Iterable with one keypoint array per person.
        edges: Mapping whose keys are pairs of keypoint indices to connect.
        confidence_threshold: Minimum score passed on to both drawing helpers.
    """
    for skeleton in keypoints_with_scores:
        draw_connections(frame, skeleton, edges, confidence_threshold)
        draw_keypoints(frame, skeleton, confidence_threshold)

In [5]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on ``frame`` for every edge whose two endpoints are confident.

    Args:
        frame: Image array whose ``shape`` is (height, width, channels); it is
            modified in place.
        keypoints: Rows of (y, x, score). The y and x values are scaled by the
            frame height and width, so they are presumably normalised - verify
            against the caller.
        edges: Mapping keyed by (index_a, index_b) pairs; the mapped values
            are not used here.
        confidence_threshold: Both endpoint scores must be strictly greater
            than this for the line to be drawn.
    """
    height, width, _ = frame.shape
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (start, end), _ in edges.items():
        ya, xa, score_a = scaled[start]
        yb, xb, score_b = scaled[end]

        # Skip the edge unless both of its endpoints clear the threshold.
        if not (score_a > confidence_threshold and score_b > confidence_threshold):
            continue

        start_px = (int(xa), int(ya))
        end_px = (int(xb), int(yb))
        cv2.line(frame, start_px, end_px, (0,0,255), 2)

In [6]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark confident keypoints on ``frame`` and record them in ``point``.

    Each keypoint whose score is strictly greater than
    ``confidence_threshold`` gets a filled circle and a numeric label, and its
    pixel position is appended to the module-level list ``point`` as
    ``(x, y, 0)``. Processing stops as soon as ``point`` holds 17 entries.

    NOTE(review): the label is a running count of the keypoints drawn in this
    call, not the keypoint's row index, and ``point`` keeps growing across
    calls until it is reset by the caller - confirm this is intended.

    Args:
        frame: Image array whose ``shape`` is (height, width, channels); it is
            modified in place.
        keypoints: Rows of (y, x, score); y and x are scaled by the frame
            height and width.
        confidence_threshold: Minimum score (exclusive) for a keypoint to be
            drawn and recorded.
    """
    height, width, _ = frame.shape
    scaled = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    drawn = 0
    for row in scaled:
        # `point` is shared module state; once it is full, stop entirely.
        if len(point) > 16:
            break

        ky, kx, score = row
        if not (score > confidence_threshold):
            continue

        centre = (int(kx), int(ky))
        cv2.circle(frame, centre, 4, (0,255,0), -1)
        cv2.putText(frame, str(drawn), centre, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1, lineType=cv2.LINE_AA)
        point.append((centre[0], centre[1], 0))
        drawn += 1