In [36]:
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
import tensorflow as tf

from collections import deque
from collections import Counter

In [37]:
# Short aliases for the MediaPipe solution modules used by the webcam loop.
mp_drawing = mp.solutions.drawing_utils  # provides draw_landmarks
mp_drawing_styles = mp.solutions.drawing_styles  # provides the default landmark/connection styles
mp_hands = mp.solutions.hands  # provides Hands and HAND_CONNECTIONS

In [112]:
def get_landmark_list(image, landmarks):
    """Convert a hand's landmarks into pixel coordinates of ``image``.

    Parameters
    ----------
    image : array with a 3-element ``shape`` of (height, width, channels).
    landmarks : object exposing an iterable ``landmark`` attribute whose items
        have ``x`` and ``y`` attributes (presumably fractions of the image
        width/height as produced by MediaPipe -- confirm against the caller).

    Returns
    -------
    numpy.ndarray
        float64 array with one ``[x, y]`` row per landmark, in pixels. Values
        are truncated to integers and capped at ``width - 1`` / ``height - 1``;
        there is no lower bound, so off-frame landmarks can be negative.
        Note: these are pixel coordinates, not normalized ones.
    """
    height, width, _channels = image.shape
    pixel_points = [
        [min(int(point.x * width), width - 1),
         min(int(point.y * height), height - 1)]
        for point in landmarks.landmark
    ]
    return np.array(pixel_points, dtype=np.float64)

In [113]:
def preprocess_point_history(image, history):
    """Turn the per-joint coordinate histories into one model input window.

    Parameters
    ----------
    image : array with a 3-element ``shape`` of (height, width, channels);
        only the width and height are used, as scaling factors.
    history : sequence of per-joint sequences of ``[x, y]`` points. The data
        must total 21 * 16 * 2 values (21 joints, 16 frames each).

    Returns
    -------
    numpy.ndarray or None
        ``None`` unless ``history[0]`` holds exactly 16 entries. Otherwise a
        float64 array of shape (1, 16, 42): one row per frame, laid out as
        ``x0, y0, x1, y1, ...`` over the 21 joints. Every coordinate is taken
        relative to the first entry of the first joint's history and then
        divided by the image width (x) or height (y).
    """
    if len(history[0]) != 16:
        return None

    height, width, _channels = image.shape
    # (joint, frame, xy) view of the raw history.
    points = np.array(history, dtype=np.float64).reshape(21, 16, 2)
    # Copy the reference point first so the in-place subtraction cannot alias it.
    origin = points[0, 0].copy()
    points -= origin
    points[..., 0] /= width
    points[..., 1] /= height
    # Reorder to (frame, joint, xy) and flatten each frame to 42 values.
    return points.transpose(1, 0, 2).reshape(1, 16, 42)


In [114]:
def show_result(image, output):
    """Write the predicted class name onto the frame.

    Parameters
    ----------
    image : frame handed to ``cv2.putText``.
    output : str
        Class label to display; an empty string means nothing is drawn.

    Returns
    -------
    The ``image`` object that was passed in.
    """
    if output == "":
        return image

    caption = "class: " + output
    anchor = (10, 30)     # text origin in pixels
    colour = (0, 0, 255)  # red when the frame is in BGR order
    cv2.putText(image, caption, anchor, cv2.FONT_HERSHEY_SIMPLEX,
                1, colour, 1, cv2.LINE_AA)
    return image

In [119]:
# Real-time gesture recognition from the default webcam (press ESC to quit).
#
# Per frame: mirror the image, run MediaPipe Hands, append the 21 landmark
# pixel coordinates to the per-joint histories, classify the 16-frame window
# once it is full, and overlay the most frequent of the last 10 predictions.
model = tf.keras.models.load_model('./motionmodel_total.h5')
classes = ['left', 'right', 'stack', 'fast', 'clock-wise', 'counter clock-wise', 'idle']
history = [deque(maxlen=16) for _ in range(21)]  # one 16-frame window per joint
output_list = deque(maxlen=10)  # recent predictions, used for majority-vote smoothing
data = None

# The camera is opened only after the model has loaded, and is released in
# `finally` so an exception or a kernel interrupt does not leave the device
# (and the preview window) open.
cap = cv2.VideoCapture(0)
try:
    # max_num_hands=1: `history` is a single shared window, so landmarks from a
    # second detected hand would be interleaved into it and corrupt the sequence.
    with mp_hands.Hands(
            max_num_hands=1,
            model_complexity=0,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as hands:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue
            # Flip the image horizontally for a selfie-view display.
            image = cv2.flip(image, 1)
            # To improve performance, optionally mark the image as not writeable to
            # pass by reference.
            image.flags.writeable = False
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = hands.process(image)

            # Draw the hand annotations on the image.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    landmark_list = get_landmark_list(image, hand_landmarks)
                    # Store the coordinates of the 21 points in the history
                    # deques; together they form a 21 x 16 x 2 window.
                    for joint_history, point in zip(history, landmark_list):
                        joint_history.append(point)
                    # NOTE (original author): the normalization of the history
                    # looks odd -- unclear why it is normalized twice.
                    data = preprocess_point_history(image, history)
                    if data is not None:
                        # verbose=0 keeps Keras from printing a progress bar
                        # for every single frame.
                        scores = model.predict(data, verbose=0)
                        output = classes[np.argmax(scores)]  # model output
                        output_list.append(output)  # keep it for smoothing
                    mp_drawing.draw_landmarks(
                        image,
                        hand_landmarks,
                        mp_hands.HAND_CONNECTIONS,
                        mp_drawing_styles.get_default_hand_landmarks_style(),
                        mp_drawing_styles.get_default_hand_connections_style())
                    # The label is only drawn while a hand is visible.
                    if output_list:
                        # Label = most frequent value among the recent predictions.
                        label = Counter(output_list).most_common(1)[0][0]
                        image = show_result(image, label)
            cv2.imshow('MediaPipe Hands', image)
            if cv2.waitKey(5) & 0xFF == 27:  # ESC
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

