In [1]:
import tensorflow as tf
import pandas as pd
import mediapipe as mp
import cv2
import numpy as np

In [2]:
# Load the saved Keras model from the 'landmark-classifier' path (resolved
# relative to the notebook's working directory) and display its repr.
model = tf.keras.models.load_model('landmark-classifier')
model

<keras.engine.sequential.Sequential at 0x2170a435f70>

In [3]:
# Read 'landmark_test.csv' into a DataFrame and preview its first rows.
df = pd.read_csv('landmark_test.csv')
df.head()

Unnamed: 0,HANDEDNESS,SCORE,WRIST_X,WRIST_Y,WRIST_Z,THUMB_CMC_X,THUMB_CMC_Y,THUMB_CMC_Z,THUMB_MCP_X,THUMB_MCP_Y,...,PINKY_PIP_X,PINKY_PIP_Y,PINKY_PIP_Z,PINKY_DIP_X,PINKY_DIP_Y,PINKY_DIP_Z,PINKY_TIP_X,PINKY_TIP_Y,PINKY_TIP_Z,IS_Y
0,Right,0.977647,0.702991,0.897968,2.433888e-07,0.639347,0.882083,-0.017095,0.583474,0.802773,...,0.734246,0.656133,-0.071648,0.726018,0.723912,-0.084286,0.718137,0.786529,-0.092157,False
1,Left,0.985491,0.295452,0.910555,4.220274e-07,0.360321,0.882685,-0.030225,0.416379,0.815846,...,0.225766,0.575336,-0.051741,0.206961,0.522553,-0.059381,0.193193,0.470217,-0.063901,False
2,Right,0.972596,0.683717,0.888127,4.904758e-07,0.619721,0.860116,-0.024976,0.558577,0.792674,...,0.747147,0.53162,-0.063509,0.766081,0.471743,-0.074781,0.781259,0.4133,-0.082728,False
3,Left,0.983499,0.472606,0.578923,-6.723511e-07,0.537697,0.538138,-0.049171,0.585324,0.47081,...,0.326382,0.354623,-0.104931,0.281053,0.336804,-0.113405,0.246604,0.305067,-0.116657,True
4,Right,0.943732,0.842368,0.528718,-1.469167e-07,0.822685,0.517418,-0.023772,0.794364,0.496827,...,0.847335,0.388905,-0.025723,0.853253,0.368919,-0.027401,0.855151,0.349502,-0.0271,True


In [4]:
from contextlib import contextmanager

@contextmanager
def open_video(*args):
    """Context manager that opens a video source and hands out its frames.

    All positional arguments are forwarded unchanged to ``cv2.VideoCapture``.
    The value bound by ``with ... as`` is a generator that yields one frame
    per successful ``read()``. Iteration ends when the capture is no longer
    open, when a read fails, or when ``cv2.waitKey(1)`` reports the 'q' key
    after a frame has been consumed. The capture is released when the
    ``with`` block exits, whether normally or through an exception.
    """
    capture = cv2.VideoCapture(*args)

    def _frames():
        while capture.isOpened():
            ok, image = capture.read()
            if not ok:
                return

            yield image

            # Poll the keyboard once per consumed frame; 'q' stops the stream.
            key_code = cv2.waitKey(1) & 0xFF
            if key_code == ord('q'):
                return

    try:
        yield _frames()
    finally:
        capture.release()

In [5]:
# Short aliases for the MediaPipe solution modules used by the camera loop:
# drawing helpers, the default landmark/connection styles, and the Hands solution.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

In [6]:
def parse_landmark(hand_landmark):
    """Convert a hand-landmark container into a NumPy coordinate array.

    Args:
        hand_landmark: object whose ``landmark`` attribute is an iterable of
            points exposing ``x``, ``y`` and ``z`` attributes.

    Returns:
        ``np.ndarray`` with one row per point and the columns ``x, y, z``,
        in the same order as ``hand_landmark.landmark``.
    """
    rows = [[point.x, point.y, point.z] for point in hand_landmark.landmark]
    return np.array(rows)

In [20]:
# Title of the OpenCV preview window.
WIN_NAME = 'Camera'
# Most recently processed hand; kept at module level so it can be inspected
# from another cell after the loop has stopped.
hand_landmark = None

# Minimum model output for a hand to be labelled as a positive ("Yeay").
THRES = .95

# Live demo: read webcam frames, detect hands, classify each hand's landmarks
# and overlay the skeleton plus the prediction. Press 'q' in the window to stop.
try:
    # NOTE(review): static_image_mode=True makes MediaPipe run full detection on
    # every frame; its documentation recommends False for video streams. Left
    # unchanged because switching modes alters detection behaviour - confirm first.
    with open_video(0) as video, mp_hands.Hands(static_image_mode=True,
                                                max_num_hands=2,
                                                min_detection_confidence=0.7) as hands:
        for frame in video:
            # Mirror the frame so the preview behaves like a mirror. cv2.flip
            # already returns a new array, so no explicit copy is needed.
            frame = cv2.flip(frame, 1)
            # OpenCV delivers BGR; MediaPipe expects RGB.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(image)
            if not results.multi_hand_landmarks:
                cv2.imshow(WIN_NAME, frame)
                continue
            annotated_image = frame

            # Classify every detected hand with a single batched call.
            # verbose=0 stops Keras from printing a progress bar per frame.
            batch = np.stack([parse_landmark(hand).ravel()
                              for hand in results.multi_hand_landmarks])
            predictions = model.predict(batch, verbose=0).reshape(len(batch), -1)[:, 0]

            for i, (hand_landmarks, pred) in enumerate(
                    zip(results.multi_hand_landmarks, predictions)):
                hand_landmark = hand_landmarks

                mp_drawing.draw_landmarks(
                    annotated_image,
                    hand_landmarks,
                    mp_hands.HAND_CONNECTIONS,
                    mp_drawing_styles.get_default_hand_landmarks_style(),
                    mp_drawing_styles.get_default_hand_connections_style())

                is_pos = pred > THRES

                # Stack one text line per hand, upwards from the bottom-left corner.
                x, y = 20, frame.shape[0] - 20 - 30 * i
                # Colours are BGR: green for positive, red for negative.
                color = (0,255,0) if is_pos else (0,0,255)
                text = "Yeay" if is_pos else "Nay"
                text = f'{text} conf: {pred*100:.2f}%'
                cv2.putText(annotated_image, text,
                            (x, y),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=.8,
                            color=color,
                            thickness=2)

            cv2.imshow(WIN_NAME, annotated_image)
finally:
    # Always close the preview window, even when the loop raises or the kernel
    # is interrupted; otherwise the window is left open and unresponsive.
    cv2.destroyAllWindows()
