In [1]:
import mediapipe as mp
import numpy as np
import cv2 as cv
import os
import tensorflow as tf
from model import KeyPointClassifier
from model import PointHistoryClassifier
from utils import CvFpsCalc
import copy
import itertools
from collections import Counter
from collections import deque
print(os.getcwd())

import csv

model_path='model/keypoint_classifier/keypoint_classifier.tflite'

c:\Users\thana\Desktop\TheShit\mbse_athome\mbse_code\atHome_code
c:\Users\thana\Desktop\TheShit\mbse_athome\mbse_code\atHome_code


In [2]:
##### argument #####
cap_device = 0
cap_width = 960
cap_height = 540
use_static_image_mode = False
min_detection_confidence = 0.7
min_tracking_confidence = 0.5

##### setting camera #####
cap = cv.VideoCapture(cap_device)
cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)

##### mediapipe hand #####
mp_drawing_styles = mp.solutions.drawing_styles
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
        static_image_mode=use_static_image_mode,
        max_num_hands=1,
        min_detection_confidence=min_detection_confidence,
        min_tracking_confidence=min_tracking_confidence,
    )

##### load model #####
keypoint_classifier = KeyPointClassifier()
keypoint_classifier_labels = ['Open', 'Close', 'Pointer', 'OK']
point_history_classifier = PointHistoryClassifier()
point_history_classifier_labels = ['Stop', 'Clockwise', 'Counter Clockwise', 'Move']


##### measure FPS #####
cvFpsCalc = CvFpsCalc(buffer_len=10)

##### declare history point
history_length = 16
point_history = deque(maxlen=history_length)

# Finger gesture history ################################################
finger_gesture_history = deque(maxlen=history_length)

##### measure fps
fps_que = deque(maxlen=50)


# Classifier Function

In [3]:
def calc_landmark_list(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_point = []

    # Keypoint
    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)
        # landmark_z = landmark.z

        landmark_point.append([landmark_x, landmark_y])

    return landmark_point

def pre_process_landmark(landmark_list):
    temp_landmark_list = copy.deepcopy(landmark_list)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, landmark_point in enumerate(temp_landmark_list):
        if index == 0:
            base_x, base_y = landmark_point[0], landmark_point[1]

        temp_landmark_list[index][0] = temp_landmark_list[index][0] - base_x
        temp_landmark_list[index][1] = temp_landmark_list[index][1] - base_y

    # Convert to a one-dimensional list
    temp_landmark_list = list(
        itertools.chain.from_iterable(temp_landmark_list))

    # Normalization
    max_value = max(list(map(abs, temp_landmark_list)))

    def normalize_(n):
        return n / max_value

    temp_landmark_list = list(map(normalize_, temp_landmark_list))

    return temp_landmark_list

def calc_bounding_rect(image, landmarks):
    image_width, image_height = image.shape[1], image.shape[0]

    landmark_array = np.empty((0, 2), int)

    for _, landmark in enumerate(landmarks.landmark):
        landmark_x = min(int(landmark.x * image_width), image_width - 1)
        landmark_y = min(int(landmark.y * image_height), image_height - 1)

        landmark_point = [np.array((landmark_x, landmark_y))]

        landmark_array = np.append(landmark_array, landmark_point, axis=0)

    x, y, w, h = cv.boundingRect(landmark_array)

    return [x, y, x + w, y + h]

# History Function

In [4]:
def pre_process_point_history(image, point_history):
    image_width, image_height = image.shape[1], image.shape[0]

    temp_point_history = copy.deepcopy(point_history)

    # Convert to relative coordinates
    base_x, base_y = 0, 0
    for index, point in enumerate(temp_point_history):
        if index == 0:
            base_x, base_y = point[0], point[1]

        temp_point_history[index][0] = (temp_point_history[index][0] - base_x) / image_width
        temp_point_history[index][1] = (temp_point_history[index][1] - base_y) / image_height

    # Convert to a one-dimensional list
    temp_point_history = list(
        itertools.chain.from_iterable(temp_point_history))

    return temp_point_history
    
def draw_point_history(image, point_history):
    for index, point in enumerate(point_history):
        if point[0] != 0 and point[1] != 0:
            cv.circle(image, (point[0], point[1]), 1 + int(index / 2),
                      (152, 251, 152), 2)

    return image

# Video Loop

In [6]:
##### setting camera #####
cap = cv.VideoCapture(cap_device)
cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)
while cap.isOpened():
    fps = cvFpsCalc.get()
    fps_que.append(fps)

    if cv.waitKey(10) & 0xFF == ord('q'):
        break

    ret, image = cap.read()
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break

    image = cv.flip(image, 1)
    debug_image = copy.deepcopy(image)
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True

    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
            ##### test normalize 
            landmark_point = []
            for _, landmark in enumerate(hand_landmarks.landmark):
                landmark_point.append([landmark.x, landmark.y, landmark.z])
            print(np.max(landmark_point))
            
            ##### mediapipe output normarlize convert to pixel
            landmark_list = calc_landmark_list(debug_image, hand_landmarks)

            ##### hand classifier
            pre_processed_landmark_list = pre_process_landmark(landmark_list)
            hand_sign_id = keypoint_classifier(pre_processed_landmark_list)

            ##### store hand classifier
            if hand_sign_id == 2:  # Point gesture
                point_history.append(landmark_list[8])
            else:
                point_history.append([0, 0])

            ##### point history classifier
            finger_gesture_id = 0
            if len(point_history) == history_length:
                pre_processed_point_history_list = pre_process_point_history(debug_image, point_history)
                finger_gesture_id = point_history_classifier(pre_processed_point_history_list)

            # Calculates the gesture IDs in the latest detection
            finger_gesture_history.append(finger_gesture_id)
            most_common_fg_id = Counter(finger_gesture_history).most_common()
            dynamic_sign = point_history_classifier_labels[most_common_fg_id[0][0]]


            ##### display information|
            ### Draw Landmarks
            mp_drawing.draw_landmarks(debug_image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                      mp_drawing_styles.get_default_hand_landmarks_style(), mp_drawing_styles.get_default_hand_connections_style())

            ### draw bounding box 
            brect = calc_bounding_rect(debug_image, hand_landmarks)
            cv.rectangle(debug_image, (brect[0], brect[1]), (brect[2], brect[3]), (0, 0, 0), 1)

            ### draw info text
            cv.rectangle(debug_image, (brect[0], brect[1]), (brect[2], brect[1] - 22),(0, 0, 0), -1)
            hand_sign_text = keypoint_classifier_labels[hand_sign_id]
            info_text = handedness.classification[0].label[0:]
            if hand_sign_text != "":
                info_text = info_text + ':' + hand_sign_text
            cv.putText(debug_image, info_text, (brect[0] + 5, brect[1] - 4), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1, cv.LINE_AA)
            finger_gesture_text = dynamic_sign
            if finger_gesture_text != "":
                cv.putText(debug_image, "Finger Gesture:" + finger_gesture_text, (10, 60), cv.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 4, cv.LINE_AA)
                cv.putText(debug_image, "Finger Gesture:" + finger_gesture_text, (10, 60), cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2, cv.LINE_AA)

                
        
    debug_image = draw_point_history(debug_image, point_history)
    cv.putText(debug_image, "FPS:" + str(fps), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 4, cv.LINE_AA)
    cv.putText(debug_image, "FPS:" + str(fps), (10, 30), cv.FONT_HERSHEY_SIMPLEX, 1.0, (255, 255, 255), 2, cv.LINE_AA)

    cv.imshow('Raw Webcam Feed', debug_image)        

cap.release()
cv.destroyAllWindows()

1.0365201234817505
0.7444759607315063
0.7214421629905701
0.7290861010551453
0.734440803527832
0.7362349033355713
0.7459344267845154
0.7447525858879089
0.7523049712181091
0.7532569766044617
0.7540429830551147
0.7532059550285339
0.7541732788085938
0.7588261961936951
0.7589492201805115
0.7593148946762085
0.7618511319160461
0.7642464637756348
0.7631121277809143
0.7619937658309937
0.7618247866630554
0.7627140283584595
0.7669035196304321
0.7632111310958862
0.7646145820617676
0.7638397216796875
0.7657806873321533
0.7642182111740112
0.7651827931404114
0.7643954753875732
0.7570432424545288
0.7618802785873413
0.7578134536743164
0.7561758756637573
0.753708004951477
0.7588655352592468
0.7801015973091125
0.8116252422332764
0.8535477519035339
0.9013633728027344
0.9228579998016357
0.9241777062416077
0.9602087736129761
0.9776685833930969
0.9980572462081909
1.029242992401123
1.064603328704834
1.0982863903045654
1.1273870468139648
1.140918493270874
1.1470239162445068
1.1080100536346436
1.081907629966735

# Logging Csv

In [None]:
csv_path = 'model/keypoint_classifier/test_keypoint.csv'
##### setting camera #####
cap = cv.VideoCapture(cap_device)
cap.set(cv.CAP_PROP_FRAME_WIDTH, cap_width)
cap.set(cv.CAP_PROP_FRAME_HEIGHT, cap_height)
while cap.isOpened():
    fps = cvFpsCalc.get()

    if cv.waitKey(10) & 0xFF == ord('q'):
        break

    ret, image = cap.read()
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break

    image = cv.flip(image, 1)
    debug_image = copy.deepcopy(image)
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    image.flags.writeable = False
    results = hands.process(image)
    image.flags.writeable = True

    if results.multi_hand_landmarks:
        for hand_landmarks, handedness in zip(results.multi_hand_landmarks, results.multi_handedness):
            ##### mediapipe output normarlize convert to pixel
            landmark_list = calc_landmark_list(debug_image, hand_landmarks)

            ##### hand classifier
            pre_processed_landmark_list = pre_process_landmark(landmark_list)
            with open(csv_path, 'a', newline="") as f:
                writer = csv.writer(f)
                writer.writerow([1, *pre_processed_landmark_list])

            ##### display information|
            ### Draw Landmarks
            mp_drawing.draw_landmarks(debug_image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                      mp_drawing_styles.get_default_hand_landmarks_style(), mp_drawing_styles.get_default_hand_connections_style())

    cv.imshow('Raw Webcam Feed', debug_image)        

cap.release()
cv.destroyAllWindows()