In [1]:
import os
import cv2
import mediapipe
import numpy as np
from tensorflow.keras.models import load_model
from PIL import ImageFont, ImageDraw, Image

2022-12-02 12:14:01.127201: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def detect_landmarks(holistic, image):
    """Convert a BGR frame to RGB and run it through ``holistic.process``.

    Args:
        holistic: Object exposing a ``process(rgb_image)`` method.
        image: Frame that ``cv2.cvtColor`` can convert with ``COLOR_BGR2RGB``.

    Returns:
        Whatever ``holistic.process`` returns for the converted frame.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return holistic.process(rgb_frame)
def draw_landmarks(image, results):
    """Overlay pose and both hand landmark sets on ``image``.

    Calls ``mp_drawing.draw_landmarks`` once per landmark group (pose, left
    hand, right hand, in that order), each with its own pair of drawing specs.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.

    Returns:
        The same ``image`` object that was passed in.
    """
    make_spec = mp_drawing.DrawingSpec

    # (landmarks, connections, colour of first spec, colour of second spec)
    layers = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 44, 121)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230)),
    )

    for landmarks, connections, first_color, second_color in layers:
        mp_drawing.draw_landmarks(
            image,
            landmarks,
            connections,
            make_spec(color=first_color, thickness=2, circle_radius=4),
            make_spec(color=second_color, thickness=2, circle_radius=2),
        )
    return image
def extract_keypoints(results):
    """Flatten pose and hand landmarks into one 1-D feature vector.

    The vector is laid out as ``[pose | left hand | right hand]``:
    pose contributes ``x, y, z, visibility`` per landmark, each hand
    contributes ``x, y, z`` per landmark. A group that is falsy on ``results``
    is replaced by zeros (33*4 values for pose, 21*3 for each hand).

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable.

    Returns:
        ``numpy.ndarray`` produced by concatenating the three flattened groups.
    """
    def _flatten(group, fields, expected_points):
        # Missing detection -> zero block of the size a full detection would have.
        if not group:
            return np.zeros(expected_points * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    parts = [
        _flatten(results.pose_landmarks, xyz + ("visibility",), 33),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)
def prob_viz(draw, res):
    """Draw one labelled bar per entry of ``res`` onto ``draw``.

    Rows are stacked 60 px apart. Each row has the text looked up via
    ``actions_to_text[actions[i]]`` and, below it, a bar whose width is
    ``int(prob * 100)`` pixels.

    Args:
        draw: Object exposing ``rectangle`` and ``text`` (here a PIL ``ImageDraw``).
        res: Iterable of per-action scores, indexed in the same order as ``actions``.

    Returns:
        The ``draw`` object that was passed in.
    """
    for row, score in enumerate(res):
        offset = row * 60
        bar_top_left = (0, 105 + offset)
        bar_bottom_right = (int(score * 100), 135 + offset)
        draw.rectangle((bar_top_left, bar_bottom_right), fill="#F57510")

        label = actions_to_text[actions[row]]
        draw.text((0, 80 + offset), label, fill=(255, 255, 255), font=font)
    return draw

In [3]:
# MediaPipe modules used by the landmark helpers.
mp_holistic = mediapipe.solutions.holistic
mp_drawing = mediapipe.solutions.drawing_utils

# Font for the on-frame text; the path is relative to the working directory.
fontpath = "NotoSansTC-Regular.otf"  # Google Fonts
font = ImageFont.truetype(fontpath, 50)

# Action label -> text rendered on screen for that action.
actions_to_text = {
    "deal": "辦",
    "card_h": "卡",
    "card": "證件",
    "savings": "儲蓄",
    "need": "要",
    "have": "有",
    "dont_have": "沒有",
    "pass": "護",
    "foreign_currency": "外幣",
    "driver": "駕駛",
    "bank": "銀行",
    "health": "健康",
    "transcript": "謄本",
    "bring": "帶",
    "is": "是",
    "transfer": "轉帳",
    "i": "我",
    "single": "單一",
    "okay": "可以",
    "below": "以下",
    "card_d": "卡",
    "years_old": "歲",
    "good": "好",
    "my": "我的",
    "book": "名簿",
    "stamp": "印章",
    "household": "戶籍/戶口",
    "forget": "忘記",
    "id": "身份",
    "replace": "代替",
    "account": "帳戶",
    "online": "網路",
    "payroll": "薪資",
    "children": "小孩",
    "only_have": "只有",
    "open": "開",
    "help": "幫",
    "question_mark": "?",
    "14": "14",
}

In [4]:
# Load the saved Keras model from the relative path "model/1122".
model = load_model("model/1122")

# Action labels; a label is later picked with the argmax of the model output,
# so the order here must match the model's output order.
actions = np.array([
    "i", "need", "help", "children", "open",
    "bank", "account", "savings", "have",
])

model.summary()

2022-12-02 12:14:14.479699: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 64)                62208     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 9)                 585       
                                                                 
Total params: 66,953
Trainable params: 66,953
Non-trainable params: 0
_________________________________________________________________


In [6]:
# Live demo: read webcam frames, classify the most recent window of keypoints
# and overlay the recognised words plus per-action scores on each frame.

SEQUENCE_LENGTH = 30  # frames of keypoints passed to the model per prediction
STABLE_FRAMES = 7     # recent predictions that must all agree before a word is accepted
MAX_WORDS = 5         # words kept in the banner

sequence = []     # rolling window of per-frame keypoint vectors
predictions = []  # most recent predicted action labels (at most STABLE_FRAMES)
sentence = []     # recognised words shown in the top banner
threshold = 0.5   # minimum winning score for a prediction to be accepted

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the default camera (device 0)")

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while True:
            ok, image = cap.read()
            if not ok or image is None:
                # No frame was delivered (camera unplugged, stream ended, ...).
                # Stop instead of passing None to cv2.cvtColor.
                break

            results = detect_landmarks(holistic, image)
            image = draw_landmarks(image, results)
            cv2.rectangle(image, (0, 0), (800, 80), (245, 117, 16), -1)
            # PIL is used for text because the labels need the TrueType font.
            imgPil = Image.fromarray(image)
            draw = ImageDraw.Draw(imgPil)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-SEQUENCE_LENGTH:]

            if len(sequence) == SEQUENCE_LENGTH:
                # verbose=0: avoid printing a progress bar for every frame.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))
                prediction = actions[best]

                # Only the last STABLE_FRAMES entries are ever inspected, so
                # keep the history bounded.
                predictions.append(prediction)
                predictions = predictions[-STABLE_FRAMES:]

                is_stable = (np.unique(predictions) == prediction).all()
                if is_stable and res[best] > threshold:
                    word = actions_to_text[prediction]
                    # Append only when the word differs from the last one, so
                    # a held sign does not flood the sentence with repeats.
                    if not sentence or word != sentence[-1]:
                        sentence.append(word)

                sentence = sentence[-MAX_WORDS:]

                draw = prob_viz(draw, res)

            draw.text((0, 0), ''.join(sentence), fill=(255, 255, 255), font=font)
            image = np.array(imgPil)
            cv2.imshow('Demo', image)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()



