In [1]:
import os
import numpy as np
import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

2022-12-01 23:39:49.045623: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load Data

In [7]:
# 1. Load the packaged dataset the model is trained on
DATASET_FILE = 'data-merge-0916.npy'
sequences = np.load(DATASET_FILE)
sequences.shape

(1080, 30, 258)

In [14]:
# 2. Word labels the model has to recognise
actions = np.array([
    'i', 'need', 'help', 'children', 'open',
    'bank', 'account', 'savings', 'have',
])
# Each word maps to its position in `actions`, which is its integer class id
label_map = dict(zip(actions, range(len(actions))))
print(label_map)

{'i': 0, 'need': 1, 'help': 2, 'children': 3, 'open': 4, 'bank': 5, 'account': 6, 'savings': 7, 'have': 8}


In [10]:
# 120 is the number of samples per word; change it if the dataset changes
labels = [label_map[action] for action in actions for _ in range(120)]
# Sanity check: show the first label of every 120-sample run
print([[actions[idx], idx] for idx in labels[::120]])

[['i', 0], ['need', 1], ['help', 2], ['children', 3], ['open', 4], ['bank', 5], ['account', 6], ['savings', 7], ['have', 8]]


In [11]:
# Features: one row per sample, each 30 steps of 258 keypoint values
X = sequences.reshape(-1, 30, 258)
# One-hot encode the integer class ids
y = to_categorical(labels).astype(int)
# A fixed seed makes the split (and the accuracies reported below) reproducible;
# stratifying on the class ids keeps every word equally represented in the
# 10% test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=labels
)
print(X.shape)
print(X_train.shape)

(1080, 30, 258)
(972, 30, 258)


# Train Model

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping

In [14]:
# Recurrent encoder over the (30, 258) keypoint sequence, then a dense head
model = Sequential([
    GRU(64, activation='relu', input_shape=(30, 258)),  # LSTM -> GRU
    Dense(64, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),  # one output per word
])
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)

2022-11-22 21:19:41.774582: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
# Stop when the monitored loss has not improved for 3 epochs and restore the
# best weights seen so far.
# NOTE(review): the monitored quantity is the training loss and fit() receives
# no validation data, so this only detects a training-loss plateau.
callback = EarlyStopping(
    monitor='loss',
    patience=3,
    restore_best_weights=True,
)
model.fit(
    X_train,
    y_train,
    epochs=50,
    callbacks=[callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


<keras.callbacks.History at 0x7f87c7e03b20>

In [17]:
# Print the layer-by-layer architecture and parameter counts of the trained model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 64)                62208     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 9)                 585       
                                                                 
Total params: 66,953
Trainable params: 66,953
Non-trainable params: 0
_________________________________________________________________


In [20]:
def accuracy(X, y, estimator=None):
    """Return the fraction of samples whose predicted class matches the label.

    Args:
        X: Input batch handed to ``predict``; its ``shape`` is printed.
        y: Per-class label rows (e.g. one-hot); the true class of a sample is
            the argmax along axis 1. Its ``shape`` is printed.
        estimator: Object exposing ``predict(X)``. Defaults to the
            notebook-level ``model`` so existing two-argument calls behave as
            before; pass a reloaded model explicitly to evaluate it instead.

    Returns:
        Number of correct argmax predictions divided by the number of
        predictions.
    """
    if estimator is None:
        # Backward-compatible default: the model trained earlier in the notebook
        estimator = model
    print(X.shape, y.shape)
    res = estimator.predict(X)
    predicted = np.argmax(res, axis=1)
    expected = np.argmax(y, axis=1)
    # Separate local names so the result no longer shadows the function itself
    return (predicted == expected).sum() / len(res)

In [21]:
# Report accuracy on the training split first, then on the held-out test split
for split_X, split_y in ((X_train, y_train), (X_test, y_test)):
    print(accuracy(split_X, split_y))

(972, 30, 258) (972, 9)
0.9886831275720165
(108, 30, 258) (108, 9)
0.9814814814814815


# Save & Load Model

In [22]:
# Persist the trained model under Model/GRU.
# NOTE(review): the recorded output below reports assets written to
# model/1122/assets, so it appears to come from a run with a different path.
model.save("Model/GRU")



INFO:tensorflow:Assets written to: model/1122/assets


INFO:tensorflow:Assets written to: model/1122/assets


In [3]:
from tensorflow.keras.models import load_model

In [4]:
# Reload the saved model from Model/GRU and print its architecture so it can be
# compared with the summary of the model that was trained above.
new_model = load_model("Model/GRU")
new_model.summary()

2022-12-01 23:40:30.585001: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 64)                62208     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 9)                 585       
                                                                 
Total params: 66,953
Trainable params: 66,953
Non-trainable params: 0
_________________________________________________________________


In [25]:
# NOTE(review): accuracy() calls predict on the notebook-level `model`, so this
# line does not evaluate the reloaded `new_model` — confirm which one is meant.
print(accuracy(X_train, y_train))

0.9886831275720165


# TFlite

In [23]:
import tensorflow as tf

In [27]:
# Convert the trained Keras model into a serialized TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Permit regular TensorFlow ops in addition to the TFLite built-in op set
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS,
]
# NOTE(review): presumably needed for the recurrent layer's tensor-list ops —
# confirm against the TFLite converter documentation.
converter._experimental_lower_tensor_list_ops = False
tflite_model = converter.convert()

# Write the converted model bytes next to the saved Keras model
with open('Model/GRU.tflite', 'wb') as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: /var/folders/d4/3t110mqx2qd_lfkg5cctqx6c0000gn/T/tmpm25x88up/assets


INFO:tensorflow:Assets written to: /var/folders/d4/3t110mqx2qd_lfkg5cctqx6c0000gn/T/tmpm25x88up/assets


# Realtime Test

In [12]:
import cv2
import mediapipe as mp

In [6]:
# Short aliases for the MediaPipe modules used by the realtime test below
mp_holistic = mp.solutions.holistic # Holistic model (provides Holistic and the landmark connection sets)
mp_drawing = mp.solutions.drawing_utils # Drawing utilities (draw_landmarks, DrawingSpec)

In [7]:
# Bar colour for each class row; 19 identical entries
colors = [(245, 117, 16) for _ in range(19)]


def prob_viz(res, actions, input_frame, colors):
    """Return a copy of ``input_frame`` with one probability bar per class.

    Args:
        res: Iterable of per-class probabilities.
        actions: Class names, indexed in the same order as ``res``.
        input_frame: Image to annotate; it is copied, not modified.
        colors: Fill colour for each bar, indexed by class position.

    Returns:
        The annotated copy of the frame.
    """
    output_frame = input_frame.copy()
    for row, prob in enumerate(res):
        # Rows are stacked 40 px apart, starting 60 px below the top edge
        top = 60 + row * 40
        bar_end = (int(prob * 100), top + 30)
        cv2.rectangle(output_frame, (0, top), bar_end, colors[row], -1)
        cv2.putText(
            output_frame, actions[row], (0, top + 25),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA,
        )

    return output_frame

In [8]:
def mediapipe_detection(image, model):
    """Run ``model.process`` on an RGB copy of a BGR frame and return its result.

    Args:
        image: Frame in BGR channel order; it is not modified.
        model: Object exposing ``process(image)``.

    Returns:
        Whatever ``model.process`` returns for the converted frame.
    """
    # Convert colour order BGR -> RGB (this produces a new array)
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Mark the converted frame read-only before handing it to the model
    rgb_frame.flags.writeable = False
    return model.process(rgb_frame)

In [9]:
def draw_styled_landmarks(image, results):
    """Draw the face, pose and both hand landmark sets onto ``image``.

    Args:
        image: Frame passed to ``mp_drawing.draw_landmarks`` as the drawing
            target.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.

    Returns:
        None.
    """
    # Draw face connections
    mp_drawing.draw_landmarks(
        image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
        mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
        # Channel was 256, which is outside the 8-bit 0-255 range; use 255
        mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)
    )
    # Draw pose connections
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
        mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
    )
    # Draw left hand connections
    mp_drawing.draw_landmarks(
        image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
        mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
    )
    # Draw right hand connections
    mp_drawing.draw_landmarks(
        image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
        mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
        mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
    )

In [10]:
def extract_keypoints_without_face(results):
    """Flatten pose and hand landmarks into one 1-D feature vector.

    The vector is the concatenation of pose (x, y, z, visibility per
    landmark), left hand (x, y, z) and right hand (x, y, z). A landmark set
    that is missing (falsy) is replaced by zeros of length 33*4 for the pose
    and 21*3 for each hand.

    Args:
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable.

    Returns:
        1-D numpy array with the three parts in the order pose, left, right.
    """
    def _flatten(landmark_set, fields, fallback_size):
        # Zero-fill when the detector produced nothing for this body part
        if not landmark_set:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    pose = _flatten(results.pose_landmarks, ('x', 'y', 'z', 'visibility'), 33*4)
    left_hand = _flatten(results.left_hand_landmarks, ('x', 'y', 'z'), 21*3)
    right_hand = _flatten(results.right_hand_landmarks, ('x', 'y', 'z'), 21*3)
    return np.concatenate([pose, left_hand, right_hand])

In [15]:
# 1. Detection state
sequence = []      # rolling window of the latest per-frame keypoint vectors
sentence = []      # accepted words shown in the banner (last 5 at most)
predictions = []   # latest predicted class ids, used to debounce the output
threshold = 0.5    # minimum probability for a word to be accepted

cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            results = mediapipe_detection(frame, holistic)
            # Draw landmarks
            draw_styled_landmarks(frame, results)

            # 2. Prediction logic: the model consumes the last 30 keypoint vectors
            keypoints = extract_keypoints_without_face(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                res = new_model.predict(np.expand_dims(sequence, axis=0))[0]
                best = int(np.argmax(res))
                print(actions[best])
                predictions.append(best)
                # Only the last 10 predictions are ever inspected; dropping the
                # rest keeps the list from growing for the whole session.
                predictions = predictions[-10:]

                # 3. Viz logic
                # Accept a word only when every recent prediction agrees with
                # the current one. The previous check compared against
                # np.unique(...)[0], i.e. the smallest recent class id, which
                # let unstable windows through and blocked stable ones
                # whenever a smaller id was still in the window.
                is_stable = all(p == best for p in predictions)
                if is_stable and res[best] > threshold:
                    # Do not repeat the word that is already last in the sentence
                    if not sentence or actions[best] != sentence[-1]:
                        sentence.append(actions[best])

                if len(sentence) > 5:
                    sentence = sentence[-5:]

                # Viz probabilities
                frame = prob_viz(res, actions, frame, colors)

            # Banner with the recognised sentence
            cv2.rectangle(frame, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(frame, ' '.join(sentence), (3,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', frame)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame fails
    cap.release()
    cv2.destroyAllWindows()

bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
help
help
help
help
help
help
help
help
help
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
need
bank
bank
bank
bank
bank
bank
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
children
bank
bank
bank
bank
bank
bank
bank
bank
bank
account
account
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i
i


account
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
help
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
bank
help
help
help
help
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
savings
help
help
help
help
help
help
help
help
help
help
bank
bank
bank
bank
bank
bank
bank
bank
