In [1]:
# # If the libraries are not installed yet, run this cell
# !pip install mediapipe
# !pip install pandas
# !pip install tensorflow
# !pip install numpy

# 1. Import and Install Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from PIL import Image, ImageOps

# 2. Keypoints using MP Holistic

In [2]:
# Short aliases for the two MediaPipe solution modules used by the cells below.
mp_holistic = mp.solutions.holistic # Holistic model (also provides POSE_CONNECTIONS / HAND_CONNECTIONS)
mp_drawing = mp.solutions.drawing_utils # Drawing utilities (draw_landmarks, DrawingSpec)

In [3]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: Frame in BGR channel order (it is converted BGR -> RGB before
            being handed to the model).
        model: Object exposing ``process(rgb_image)``, e.g. a Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is read-only for the duration of the inference call
    # (presumably so the model can use the buffer without copying it).
    rgb_frame.setflags(write=False)
    results = model.process(rgb_frame)
    rgb_frame.setflags(write=True)

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [4]:
def draw_styled_landmarks(image, results):
    """Draw pose, left-hand and right-hand landmarks onto ``image`` in place.

    Args:
        image: Frame passed straight through to ``mp_drawing.draw_landmarks``.
        results: Object exposing ``pose_landmarks``, ``left_hand_landmarks``
            and ``right_hand_landmarks``.

    Returns:
        None.
    """
    # (landmarks, connections, first-spec colour, second-spec colour),
    # drawn in this order: pose, left hand, right hand.
    layers = (
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, (80, 22, 10), (80, 44, 121)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, (245, 117, 66), (245, 66, 230)),
    )

    for landmarks, connections, point_color, line_color in layers:
        # Fourth argument styles the landmark points, fifth the connecting
        # lines (per the MediaPipe draw_landmarks signature).
        mp_drawing.draw_landmarks(
            image,
            landmarks,
            connections,
            mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2),
        )

In [5]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    lh = np.array([[res.x, res.y, res.z, res.visibility] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*4)
    rh = np.array([[res.x, res.y, res.z, res.visibility] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*4)
    return np.concatenate([pose, lh, rh])

# 3. Setup Folders for Collection

In [6]:
# Gesture labels; the real-time cell indexes this array with the arg-max of
# the model output, so the order matters.
# NOTE(review): a later cell assigns `actions`, `WEIGHT_PATH` and
# `sequence_length` again (with a different WEIGHT_PATH) before the model is
# loaded, so the values set here are superseded on a top-to-bottom run.
actions = np.array([
    "Bye", "Eat", "Fine", "Good", "Hello", "I", "You",
    "Love", "_", "Read", "What", "Name", "Sleep", "Your",
])

# Path of the saved model file.
WEIGHT_PATH = './weightsnewdata.h5'

# Number of consecutive keypoint vectors that make up one model input.
sequence_length = 10


# 4. Test in Real Time

In [7]:
import tensorflow as tf

2023-05-05 17:54:39.221839: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-05 17:54:39.410508: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/nhamcotdo/anaconda3/lib/python3.7/site-packages/cv2/../../lib64:
2023-05-05 17:54:39.410557: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-05-05 17:54:41.087394: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; 

In [10]:
# Gesture labels; the real-time cell indexes this array with the arg-max of
# the model output, so the order matters.
actions = np.array([
    "Bye", "Eat", "Fine", "Good", "Hello", "I", "You",
    "Love", "_", "Read", "What", "Name", "Sleep", "Your",
])

# Saved model file that the following cell passes to load_model.
WEIGHT_PATH = './train_ok/weight_rich_padding_bs2_lr0.001_lastest.h5'

# Number of consecutive keypoint vectors that make up one model input.
sequence_length = 10


In [11]:
# Show the installed TensorFlow version (the output saved with this notebook is '2.11.0').
tf.__version__

'2.11.0'

In [12]:
# Bar colours, cycled every three rows.
colors = [(245,117,16), (117,245,16), (16,117,245)]


def prob_viz(res, actions, input_frame, colors):
    """Overlay one probability bar plus label per class on a copy of the frame.

    The first six classes are drawn in a column starting at x=0; the remaining
    classes are drawn in a second column starting 120 px from the right edge.
    Rows are 40 px apart starting at y=60, and a bar is ``int(prob*100)`` px long.

    Args:
        res: Sequence of per-class probabilities.
        actions: Sequence of class labels, indexed like ``res``.
        input_frame: Image array; it is copied, not modified.
        colors: Sequence of at least three colour tuples.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    right_x = input_frame.shape[1] - 120

    # (x origin of the column, index of its first label, its probabilities)
    columns = (
        (0, 0, res[:6]),
        (right_x, 6, res[6:]),
    )
    for left, first_label, probs in columns:
        for row, prob in enumerate(probs):
            top = 60 + row * 40
            cv2.rectangle(output_frame, (left, top), (left + int(prob*100), top + 30), colors[row % 3], -1)
            cv2.putText(output_frame, actions[first_label + row], (left, top + 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame

In [13]:
# Load the saved Keras model used by the real-time cell below.
# NOTE(review): despite its name, WEIGHT_PATH is passed to load_model, so the
# file presumably holds a full saved model rather than weights only — confirm.
model = tf.keras.models.load_model(WEIGHT_PATH)

2023-05-05 17:55:15.608625: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/nhamcotdo/anaconda3/lib/python3.7/site-packages/cv2/../../lib64:
2023-05-05 17:55:15.608677: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2023-05-05 17:55:15.608708: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (nhamcotdo): /proc/driver/nvidia/version does not exist
2023-05-05 17:55:15.609078: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate c

In [15]:
# 1. New detection variables
sequence = []      # rolling window of the latest `sequence_length` keypoint vectors
sentence = []      # recognised words shown in the top banner (last 5 kept)
predictions = []   # recent arg-max class indices (only the last 10 are ever used)
threshold = 0.5    # minimum probability for a word to be appended to `sentence`

# Device 0 is the default camera; a video file path can be passed instead.
cap = cv2.VideoCapture(0)
fps = cap.get(cv2.CAP_PROP_FPS)
print("FPS:", fps)

try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed; stop cleanly when no frame is delivered (camera
            # unplugged, end of a video file) instead of failing inside cvtColor.
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # 2. Prediction logic
            sequence.append(extract_keypoints(results))
            sequence = sequence[-sequence_length:]

            if len(sequence) == sequence_length:
                # Batch of one window -> probability vector over `actions`.
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                best = int(np.argmax(res))
                word = actions[best]
                confidence = res[best]
                print(word)
                print(confidence)

                # Keep only what the stability check below looks at, so the
                # list does not grow for the whole session.
                predictions.append(best)
                predictions = predictions[-10:]

                # NOTE(review): np.unique returns sorted values, so [0] is the
                # *smallest* class index seen in the last 10 predictions, not
                # "all recent predictions agree". The check therefore depends
                # on label order. Behaviour is kept as-is here; confirm the
                # intended smoothing rule before changing it.
                if np.unique(predictions)[0] == best and confidence > threshold:
                    # Avoid repeating the word that is already last in the sentence.
                    if not sentence or word != sentence[-1]:
                        sentence.append(word)

                # Show at most the five most recent words.
                sentence = sentence[-5:]

                # Viz probabilities
                image = prob_viz(res, actions, image, colors)

            # Banner across the full frame width with the current sentence.
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
except KeyboardInterrupt:
    # Interrupting the kernel is a deliberate stop; any other error propagates
    # so it is visible instead of being silently swallowed.
    pass
finally:
    # Always free the camera and close the preview window, exactly once.
    cap.release()
    cv2.destroyAllWindows()


FPS: 30.0


QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to target thread (0x55ecc822dd30)

QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to target thread (0x55ecc822dd30)

QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to target thread (0x55ecc822dd30)

QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to target thread (0x55ecc822dd30)

QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to target thread (0x55ecc822dd30)

QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to target thread (0x55ecc822dd30)

QObject::moveToThread: Current thread (0x55ecc822dd30) is not the object's thread (0x55ecc856aa40).
Cannot move to tar

_
0.996225
_
0.99590826
_
0.9958994
_
0.99772674
_
0.9982963
_
0.9965733
_
0.9977646
_
0.99587375
_
0.99713814
_
0.99550796
_
0.9952761
_
0.9923084
_
0.99586236
_
0.99512273
_
0.99266666
_
0.9940842
_
0.9859359
_
0.98543173
_
0.98056376
_
0.98487145
_
0.96762174
_
0.98361224
_
0.97201097
Eat
0.96298504
Name
0.9995116
Name
0.9999107
Name
0.99967015
Name
0.9997458
Good
0.71274596
Good
0.9996227
Good
0.99997807
Good
0.99998236
Good
0.99999607
Good
0.9065296
Good
1.0
Good
1.0
Good
0.99956316
I
0.9982529
I
0.9999609
I
1.0
I
0.9999999
Eat
0.98376197
Sleep
0.95059174
Eat
0.4874334
_
0.5144036
_
0.523714
_
0.4769276
_
0.48618016
_
0.5217947
_
0.5134777
_
0.5551366
_
0.6154392
_
0.6106598
Name
0.9959009
Name
0.9999554
Name
0.9997477
Name
0.999577
Name
0.90664357
Good
0.99262846
Good
0.9996997
Good
0.99990594
Good
0.9999368
Good
0.51163614
Good
0.5536762
Good
0.72565675
Name
0.43547395
Good
0.7305367
Name
0.5100542
Good
0.81015074
Name
0.46371064
Good
0.7245374
Name
0.46728346
Good
0.73327476
Go

In [84]:
# Open a recorded clip for the padding experiment in the next cell.
# This rebinds `cap`, the same name the real-time cell uses for the webcam.
# NOTE(review): hard-coded absolute path into a local Downloads folder — this
# only works on the original author's machine; consider a configurable data directory.
cap = cv2.VideoCapture('/home/nhamcotdo/Downloads/PhânLoai/PhânLoai/Bye/2023-04-09 12_30_39.avi')

In [86]:
# Grab one frame from the capture opened in the previous cell.
# NOTE(review): the success flag `rt` is never checked; if the read fails,
# `image` is presumably None and the code below will raise — confirm and guard.
rt, image = cap.read()
def resize_with_padding(img, expected_size):
    """Fit an image inside ``expected_size`` and pad it out to exactly that size.

    The array is converted to a PIL image, passed through ``thumbnail`` with
    the target size, and then surrounded by a border that makes up the
    remaining width and height. When the leftover is odd, the extra pixel
    goes to the right / bottom side.

    Args:
        img: Image array accepted by ``PIL.Image.fromarray``.
        expected_size: ``(width, height)`` of the result.

    Returns:
        ``np.ndarray`` built from the padded PIL image.
    """
    target_w, target_h = expected_size[0], expected_size[1]

    pil_img = Image.fromarray(img)
    pil_img.thumbnail((target_w, target_h))
    current_w, current_h = pil_img.size[0], pil_img.size[1]

    extra_w = target_w - current_w
    extra_h = target_h - current_h
    left = extra_w // 2
    top = extra_h // 2

    # Border order is (left, top, right, bottom).
    border = (left, top, extra_w - left, extra_h - top)
    return np.array(ImageOps.expand(pil_img, border))

# Pad the sample frame to 120% of its original width and height.
img = resize_with_padding(image, (int(image.shape[1]*1.2), int(image.shape[0]*1.2)))

cv2.imshow('as', img)
# cv2.waitKey returns an integer key code, so comparing it with the string 'q'
# was always False and the window was never closed by this cell. Compare the
# low byte against ord('q') instead.
# NOTE(review): with a 1 ms timeout this only catches a key pressed at that
# instant; use cv2.waitKey(0) if the cell should block until a key is pressed.
if cv2.waitKey(1) & 0xFF == ord('q'):
    cv2.destroyAllWindows()

In [75]:
# Manual cleanup: close any OpenCV preview windows left open by earlier cells.
cv2.destroyAllWindows()

1.9