In [1]:
import cv2
import numpy as np
import tensorflow as tf
import mediapipe as mp
import time

In [2]:
# Short aliases for the two MediaPipe sub-modules used throughout the notebook.
mp_drawing = mp.solutions.drawing_utils  # landmark/connection drawing helpers
mp_holistic = mp.solutions.holistic      # Holistic solution (model class + HAND_CONNECTIONS)

# MediaPipe detection helper
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single OpenCV frame.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being handed to the model).
        model: Object exposing ``process(rgb_image)``; the notebook passes a
            ``mp.solutions.holistic.Holistic`` instance.

    Returns:
        tuple: ``(bgr_image, results)`` where ``bgr_image`` is the frame
        converted back to BGR and ``results`` is whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is flagged read-only only for the duration of the inference
    # call (presumably so MediaPipe can use it without copying - TODO confirm).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

# Draw hand connections
def draw_landmarks(image, results):
    """Draw the detected left and right hand skeletons onto ``image``.

    Each hand is drawn only when its landmarks are present in ``results``.
    The two hands use different colour pairs so they can be told apart.

    Args:
        image: Image that ``mp_drawing.draw_landmarks`` draws on (in place).
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes (falsy when a hand is absent).
    """
    # (landmarks, landmark-point colour, connection-line colour) for each hand.
    per_hand_style = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )

    for hand_landmarks, point_color, line_color in per_hand_style:
        if not hand_landmarks:
            continue

        point_spec = mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4)
        line_spec = mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(
            image,
            hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            landmark_drawing_spec=point_spec,
            connection_drawing_spec=line_spec,
        )

# Keypoint extraction
def extract_keypoints(results):
    """Flatten both hands' landmarks into a single feature vector.

    Args:
        results: Object with ``left_hand_landmarks`` and
            ``right_hand_landmarks`` attributes. Each is either falsy (hand
            not detected) or has a ``landmark`` iterable whose items expose
            ``x``, ``y`` and ``z``.

    Returns:
        numpy.ndarray: Left-hand values followed by right-hand values, each
        laid out as ``x, y, z`` per landmark. A missing hand contributes
        ``21 * 3`` zeros.
    """
    def _flatten_hand(hand):
        # A missing hand is represented by an all-zero block of 21*3 values.
        if not hand:
            return np.zeros(21*3)
        coords = [[point.x, point.y, point.z] for point in hand.landmark]
        return np.array(coords).flatten()

    left = _flatten_hand(results.left_hand_landmarks)
    right = _flatten_hand(results.right_hand_landmarks)
    return np.concatenate([left, right])

# Class labels, indexed by the position of the model's highest output score:
# the 26 letters A-Z followed by seven Indonesian words.
actions = np.array(
    [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    + ['Halo', 'Perkenalkan', 'Nama', 'Saya', 'Senang', 'Bertemu', 'Kamu']
)


In [3]:
# Load the trained classifier from an HDF5 file in the working directory.
# NOTE(review): the file name contains "25seq" while the prediction loop
# collects 30 frames per sequence - confirm the model's expected input length.
model = tf.keras.models.load_model(
    filepath="33C_GRU_acc098_loss01_25seq_yud.h5",
)



In [None]:
# ---- Mutable buffers used by the prediction loop ----
# sequence:    keypoint vectors of the sequence currently being collected
# sentence:    labels accepted so far (shown on screen)
# predictions: class index of every prediction made
sequence, sentence, predictions = [], [], []

# ---- Tunables ----
threshold = 0.95     # the top score must exceed this for a label to be accepted
countdown_time = 3   # seconds counted down before collecting starts
display_time = 2     # seconds the sentence stays on screen

# ---- State machine ----
state = 'countdown'          # one of 'countdown', 'collecting', 'displaying'
start_time = time.time()     # timestamp at which the current state began

# ---- Devices / modules ----
cap = cv2.VideoCapture(0)            # capture device index 0
mp_holistic = mp.solutions.holistic

# Countdown overlay helper
def show_countdown(image, seconds_left):
    """Draw the "prediction starts in N.." banner onto ``image`` in place.

    Args:
        image: Image passed to ``cv2.putText`` (modified in place).
        seconds_left: Value interpolated into the banner text.
    """
    message = f'Memulai prediksi dalam {seconds_left}..'
    origin = (100, 250)        # bottom-left corner of the text, in pixels
    text_color = (0, 0, 255)   # red when the image is in BGR order
    font_scale, stroke = 2, 4

    cv2.putText(image, message, origin, cv2.FONT_HERSHEY_SIMPLEX,
                font_scale, text_color, stroke, cv2.LINE_AA)

# Live prediction loop, driven by a three-state machine stored in `state`:
#   'countdown'  -> overlay a countdown on the raw frame, then start collecting
#   'collecting' -> run MediaPipe on every frame and buffer the keypoints until
#                   30 frames are gathered, then classify the buffered sequence
#   'displaying' -> overlay the current sentence for `display_time` seconds,
#                   then return to 'countdown'
# Press 'q' in the OpenCV window to stop.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # Restart the state clock here so the time spent opening the camera and
    # building the Holistic graph is not counted against the first countdown.
    start_time = time.time()

    try:
        while cap.isOpened():
            # Read frame
            ret, frame = cap.read()
            if not ret or frame is None:
                # The grab failed (camera unplugged, stream ended, ...); there
                # is no image to process, so stop instead of crashing below.
                break

            current_time = time.time()

            if state == 'countdown':
                seconds_left = countdown_time - int(current_time - start_time)
                image = frame.copy()
                show_countdown(image, seconds_left)

                if seconds_left <= 0:
                    state = 'collecting'
                    start_time = current_time
                    sequence = []

            elif state == 'collecting':
                # Perform detection
                image, results = mediapipe_detection(frame, holistic)
                draw_landmarks(image, results)

                # Buffer this frame's keypoints
                keypoints = extract_keypoints(results)
                sequence.append(keypoints)

                # NOTE(review): the model file name contains "25seq" while 30
                # frames are collected here - confirm the expected length.
                if len(sequence) == 30:
                    res = model.predict(np.expand_dims(sequence, axis=0))[0]
                    best = np.argmax(res)
                    label = actions[best]

                    print(label)
                    predictions.append(best)

                    # Accept the label when the model is confident enough and it
                    # does not repeat the previous word.
                    # The former `np.unique(predictions[-28:])[0] == argmax`
                    # gate was removed: np.unique returns sorted values, so it
                    # compared against the *smallest* recent class index and
                    # rejected any class with a higher index than one predicted
                    # earlier (e.g. 'B' could not follow 'A').
                    if res[best] > threshold and (not sentence or label != sentence[-1]):
                        sentence.append(label)

                    # Keep only the five most recent words on screen
                    if len(sentence) > 5:
                        sentence = sentence[-5:]

                    state = 'displaying'
                    start_time = current_time

            elif state == 'displaying':
                # `image` is not reassigned in this state, so the window keeps
                # showing the last frame rendered while collecting, with the
                # sentence drawn on top of it.
                if len(sentence) > 0:
                    cv2.putText(image, ' '.join(sentence), (3, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                if (current_time - start_time) > display_time:
                    state = 'countdown'
                    start_time = current_time

            # Show the frame
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even when the loop is
        # interrupted or an exception is raised inside it.
        cap.release()
        cv2.destroyAllWindows()

In [19]:
# Manual cleanup cell: run it after interrupting the prediction loop to close
# the OpenCV window and free the camera.
cv2.destroyAllWindows()
cap.release()