## Import Libraries


In [2]:
import cv2
import numpy as np
import os
import time
import datetime
import mediapipe as mp
from matplotlib import pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

## Data Loading

In [3]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('D:/College/PKMKC/Sound Degla/New Dataset') 

# Actions that we try to detect: every entry found in DATA_PATH becomes one label.
# os.listdir() makes no guarantee about ordering, so the names are sorted to keep
# the label -> index mapping identical on every machine and every run.
actions = np.array(sorted(os.listdir(DATA_PATH)))
# actions = np.array(['a', 'b', 'c', 'd', 'e', 'aku', 'anda', 'kamu', 'saya', 'selamat pagi', 'selamat siang'])
# actions = np.array(['aku', 'anda', 'kamu', 'saya', 'selamat pagi', 'selamat siang'])
# actions = np.array(['aku', 'apa', 'bagaimana', 'berapa', 'dimana', 'kamu', 'kapan', 'kenapa', 'nama', 'sama_sama', 'terimakasih', 'tidak'])

# One hundred videos (sequences) worth of data
no_sequences = 100

# Videos are going to be 30 frames in length
sequence_length = 30

actions

array(['a', 'aku', 'anda', 'b', 'c', 'd', 'e', 'kamu', 'saya',
       'selamat pagi', 'selamat siang'], dtype='<U13')

In [4]:
# Use the action names as labels and pair each one with its integer position
label_map = dict(zip(actions, range(len(actions))))
label_map

{'a': 0,
 'aku': 1,
 'anda': 2,
 'b': 3,
 'c': 4,
 'd': 5,
 'e': 6,
 'kamu': 7,
 'saya': 8,
 'selamat pagi': 9,
 'selamat siang': 10}

## Functions

In [5]:
# Grab the MediaPipe solution modules used throughout the notebook
mp_holistic = mp.solutions.holistic         # Holistic model used to detect body landmarks
mp_drawing = mp.solutions.drawing_utils     # Drawing utilities used to render the body landmarks

In [6]:
# Run landmark detection on a single frame
def mediapipe_detection(image, model):
    """Run ``model`` on a frame and hand the frame back together with the results.

    Args:
        image: Frame to analyse; it is converted BGR -> RGB before processing.
        model: Object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(image, results)`` where ``image`` is the frame converted back
        RGB -> BGR and ``results`` is whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Keep the buffer read-only for the duration of the model call
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [7]:
# Draw the detected landmarks with a custom style per body part
def draw_styled_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto ``image`` in place.

    Args:
        image: Frame that ``mp_drawing.draw_landmarks`` draws on.
        results: Detection results exposing ``face_landmarks``,
            ``pose_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        None.
    """
    spec = mp_drawing.DrawingSpec
    # One row per layer: (landmarks, connections, landmark style, connection style).
    # Colour channels are kept inside the valid 0-255 range.
    layers = [
        # Face connections
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         spec(color=(80,255,121), thickness=1, circle_radius=1)),
        # Pose connections
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        # Left hand connections
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        # Right hand connections
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    ]
    for landmarks, connections, landmark_spec, connection_spec in layers:
        mp_drawing.draw_landmarks(image, landmarks, connections, landmark_spec, connection_spec)

In [8]:
# Extract the landmark keypoints and pack them into one flat array
def extract_keypoints(results):
    """Concatenate pose, face, left-hand and right-hand landmarks into one vector.

    Args:
        results: Detection results exposing ``pose_landmarks``,
            ``face_landmarks``, ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy or has a
            ``landmark`` iterable.

    Returns:
        1-D NumPy array in the order pose (x, y, z, visibility), face, left
        hand, right hand (x, y, z each). A missing group is replaced by zeros
        sized for 33, 468, 21 and 21 landmarks respectively.
    """
    def _flat(group, fields, count):
        # No detection for this group -> zero block of the expected length
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    return np.concatenate([
        _flat(results.pose_landmarks, xyz + ('visibility',), 33),
        _flat(results.face_landmarks, xyz, 468),
        _flat(results.left_hand_landmarks, xyz, 21),
        _flat(results.right_hand_landmarks, xyz, 21),
    ])

In [9]:
# Load the previously trained model from its saved .h5 file
model = load_model('D:/College/PKMKC/Sound Degla/Code/Model/model-1.0 (2023-8-3).h5')

In [10]:
# Palette for the probability bars; it is cycled when there are more classes than colours
colors = [(245,117,16), (117,245,16), (16,117,245)]
# Visualise the probability of every label as a horizontal bar
def prob_viz(res, actions, input_frame, colors):
    """Return a copy of ``input_frame`` with one probability bar per class.

    Args:
        res: Iterable of per-class probabilities.
        actions: Sequence of class names, indexed like ``res``.
        input_frame: Frame to annotate; it is copied, not modified.
        colors: Non-empty sequence of colour tuples. It is reused cyclically,
            so it may be shorter than ``res``.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        # Wrap around the palette instead of indexing past its end
        bar_color = colors[num % len(colors)]
        top = 60 + num*40
        cv2.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
    return output_frame

## Live Testing


In [16]:
# 1. New detection variables
sequence = []        # Keypoint vectors of the most recent frames, oldest first
sentence = []        # Recognised words displayed on screen
predictions = []     # Class indices predicted for the most recent windows
threshold = 0.8      # Minimum probability required to accept a prediction
stable_window = 10   # Number of recent predictions that must all agree

# Open the webcam
cap = cv2.VideoCapture(0)
try:
    # Set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed; stop as soon as the camera fails to deliver a frame
            ret, frame = cap.read()
            if not ret:
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # 2. Prediction logic
            # Extract the keypoints and keep a sliding window in chronological order.
            # NOTE(review): assumes the training sequences were stored oldest-frame-first
            # - confirm against the data-collection notebook.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequence_length:]

            # Once a full window of frames is available
            if len(sequence) == sequence_length:
                # np.expand_dims adds the leading batch axis expected by the model;
                # verbose=0 suppresses the per-call progress output
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))
                # Show the predicted label
                print(actions[best])
                # Remember only the most recent predictions (bounded memory)
                predictions.append(best)
                predictions = predictions[-stable_window:]

                # 3. Visualization logic
                # Accept the word only if every recent prediction agrees with the
                # current one and its probability exceeds the threshold
                if all(p == best for p in predictions) and res[best] > threshold:
                    word = actions[best]
                    # Do not repeat the word that was added last
                    if not sentence or word != sentence[-1]:
                        sentence.append(word)

                # Keep only the last 5 words of the sentence
                sentence = sentence[-5:]

                # Viz probabilities
                # image = prob_viz(res, actions, image, colors)

            # Draw the banner across the full frame width
            cv2.rectangle(image, (0,0), (image.shape[1], 40), (245, 117, 16), -1)
            # Write the sentence into the banner
            cv2.putText(image, ' '.join(sentence), (3,30), 
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even after an error or interrupt
    cap.release()
    cv2.destroyAllWindows()

selamat siang
selamat siang
selamat siang
selamat siang
e
a
a
a
a
a
a
a
a
a
a
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
e
e
a
a
a
a
a
a
a
a
selamat siang
selamat siang
selamat siang
selamat siang
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
d
d
d
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
a
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
selamat siang
sela

In [14]:
# Manual cleanup: release the capture object and close all OpenCV windows
# (presumably for when the live-testing cell was interrupted before its own cleanup ran)
cap.release()
cv2.destroyAllWindows()