## Import Library


In [1]:
import cv2
import numpy as np
import os
import time
import datetime
import mediapipe as mp
from matplotlib import pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

## Data Loading

In [2]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('D:/College/PKMKC/Sound Degla/Database/New Dataset') 

# Actions that we try to detect
actions = np.array(os.listdir(DATA_PATH))
# actions = np.array(['a', 'b', 'c', 'd', 'e', 'aku', 'anda', 'kamu', 'saya', 'selamat pagi', 'selamat siang'])
# actions = np.array(['aku', 'anda', 'kamu', 'saya', 'selamat pagi', 'selamat siang'])
# actions = np.array(['aku', 'apa', 'bagaimana', 'berapa', 'dimana', 'kamu', 'kapan', 'kenapa', 'nama', 'sama_sama', 'terimakasih', 'tidak'])

# Thirty videos worth of data
no_sequences = 100

# Videos are going to be 30 frames in length
sequence_length = 30

actions

array(['a', 'aku', 'anda', 'b', 'c', 'd', 'e', 'kamu', 'saya',
       'selamat pagi', 'selamat siang'], dtype='<U13')

In [3]:
# Mengambil actions sebagai label dan mengubahnya menjadi numerical
label_map = {label:num for num, label in enumerate(actions)}
label_map

{'a': 0,
 'aku': 1,
 'anda': 2,
 'b': 3,
 'c': 4,
 'd': 5,
 'e': 6,
 'kamu': 7,
 'saya': 8,
 'selamat pagi': 9,
 'selamat siang': 10}

## Data Preparation

In [4]:
# Mengambil keypoint landmark per frame setiap video untuk setiap action
sequences, labels = [], []      # Sequences -> fitur dari datanya

# Looping untuk setiap actions
for action in actions:
    # Looping untuk setiap video
    for sequence in range(no_sequences):
        window = []     # Window -> List untuk keypoint semua frame setiap video
        # Looping untuk setiap frame
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            # Memasukkan keypoint frame ke dalam list window
            window.append(res)
        # Memasukkan window ke list sequences per video
        sequences.append(window)
        # Menambah label untuk setiap action
        labels.append(label_map[action])

KeyboardInterrupt: 

In [None]:
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,


In [None]:
# Menjadikan sequences sebagai fitur dari data
X = np.array(sequences)

# Mengubah label menjadi categorical list
y = to_categorical(labels).astype(int)

In [None]:
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [6]:
# Memisahkan dataset menjadi data train & data test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

## Modeling

In [7]:
# Menyimpan log callback dari proses training
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [17]:
X_train.shape

(1045, 30, 1662)

In [14]:
y_train.shape

(1045, 11)

In [8]:
# Membuat arsitektur model
model = Sequential()
model.add(LSTM(32, return_sequences=True, activation='relu', input_shape=(30,1662)))    # input_shape = (frame, keypoint)
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [9]:
# Melakukan compile pada model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer, 
    loss='categorical_crossentropy', 
    metrics=['categorical_accuracy']
)

In [10]:
# Melakukan training
model.fit(
    X_train, 
    y_train, 
    epochs=5, 
    batch_size=12,
    callbacks=[tb_callback]
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x14015dd3b20>

In [116]:
# Melakukan prediction dengan model yang telah dilatih
res = model.predict(X_test)

# Menampilkan data prediksi pada index ke-5
actions[np.argmax(res[4])]



'c'

In [117]:
# Menampilkan data true pada index ke-5
actions[np.argmax(y_test[4])]

'c'

## Evaluation

In [118]:
# Melakukan prediction dengan model yang telah diload
yhat = model.predict(X_test)



In [119]:
# Mengubah data asli dan hasil prediksi menjadi list
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [120]:
# Menampilkan multilabel confusion matrix
multilabel_confusion_matrix(ytrue, yhat)

array([[[51,  0],
        [ 0,  4]],

       [[48,  0],
        [ 0,  7]],

       [[52,  0],
        [ 0,  3]],

       [[51,  0],
        [ 0,  4]],

       [[49,  0],
        [ 0,  6]],

       [[50,  0],
        [ 0,  5]],

       [[46,  0],
        [ 0,  9]],

       [[51,  0],
        [ 0,  4]],

       [[53,  0],
        [ 0,  2]],

       [[48,  0],
        [ 0,  7]],

       [[51,  0],
        [ 0,  4]]], dtype=int64)

In [121]:
# Menampilkan akurasi dari model
acc = accuracy_score(ytrue, yhat)
acc = round(acc, 3)
print(f'Akurasi : {acc}')

Akurasi : 1.0


In [122]:
# Menyimpan model yang telah ditraining
now = datetime.datetime.now()
now = f'{now.year}-{now.month}-{now.day}'
model.save(f'Model/model-{acc} ({now}).h5')

## Live Testing


In [123]:
# colors = [(245,117,16), (117,245,16), (16,117,245)]
# # Function untuk menampilkan visualisasi probabilitas dari label
# def prob_viz(res, actions, input_frame, colors):
#     output_frame = input_frame.copy()
#     for num, prob in enumerate(res):
#         cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
#         cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
#     return output_frame

In [124]:
# # 1. New detection variables
# sequence = []       # List untuk setiap frame
# sentence = []       # List untuk menaruh kata hasil prediksi
# predictions = []    # List untuk menaruh hasil prediksi
# threshold = 0.6     # Confidence matrix

# # Mengakses webcam
# cap = cv2.VideoCapture(0)
# # Set mediapipe model 
# with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#     while cap.isOpened():

#         # Read feed
#         ret, frame = cap.read()

#         # Make detections
#         image, results = mediapipe_detection(frame, holistic)
#         # print(results)
        
#         # Draw landmarks
#         draw_styled_landmarks(image, results)
        
        
#         # 2. Prediction logic
#         # Mengekstrak keypoint
#         keypoints = extract_keypoints(results)
#         sequence.insert(0,keypoints)
#         sequence = sequence[:30]
#         # sequence.append(keypoints)
#         # sequence = sequence[-30:]
        
#         # Jika sudah 30 frame
#         if len(sequence) == 30:
#             # Melakukan prediction
#             res = model.predict(np.expand_dims(sequence, axis=0))[0]    # np.expand_dims -> Berfungsi untuk mengubah shape pada array
#             # Menampilkan hasil prediksi
#             print(actions[np.argmax(res)])
#             # Menambah hasil prediksi ke list predictions
#             predictions.append(np.argmax(res))
            
            
#         #3. Visualization logic
#             # Mengecek hasil dari prediksi apakah sama dengan hasil prediksi pada 10 frame terakhir atau tidak
#             if np.unique(predictions[-10:])[0]==np.argmax(res):
#                 # Mengecek apakah probabilitas hasil prediksi lebih dari threshold atau tidak
#                 if res[np.argmax(res)] > threshold: 
#                     # Jika dalam sentence sudah ada kata
#                     if len(sentence) > 0: 
#                         # Mengecek action yang sedang dideteksi tidak sama dengan action yang terakhir 
#                         if actions[np.argmax(res)] != sentence[-1]:
#                             # Menambahkan kata ke dalam sentence
#                             sentence.append(actions[np.argmax(res)])
#                     else:
#                         sentence.append(actions[np.argmax(res)])

#             # Jika dalam sentence sudah ada 5 kata
#             if len(sentence) > 5: 
#                 # Mengambil 5 kata yang terakhir dari sentence
#                 sentence = sentence[-5:]

#             # Viz probabilities
#             image = prob_viz(res, actions, image, colors)
        
#         # Mengambar box di window
#         cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
#         # Menaruh setence ke window
#         cv2.putText(image, ' '.join(sentence), (3,30), 
#                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
#         # Show to screen
#         cv2.imshow('OpenCV Feed', image)

#         # Break gracefully
#         if cv2.waitKey(10) & 0xFF == ord('q'):
#             break
#     cap.release()
#     cv2.destroyAllWindows()

In [125]:
cap.release()
cv2.destroyAllWindows()

NameError: name 'cap' is not defined