# 1. Import and Install Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from utils.detector import Detector

# 2. Call detector class

In [2]:
mp_detect = Detector()

In [None]:
# for testing

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

# 3. Setup Folders for Collection

In [None]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('Dataset_Keypoints_Data_wo_face2')
# DATA_PATH = os.path.join('Dataset_Keypoints_Data_wo_face_sintetic') 

# Actions that we try to detect
actions = np.array(['halo', 'nama', 'aku', 'perkenalkan', 'r', 'kami', 'd', 'a', 'n', 'i', 'y', 'l', 'u', 'g', 'm', 'NOTHING'])

# 140 videos worth of data
no_sequences = 200

# Videos are going to be 50 frames in length
sequence_length = 50

# Folder start
# start_folder = 1

In [None]:
# for action in actions: 
# dirmax = np.max(np.array(os.listdir(os.path.join(DATA_PATH, '1'))).astype(int))
for action in actions: 
    for sequence in range(200):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

# 4. Collect Keypoint Values for Training and Testing

In [None]:
# actions = np.array(['halo', 'nama', 'aku', 'perkenalkan', 'r', 'kami', 'd', 'a', 'n', 'i', 'y', 'l', 'u', 'g', 'm', 'NOTHING'])
actions = np.array(['y', 'l', 'u', 'g', 'm'])

In [None]:
clear = lambda: os.system('cls')
    # clear()

In [None]:
for action in actions:
    for sequence in range(150):
        # seq = sequence + 3 - 150
        os.system('cls')

        clear = lambda: os.system('cls')

        clear()
        cap = cv2.VideoCapture("F:/Work/2022/ifest-bisindo-translator/ml/dataset/{}/{}.mp4".format(action, sequence))
        print('video ke {}'.format(sequence))
        with mp_detect.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            for frame_num in range(sequence_length):
                if frame_num > 2:
                    # Read feed
                    ret, frame = cap.read()

                    # Make detections
                    image, results = mp_detect.mediapipe_detection(frame, holistic)

                    # Draw landmarks
                    mp_detect.draw_styled_landmarks(image, results)
                    
                    keypoints = mp_detect.extract_keypoints(results)
                    # 
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num-3))
                    np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                        
        cap.release()
        cv2.destroyAllWindows()

In [None]:
print(keypoints)


# 5. Preprocess Data and Create Labels and Features

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# # actions = np.array(['hai', 'halo', 'nama', 'aku', 'saya','a','b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x' ,'y' ,'z'])
# actions = np.array(['hai', 'halo', 'nama', 'aku', 'saya', 'a','b', 'c'])
actions = np.array(['halo', 'nama', 'aku', 'perkenalkan', 'r', 'kami', 'd', 'a', 'n', 'i', 'y', 'l', 'u', 'g', 'm', 'NOTHING'])


In [None]:
label_map = {label:num for num, label in enumerate(actions)}

In [None]:
label_map

In [None]:
DATA_PATH = os.path.join('Dataset_Keypoints_Data_wo_face2') 
sequences, labels = [], []
for action in actions:
    # for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
    for sequence in range(200):    
        window = []
        for frame_num in range(45):
            # res = np.load("F:/Work/2022/ifest-bisindo-translator/ml/Dataset_Keypoints_Data/{}/{}.npy".format(action, frame_num))
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [None]:
np.array(sequences).shape

In [None]:
np.array(labels).shape

In [None]:
X = np.array(sequences)

In [None]:
X.shape

In [None]:
y = to_categorical(labels).astype(int)

In [None]:
X, X_test, y, y_test = train_test_split(X, y, test_size=0.1, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, shuffle=True)

In [None]:
y_test.shape

In [None]:
y_val.shape

In [None]:
__dataname = ['X_test', 'y_test', 'X_train', 'X_val', 'y_train', 'y_val']
__data = [X_test,y_test, X_train, X_val, y_train, y_val]

for i in range(6):
    with open('{}.txt'.format(__dataname[i]), 'w') as f:
        f.write(__dataname[i])

In [None]:
X_train1, X_train2, y_train1, y_train2 = train_test_split(X_train, y_train, train_size=0.5, shuffle=True)
x_train_ = [X_train1, X_train2]
y_train_ = [y_train1, y_train2]

In [None]:
DATA_PATH = os.path.join('Dataset_Keypoints_Data_wo_face_sintetic') 
sequences2, labels2 = [], []
for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        window = []
        for frame_num in range(45):
            # res = np.load("F:/Work/2022/ifest-bisindo-translator/ml/Dataset_Keypoints_Data/{}/{}.npy".format(action, frame_num))
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences2.append(window)
        labels2.append(label_map[action])

In [None]:
y_train_2 = to_categorical(labels2).astype(int)
X_train_2 = np.array(sequences2)

# 7. Build and Train LSTM Neural Network

In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten, GRU
from tensorflow.keras.callbacks import TensorBoard, Callback
from tensorflow.keras.optimizers import Adam
from utils.model import Model

In [None]:
model_ = Model()

In [None]:
# model_ = Model()
model_train = model_.train_main(X, y, 16, 1000, True)

In [None]:
class trainingCallback(Callback):
  def on_epoch_end(self, epoch, logs={}):
    
    # Check accuracy
    # if(logs.get('categorical_accuracy') < 0.95  and logs.get('loss') < 0.35 and logs.get('val_loss') < 0.35):
    if((logs.get('categorical_accuracy') > 0.97) or (logs.get('categorical_accuracy') > 0.95  and logs.get('loss') > logs.get('val_loss'))):
      # Stop if threshold is met
      print("\nAccuracy grater than 0.95 so cancelling training!")
      self.model.stop_training = True

# Instantiate class
callbacks = trainingCallback()

In [None]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
# LSTM Model
model_LSTM = Sequential()
model_LSTM.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(45,174)))
model_LSTM.add(Dropout(0.5))
model_LSTM.add(LSTM(128, return_sequences=True, activation='relu'))
# model_LSTM.add(LSTM(128, return_sequences=False, activation='relu'))
model_LSTM.add(Flatten())
model_LSTM.add(Dense(256, activation='relu'))
model_LSTM.add(Dropout(0.5))
model_LSTM.add(Dense(64, activation='relu'))
model_LSTM.add(Dropout(0.5))
model_LSTM.add(Dense(actions.shape[0], activation='softmax'))

In [None]:
# GRU Model
model_GRU = Sequential()
model_GRU.add(GRU(128, return_sequences=True, activation='relu', input_shape=(45,174)))
model_GRU.add(Dropout(0.5))
model_GRU.add(GRU(128, return_sequences=True, activation='relu'))
# model_GRU.add(GRU(128, return_sequences=False, activation='relu'))
model_GRU.add(Flatten())
model_GRU.add(Dense(256, activation='relu'))
model_GRU.add(Dropout(0.5))
model_GRU.add(Dense(64, activation='relu'))
model_GRU.add(Dropout(0.5))
model_GRU.add(Dense(actions.shape[0], activation='softmax'))

In [None]:
# GRU-LSTM Model
model_GRU_LSTM = Sequential()
model_GRU_LSTM.add(GRU(128, return_sequences=True, activation='relu', input_shape=(45,174)))
model_GRU_LSTM.add(Dropout(0.5))
model_GRU_LSTM.add(LSTM(128, return_sequences=True, activation='relu'))
# model_GRU_LSTM.add(LSTM(128, return_sequences=False, activation='relu'))
model_GRU_LSTM.add(Flatten())
model_GRU_LSTM.add(Dense(256, activation='relu'))
model_GRU_LSTM.add(Dropout(0.5))
model_GRU_LSTM.add(Dense(64, activation='relu'))
model_GRU_LSTM.add(Dropout(0.5))
model_GRU_LSTM.add(Dense(actions.shape[0], activation='softmax'))

In [None]:
# LSTM-GRU Model
model_LSTM_GRU = Sequential()
model_LSTM_GRU.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(45,174)))
model_LSTM_GRU.add(Dropout(0.5))
model_LSTM_GRU.add(GRU(128, return_sequences=True, activation='relu'))
# model_LSTM_GRU.add(GRU(128, return_sequences=False, activation='relu'))
model_LSTM_GRU.add(Flatten())
model_LSTM_GRU.add(Dense(256, activation='relu'))
model_LSTM_GRU.add(Dropout(0.5))
model_LSTM_GRU.add(Dense(64, activation='relu'))
model_LSTM_GRU.add(Dropout(0.5))
model_LSTM_GRU.add(Dense(actions.shape[0], activation='softmax'))

In [None]:
model_LSTM.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model_GRU.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model_LSTM_GRU.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model_GRU_LSTM.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [None]:
# model_train = model.fit(X_train, y_train, epochs=10, batch_size=64,validation_data=(X_val,y_val), callbacks=[tb_callback])
model_train_GRU = model_GRU.fit(X_train, y_train, epochs=1000, batch_size=64,validation_data=(X_val,y_val), callbacks=[callbacks])

# model.fit(X_train, y_train, epochs=1000, callbacks=[tb_callback])

In [None]:
model_train_LSTM = model_LSTM.fit(X_train, y_train, epochs=1000, batch_size=64,validation_data=(X_val,y_val), callbacks=[callbacks])


In [None]:
model_train_LSTM_GRU = model_LSTM_GRU.fit(X_train, y_train, epochs=1000, batch_size=64,validation_data=(X_val,y_val), callbacks=[callbacks])


In [None]:
model_train_GRU_LSTM = model_GRU_LSTM.fit(X_train, y_train, epochs=1000, batch_size=64,validation_data=(X_val,y_val), callbacks=[callbacks])

In [None]:
loss, val_loss, cat_accuracy = [], [], []
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['categorical_accuracy'])
for _epochs in range(400):
    count = _epochs % 12
    if (count < 10 and (_epochs < 150 or (_epochs < 250 and _epochs > 200))):
        if (count < 4):
            x_train__ = x_train_[0]
            y_train__ = y_train_[0]
        elif (count < 8):
            x_train__ = x_train_[1]
            y_train__ = y_train_[1]
        elif (count < 10):
            x_train__ = X_train_2
            y_train__ = y_train_2
    else :
        x_train__ = X_train
        y_train__ = y_train
    model_train = model.fit(x_train__, y_train__, epochs=_epochs+1, batch_size=64,validation_data=(X_val,y_val),initial_epoch=_epochs)
    _loss = model_train.history['loss']
    _val_loss = model_train.history['val_loss']
    _cat_accuracy = model_train.history['categorical_accuracy']
    loss.append(_loss[len(_loss)-1])
    val_loss.append(_val_loss[len(_loss)-1])
    cat_accuracy.append(_cat_accuracy[len(_loss)-1])
    if (loss[len(loss)-1] < val_loss[len(loss)-1] and cat_accuracy[len(loss)-1] > 0.96):
        break





In [None]:
print("LSTM")
model_LSTM.summary()
print("GRU")
model_GRU.summary()
print("LSTM-GRU")
model_LSTM_GRU.summary()
print("GRU-LSTM")
model_GRU_LSTM.summary()


In [None]:
# Get training and test loss histories
training_loss = model_train.history['loss']
test_loss = model_train.history['val_loss']
avg = []

for i in range(len(loss)):
    sum = loss[i]+val_loss[i]
    avg.append(sum/2)

# avg = np.array([training_loss, test_loss])
# np.average(avg)

# Create count of the number of epochs
epoch_count = range(1, len(loss) + 1)

# Visualize loss history
plt.plot(epoch_count, loss, 'r--')
plt.plot(epoch_count, val_loss, 'b-')
plt.plot(epoch_count, avg, 'g-')
plt.legend(['Training Loss', 'Test Loss', 'Average'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()
# print(avg)

# 8. Make Predictions

In [10]:
res = model.predict(X_test)

NameError: name 'model' is not defined

In [None]:
actions[np.argmax(res[3])]

In [None]:
actions[np.argmax(y_test[3])]

# 9. Save Weights

In [None]:
model.save('model_6.h5')

In [None]:
del model

In [None]:
# LSTM Model
model = Sequential()
model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(45,174)))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=True, activation='relu'))
# model.add(LSTM(128, return_sequences=False, activation='relu'))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activation='softmax'))

In [None]:
model.load_weights('model_6.h5')

# 10. Evaluation using Confusion Matrix and Accuracy

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [None]:
yhat = model.predict(X_test)

In [None]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [None]:
confussionMatrix = multilabel_confusion_matrix(ytrue, yhat)
from sklearn.metrics import confusion_matrix
confussionMatrix = confusion_matrix(ytrue, yhat)

In [None]:
print(confussionMatrix)
with open('confussionMatrixModel_2.txt', 'w') as f:
    f.write(str(confussionMatrix))

In [None]:
accuracy = accuracy_score(ytrue, yhat)
print(accuracy)
# with open('accuracyScoreModel_2.txt', 'w') as f:
#     f.write(str(accuracy))

In [None]:
from sklearn.metrics import classification_report
print('\nClassification Report\n')
print(classification_report(ytrue, yhat, target_names=actions))

In [None]:
import pandas as pd
cm_df = pd.DataFrame(confussionMatrix,
                     index = actions, 
                     columns = actions)

import seaborn as sns
#Plotting the confusion matrix
plt.figure(figsize=(10,8))
sns.heatmap(cm_df, annot=True)
plt.title('Confusion Matrix')
plt.ylabel('Actual Values')
plt.xlabel('Predicted Values')
plt.show()


# 11. Test in Real Time

In [None]:
from scipy import stats

In [None]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        # cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [None]:
plt.figure(figsize=(18,18))
plt.imshow(prob_viz(res, actions, image, colors))

In [None]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5
frame_save = []

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_detect.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()
        frame_save.append(frame)
        # Make detections
        image, results = mp_detect.mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        mp_detect.draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = mp_detect.extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-45:]
        
        if len(sequence) == 45:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            # if len(sentence) > 5: 
            #     sentence = sentence[-5:]

            # Viz probabilities
            # image = prob_viz(res, actions, image, colors)

        # cv2.rectangle(image, (0,0), (320, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    # clear = lambda: os.system('cls')
    # clear()
    print(sentence)

# Test using batch processing

In [3]:
sequence = []
sentence = []
predictions = []
threshold = 0.5
frame_save = []
i = 0

cap = cv2.VideoCapture("C:/Users/MDaniyalK/Documents/Work/2022/Tutur-ifest app/IMG_3705.mp4")
with mp_detect.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # for frame_num in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):
    while True:
        # if frame_num > 3:
        ret, frame = cap.read()
        if ret:
            frame_save.append(frame)
        else:
            break

        # Make detections
        # image, results = mp_detect.mediapipe_detection(frame, holistic)
        # # print(results)
        
        # # Draw landmarks
        # mp_detect.draw_styled_landmarks(image, results)
        
        # # 2. Prediction logic
        # keypoints = mp_detect.extract_keypoints(results)
        # # 
        # # npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num-4))
        # # np.save(npy_path, keypoints)

        # sequence.append(keypoints)
        # sequence = sequence[-45:]
        
        # if len(sequence) == 45:
        #     res = model.predict(np.expand_dims(sequence, axis=0))[0]
        #     print(actions[np.argmax(res)])
        #     predictions.append(np.argmax(res))
            
            
        # #3. Viz logic
        #     if np.unique(predictions[-10:])[0]==np.argmax(res): 
        #         if res[np.argmax(res)] > threshold: 
                    
        #             if len(sentence) > 0: 
        #                 if actions[np.argmax(res)] != sentence[-1]:
        #                     sentence.append(actions[np.argmax(res)])
        #             else:
        #                 sentence.append(actions[np.argmax(res)])

        #     # if len(sentence) > 5: 
        #     #     sentence = sentence[-5:]

        #     # Viz probabilities
        #     # image = prob_viz(res, actions, image, colors)

        # # cv2.rectangle(image, (0,0), (320, 40), (245, 117, 16), -1)
        # cv2.putText(image, ' '.join(sentence), (3,30), 
        #                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        # cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
                
    cap.release()
    cv2.destroyAllWindows()
    clear = lambda: os.system('cls')
    clear()

print(sentence)

[]


In [4]:
print('length : {} frames'.format(len(frame_save)))
print(frame_save)

length : 593 frames
[array([[[ 25,  34,  35],
        [ 25,  34,  35],
        [ 25,  34,  35],
        ...,
        [ 10,  18,  17],
        [ 11,  19,  18],
        [ 11,  19,  18]],

       [[ 25,  34,  35],
        [ 25,  34,  35],
        [ 25,  34,  35],
        ...,
        [ 10,  18,  17],
        [ 11,  19,  18],
        [ 11,  19,  18]],

       [[ 25,  34,  35],
        [ 25,  34,  35],
        [ 25,  34,  35],
        ...,
        [ 10,  18,  17],
        [ 11,  19,  18],
        [ 11,  19,  18]],

       ...,

       [[ 23,  33,  37],
        [ 29,  39,  43],
        [ 37,  47,  51],
        ...,
        [121, 145, 155],
        [121, 145, 155],
        [121, 145, 155]],

       [[ 26,  36,  40],
        [ 33,  43,  47],
        [ 42,  52,  56],
        ...,
        [121, 145, 155],
        [121, 145, 155],
        [121, 145, 155]],

       [[ 32,  42,  46],
        [ 40,  50,  54],
        [ 50,  60,  64],
        ...,
        [121, 145, 155],
        [121, 145, 155],
   

In [5]:
from utils.extractor import Extractor

In [6]:
extract = Extractor()
print(extract.actions)

['halo' 'nama' 'aku' 'perkenalkan' 'r' 'kami' 'd' 'a' 'n' 'i' 'y' 'l' 'u'
 'g' 'm' 'NOTHING']


In [25]:
def frames_to_keypoint(frames, len_frame):
        #TODO: Create frames to keypoint detection from model (batch processing)
        keypoints = []
        predictions = []
        model = Sequential()
        model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(45,174)))
        model.add(Dropout(0.5))
        model.add(LSTM(128, return_sequences=True, activation='relu'))
        # model.add(LSTM(128, return_sequences=False, activation='relu'))
        model.add(Flatten())
        model.add(Dense(256, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(16, activation='softmax'))
        
        model.load_weights('model_6.h5')
        
        with mp_detect.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            for i in range(len_frame):
                image, results = mp_detect.mediapipe_detection(frames[i], holistic)
                keypoint = mp_detect.extract_keypoints(results)
                keypoints.append(keypoint)
                keypoints = keypoints[-45:]
                if len(keypoints) == 45:
                    res = model.predict(np.expand_dims(keypoints, axis=0))[0]
                    if len(predictions) > 0:
                        if predictions[len(predictions)-1] != extract.actions[np.argmax(res)]:
                            predictions.append(extract.actions[np.argmax(res)])
                            # keypoints = keypoints[-43:]
                            # i+=2
                        else:
                            i += 5
                            keypoints = keypoints[-35:]
                    else:
                        predictions.append(extract.actions[np.argmax(res)])
                        # keypoints = keypoints[-43:]
                        # i+=2
        
        return predictions

In [7]:

output = extract.frames_to_keypoint(frame_save, len(frame_save))



In [8]:
print(output)

['a', 'halo', 'a', 'g', 'd', 'a', 'g', 'perkenalkan', 'm', 'nama', 'm']


In [None]:
from skimage.metrics import structural_similarity as ssim
sequence = []
sentence = []
predictions = []
threshold = 0.5
isStart = False
frame = frame_save
with mp_detect.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    for i in range(len(frame)):
        if i > 0:
            if count > 44:
                isStart == False
            if (isStart == False):
                count = 0
                simlarityIndex = ssim(frame[i-1], frame[i], multichannel = True)
            if simlarityIndex < 0.8 and count < 45:
                isStart == True
                print("motion detected")
                # Make detections
                image, results = mp_detect.mediapipe_detection(frame[i], holistic)
                print(results)
                
                # Draw landmarks
                mp_detect.draw_styled_landmarks(image, results)
                
                # 2. Prediction logic
                keypoints = mp_detect.extract_keypoints(results)
                # 
                # npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num-4))
                # np.save(npy_path, keypoints)

                sequence.append(keypoints)
                # sequence = sequence[-45:]
                
                if len(sequence) == 45:
                    res = model.predict(np.expand_dims(sequence, axis=0))[0]
                    print(actions[np.argmax(res)])
                    predictions.append(np.argmax(res))
                    sequence = []
                    
                    
                #3. Viz logic
                    if np.unique(predictions[-10:])[0]==np.argmax(res): 
                        if res[np.argmax(res)] > threshold: 
                            
                            if len(sentence) > 0: 
                                if actions[np.argmax(res)] != sentence[-1]:
                                    sentence.append(actions[np.argmax(res)])
                            else:
                                sentence.append(actions[np.argmax(res)])

                    # if len(sentence) > 5: 
                    #     sentence = sentence[-5:]

                    # Viz probabilities
                    # image = prob_viz(res, actions, image, colors)
                    

                count+=1
                
            i+=1

print(sentence)

In [None]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5
frame_save = []

cap = cv2.VideoCapture("C:/Users/MDaniyalK/Documents/Work/2022/Tutur-ifest app/IMG_3705.mp4")
# Set mediapipe model 
with mp_detect.mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while True:

        # Read feed
        ret, frame = cap.read()
        # while ret == True:
            
        b = cv2.resize(frame,(720,1280),fx=0,fy=0, interpolation = cv2.INTER_CUBIC)
        # Make detections
        frame = b
        image, results = mp_detect.mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        mp_detect.draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = mp_detect.extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-45:]
        
        if len(sequence) == 45:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            # if len(sentence) > 5: 
            #     sentence = sentence[-5:]

            # Viz probabilities
            # image = prob_viz(res, actions, image, colors)

        # cv2.rectangle(image, (0,0), (320, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        # cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    clear = lambda: os.system('cls')
    clear()
    print(sentence)