In [1]:
import cv2
import numpy as np
import mediapipe as mp

from Data_Collection import definitions
from Data_Collection import instantiation
from Data_Collection import detection
from Data_Collection import draw_landmarks
from Data_Collection import extract_keypoints

from tensorflow.keras.models import load_model

# Gate for the real-time webcam test: the recognition loop further down
# only runs when this flag is True.
test = True

In [2]:
# Unpack the seven configuration values returned by definitions().
# NOTE(review): meanings are inferred from the names only (camera size, fps,
# action labels, dataset layout) -- confirm against Data_Collection.
(
    width_cam,
    height_cam,
    fps,
    actions,
    num_sequence,
    sequence_len,
    path,
) = definitions()

# Handles returned by instantiation(); presumably the MediaPipe holistic and
# drawing modules -- verify against Data_Collection.
mp_holistic, mp_drawing = instantiation()

### Testing in Real Time

In [3]:
if test:
    # Real-time recognition: stream webcam frames through MediaPipe Holistic,
    # keep a sliding window of per-frame keypoints, and classify every full
    # window with the trained Keras model loaded from 'action.h5'.
    model = load_model('action.h5')

    # Number of most recent frames passed to the classifier at once.
    # NOTE(review): presumably this should equal `sequence_len` from
    # definitions() -- confirm before replacing the literal.
    window_size = 30

    # Maximum number of accepted predictions retained in `history`.
    max_history = 5

    # Sliding window of keypoint vectors, one entry per frame.
    sequence = []

    # Most recent accepted predictions; consecutive duplicates are skipped.
    history = []

    # Minimum class probability required to accept a prediction.
    threshold = 0.8

    # Open the default webcam; OpenCV delivers frames in BGR order.
    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, width_cam)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height_cam)
    # NOTE(review): `fps` from definitions() is not used here -- confirm
    # whether the hard-coded 30 is intentional.
    cap.set(cv2.CAP_PROP_FPS, 30)

    try:
        # The context manager closes the Holistic graph even if the loop raises.
        with mp.solutions.holistic.Holistic(min_detection_confidence=0.5,
                                            min_tracking_confidence=0.5) as mp_model:
            while True:
                # Read current frame; stop if the camera yields nothing
                # (device missing, unplugged, or stream ended).
                ret, img = cap.read()
                if not ret:
                    break

                # Image detection and landmark overlay
                img, results = detection(img, mp_model)
                draw_landmarks(img, results)

                # Append this frame's keypoints and keep only the newest window
                sequence.append(extract_keypoints(results))
                sequence = sequence[-window_size:]

                hand_visible = bool(results.right_hand_landmarks
                                    or results.left_hand_landmarks)

                # Predict only once a full window of frames is available
                if len(sequence) == window_size:
                    # expand_dims adds the batch axis: one sequence per call
                    res = model.predict(np.expand_dims(sequence, axis=0))[0]
                    best = np.argmax(res)

                    # Accept a confident prediction only while a hand is in
                    # view and only if it differs from the last accepted one.
                    if res[best] > threshold and hand_visible:
                        label = actions[best]
                        if not history or label != history[-1]:
                            history.append(label)

                    history = history[-max_history:]

                # Banner with the latest accepted action. The `history` guard
                # avoids an IndexError when a hand is visible before any
                # prediction has been accepted.
                cv2.rectangle(img, (0, 0), (340, 40), (245, 117, 16), -1)
                if hand_visible and history:
                    cv2.putText(img, str(history[-1]), (3, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

                # Show the feed on every frame so the window appears, and 'q'
                # is honoured, while the first window is still filling up.
                cv2.imshow('Sign-Language Detection', img)

                # Break the loop using the key 'q'
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Always release the camera and close the windows, even on error.
        cap.release()
        cv2.destroyAllWindows()











