In [None]:
import cv2
import numpy as np
import os
import mediapipe as mp
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical  
from tensorflow.keras.models import Sequential  
from tensorflow.keras.layers import LSTM, Dense , Dropout
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from tensorflow.keras.callbacks import EarlyStopping

# Draw the landmark connection lines from the detection results onto the given image
def draw_styled_landmarks(image, results):
    """Draw the face, pose and hand landmarks from ``results`` onto ``image``.

    Each body part is drawn with its own pair of drawing specs: the first
    ``DrawingSpec`` styles the landmark points, the second styles the
    connection lines between them. Colors are given in BGR order.

    Args:
        image: Frame to draw on; the drawing calls receive it directly and
            nothing is returned.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks`` (presumably
            the output of a MediaPipe Holistic ``process`` call).
    """
    # Face mesh: thin lines and small points so the dense mesh stays readable.
    # The connection color used to be (80, 256, 121); 256 does not fit in an
    # 8-bit channel, so the maximum valid value 255 is used instead.
    mpDrawing.draw_landmarks(
        image, results.face_landmarks, mpHolistic.FACEMESH_TESSELATION,
        mpDrawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
        mpDrawing.DrawingSpec(color=(80, 255, 121), thickness=1, circle_radius=1),
    )
    # Body pose skeleton.
    mpDrawing.draw_landmarks(
        image, results.pose_landmarks, mpHolistic.POSE_CONNECTIONS,
        mpDrawing.DrawingSpec(color=(0, 128, 255), thickness=2, circle_radius=4),
        mpDrawing.DrawingSpec(color=(204, 102, 0), thickness=2, circle_radius=2),
    )
    # Left hand.
    mpDrawing.draw_landmarks(
        image, results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS,
        mpDrawing.DrawingSpec(color=(127, 0, 255), thickness=2, circle_radius=4),
        mpDrawing.DrawingSpec(color=(255, 153, 204), thickness=2, circle_radius=2),
    )
    # Right hand.
    mpDrawing.draw_landmarks(
        image, results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS,
        mpDrawing.DrawingSpec(color=(0, 204, 204), thickness=2, circle_radius=4),
        mpDrawing.DrawingSpec(color=(102, 255, 255), thickness=2, circle_radius=2),
    )

def mediapipe_detection(image, model):
    """Run ``model`` on one BGR frame and return the frame with its results.

    Args:
        image: Frame in BGR channel order.
        model: Object with a ``process(rgb_image)`` method (here the
            MediaPipe Holistic instance created by the caller).

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned for it.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Lock the buffer while the model runs so the processing step cannot
    # modify the pixel data, then unlock it again afterwards.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

def extract_keypoints(results):
    """Flatten one frame of landmarks into a single 1-D feature vector.

    The vector is the concatenation, in this order, of pose (x, y, z,
    visibility per landmark), face, left hand and right hand (x, y, z per
    landmark). A part that was not detected is replaced by zeros of a fixed
    size: 132 for pose, 468*3 for face and 21*3 for each hand, which gives
    1662 values when every part is missing or has those landmark counts.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable.

    Returns:
        A 1-D NumPy array with the concatenated values.
    """
    def flatten_or_zeros(landmark_list, fields, fallback_size):
        # Missing detection: keep the slot in the vector, filled with zeros.
        if not landmark_list:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields]
                for point in landmark_list.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    pose_vec = flatten_or_zeros(results.pose_landmarks, xyz + ('visibility',), 132)
    face_vec = flatten_or_zeros(results.face_landmarks, xyz, 468*3)
    left_vec = flatten_or_zeros(results.left_hand_landmarks, xyz, 21*3)
    right_vec = flatten_or_zeros(results.right_hand_landmarks, xyz, 21*3)

    return np.concatenate([pose_vec, face_vec, left_vec, right_vec])
    
# MediaPipe handles used by the drawing helper and the capture loop
mpHolistic = mp.solutions.holistic # Holistic model
mpDrawing = mp.solutions.drawing_utils # Drawing tool for holistic model

# Open the webcam at device index 0 and request a capture resolution.
# NOTE(review): 780 is an unusual frame height for a 1280-wide frame
# (720 is the common value) - confirm it is intentional.
cap_width = 1280
cap_height = 780
camera = cv2.VideoCapture(0)
camera.set(cv2.CAP_PROP_FRAME_WIDTH, cap_width)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, cap_height)


# Mutable state for the real-time loop:
#   sequence    - most recent per-frame keypoint vectors fed to the model
#   sentence    - recognised words shown in the on-screen banner
#   predictions - recent predicted class indices
#   threshold   - minimum predicted probability for a word to be accepted
sequence = []
sentence = []
predictions = []
threshold = 0.8

# Labels the model can output; a predicted class index selects the word here.
# NOTE(review): presumably this order must match the labels used when the
# loaded weights were trained - verify against the training code.
actions = np.array(['hello', 'thanks', 'yes', 'no', 'please'])

# Reuse the capture opened above instead of opening device 0 a second time.
# The second handle was only used for isOpened() while frames were read from
# `camera`, and it was never released. `cap` is kept as an alias so any code
# that still refers to it keeps working.
cap = camera

# Rebuild the network architecture, then load the trained weights into it.
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(45, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.load_weights('model1.h5')

# Number of most recent predictions that must all agree before a word is
# accepted, and the maximum number of words kept in the on-screen sentence.
stability_window = 10
max_sentence_words = 5

try:
    with mpHolistic.Holistic(min_detection_confidence=0.8, min_tracking_confidence=0.5) as holistic:
        while camera.isOpened():
            # Capture one frame; isCaptured is False when the read failed.
            isCaptured, frame = camera.read()
            if not isCaptured:
                break

            frame = cv2.flip(frame, 1)  # Mirror display

            # Detect landmarks on this frame and draw them on the output image.
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)

            # Keep a sliding window of the last 30 frames of keypoints.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]

            if len(sequence) == 30:
                result = model.predict(np.expand_dims(sequence, axis=0))[0]
                predicted = int(np.argmax(result))
                word = actions[predicted]
                print(word)

                predictions.append(predicted)
                predictions = predictions[-stability_window:]

                # Accept the word only when every recent prediction agrees.
                # The previous check, np.unique(...)[0], compared against the
                # smallest label in the window (np.unique sorts its output),
                # which is not a stability test.
                is_stable = all(p == predicted for p in predictions)
                if is_stable and result[predicted] > threshold:
                    # Do not repeat the word that is already last in the sentence.
                    if not sentence or word != sentence[-1]:
                        sentence.append(word)

                if len(sentence) > max_sentence_words:
                    sentence = sentence[-max_sentence_words:]

            # Banner across the full width of the frame with the current sentence.
            cv2.rectangle(image, (0, 0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (500, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('Sign Translator', image)

            # Poll the keyboard once per frame. Two separate waitKey calls
            # each consume key events, so a 'q' or 'c' press could be
            # swallowed by the call that was not checking for it.
            key = cv2.waitKey(10) & 0xFF
            if key == ord('c'):
                sentence.clear()  # 'c' clears the displayed sentence
            elif key == ord('q'):
                break             # 'q' quits
finally:
    # Always free the camera and close the window, even after an error.
    camera.release()
    cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
yes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
no
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
no
[1m1/1[0m 