In [10]:
import cv2
import numpy as np
import mediapipe as mp

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Dropout
import pyttsx3

In [11]:

actions = []

for i in range(65,91):
    actions.append(chr(i))

In [12]:
model = Sequential()

model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(30,126)))
model.add(BatchNormalization())

model.add(LSTM(256, return_sequences=True, activation='relu'))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=False, activation='relu'))

model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(64, activation = 'relu'))

model.add(Dense(32, activation = 'relu'))

model.add(Dense(np.array(actions).shape[0], activation='softmax'))

In [13]:
model.load_weights('testing123.keras')

In [14]:
#Build Keypoints using MP Holistic 
mp_holistic = mp.solutions.holistic # Holistic model 
mp_drawing = mp.solutions.drawing_utils # Drawing utilities 

In [15]:
def mediapipe_detection(image, model): 
    #As to why we are changing the image from BGR2RGB OPENCV reads the image in BGR but mediapipe holistic model needs RGB image for detection
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    results = model.process(image)                 # Make prediction 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR CONVERSION RGB 2 BGR 
    return image, results

In [16]:
def draw_landmarks(image, results): 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,  
                              mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),  
                              mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2) 
                             )
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,  
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),  
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2) 
                             )

In [17]:
def extract_keypoints(results):
    lh = []
    rh = []
    if(results.right_hand_landmarks):
        for res in results.right_hand_landmarks.landmark:
            test = np.array([res.x,res.y,res.z])
            rh.append(test)
    else:
        rh = np.zeros(21*3)
    if(results.left_hand_landmarks):
        for res in results.left_hand_landmarks.landmark:
            test = np.array([res.x,res.y,res.z])
            lh.append(test)
    else:
        lh = np.zeros(21*3)
    lh = np.array(lh).flatten()
    rh = np.array(rh).flatten()
    return np.concatenate([lh,rh])

In [51]:
sequence = []
sentence = []
threshold = 0.8
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.7) as holistic: 
    while cap.isOpened():

        ret, frame = cap.read()

        image , results = mediapipe_detection(frame , holistic)

        if (results.right_hand_landmarks != None) or (results.left_hand_landmarks != None):
            draw_landmarks(image, results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]
            if  len(sequence) == 30:
                res = model.predict(np.expand_dims(sequence,axis=0))[0]
                if(np.max(res)>=threshold):
                    pyttsx3.speak(actions[np.argmax(res)])
                    sequence = []
                    if len(sentence) > 0:
                        if(sentence[-1] != actions[np.argmax(res)]):
                            sentence.append(actions[np.argmax(res)])    
                    else:
                        sentence.append(actions[np.argmax(res)])
                    if len(sentence) > 1:
                        sentence = sentence[-1:]
        
        cv2.putText(image, " ".join(sentence), (200,200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,255), 2, cv2.LINE_AA)
        cv2.imshow("Feed", image)

        if(cv2.waitKey(10) & 0xFF ) == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25

In [48]:
sequence = []
sentence = []
threshold = 0.8
with mp_holistic.Holistic(min_detection_confidence=0.7, min_tracking_confidence=0.7) as holistic: 
    for frame_num in range(0,30):

        frame = cv2.imread("Testing Data//C.jpg")

        image , results = mediapipe_detection(frame , holistic)

        if (results.right_hand_landmarks != None) or (results.left_hand_landmarks != None):
            draw_landmarks(image, results)

            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-30:]
            if  len(sequence) == 30:
                res = model.predict(np.expand_dims(sequence,axis=0))
                if(np.max(res)>threshold):
                    pyttsx3.speak(actions[np.argmax(res)])
                    # sequence = []
                    if len(sentence) > 0:
                        if(sentence[-1] != actions[np.argmax(res)]):
                            sentence.append(actions[np.argmax(res)])    
                    else:
                        sentence.append(actions[np.argmax(res)])
                    if len(sentence) > 5:
                        sentence = sentence[-5:]
                
        cv2.putText(image, " ".join(str(sentence)), (200,200),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2, cv2.LINE_AA)
        cv2.imshow("Feed", image)

        if(cv2.waitKey(10) & 0xFF ) == ord('q'):
            break
    cv2.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


In [29]:
# cap.release()
cv2.destroyAllWindows()