# IMPORT LIBRARY

In [4]:
import cv2
import numpy as np 
import os  
from matplotlib import pyplot as plt 
import time
import mediapipe as mp




# FUNCTION DEFINITION FOR OBJECT DETECTION

In [5]:
mp_holistic = mp.solutions.holistic #holistic model
mp_drawing = mp.solutions.drawing_utils #drawing utilities

In [45]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #COLOR CONVERSION
    image.flags.writeable = False #image is no longer writeable
    results = model.process(image) #make prediction
    image.flags.writeable = True #image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) #COLOR COVERSION
    return image, results

In [44]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [43]:
def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(121,22,76), thickness = 2, circle_radius= 4), mp_drawing.DrawingSpec(color=(121,44,250), thickness = 2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, mp_drawing.DrawingSpec(color=(245,117,66), thickness = 2, circle_radius= 4), mp_drawing.DrawingSpec(color=(245,66,230), thickness = 2, circle_radius=2))

# TESTING THE OBJECT DETECTION

In [46]:
cap = cv2.VideoCapture(0)
#set mediapipe model
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic :
    while cap.isOpened():
        #Read Feed
        ret, frame = cap.read()
        #make detection
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        #draw landmarks
        draw_styled_landmarks(image, results)
        #Show to screen
        cv2.imshow('OpenCV Feed', image)
        
        #break gracefully
        key = cv2.waitKey(10)
        if key == 27 :
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

# EXTRACTING KEYPOINTS FROM THE OBJECT LANDMARK

In [56]:
def extract_keypoint(results):
    left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    right_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([left_hand,right_hand])

# MAKING DIRECTORY FOR DATASETS

In [113]:
DATA_PATH = os.path.join('MP_Data')
actions = np.array(['hello','thank you','i love you'])
no_sequences = 5
sequence_length = 5

In [116]:
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

# COLLECTING DATA

In [58]:
cap = cv2.VideoCapture(0)
#set mediapipe model
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic :
    #loop through actions
    for action in actions:
        #loop through sequences/videos
        for sequence in range(no_sequences):
            #loop through video length
            for frame_num in range(sequence_length):
                
                #Read Feed
                ret, frame = cap.read()
                #make detection
                image, results = mediapipe_detection(frame, holistic)
                print(results)
                #draw landmarks
                draw_styled_landmarks(image, results)
                #apply wait logic
                if frame_num == 0:
                    cv2.putText(image,'STARTING COLLECTION',(120,200), cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),4,cv2.LINE_AA)
                    cv2.putText(image,'Collecting frames for {} Video Number {}'.format(action,sequence),(15,12),cv2.FONT_HERSHEY_SIMPLEX, 0.5,(0,0,255),1,cv2.LINE_AA)
                    cv2.waitKey(2000)
                else:
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action,sequence), (15,12),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                
                #New export keypoints
                keypoints = extract_keypoint(results)
                npy_path = os.path.join(DATA_PATH,action,str(sequence), str(frame_num))
                np.save(npy_path, keypoints)
                
                #Show to screen
                cv2.imshow('OpenCV Feed', image)
        
                #break gracefully
                key = cv2.waitKey(10)
                if key == 27 :
                    break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

In [57]:
cap.release()
cv2.destroyAllWindows()

# PREPROCCESS DATA, CREATE LABELS, AND FEATURES

In [59]:
from sklearn.model_selection import train_test_split
import tensorflow
from tensorflow import keras
from keras import utils, layers, models, callbacks
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import TensorBoard

In [60]:
label_map = {label:num for num, label in enumerate(actions)}

In [61]:
label_map

{'hello': 0, 'thank you': 1, 'i love you': 2}

In [62]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])
        

In [63]:
np.array(sequences).shape

(15, 5, 126)

In [64]:
np.array(labels).shape

(15,)

In [65]:
X = np.array(sequences)

In [66]:
y = to_categorical(labels).astype(int)

In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)

# DATASETS TRAINING

In [68]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [69]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [87]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape = (5,126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(62, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [76]:
res = [0.7, 0.2, 0.1]

In [77]:
actions[np.argmax(res)]

'hello'

In [88]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [79]:
model.fit(X_train, y_train, epochs=1350, callbacks=[tb_callback])

Epoch 1/1350
Epoch 2/1350
Epoch 3/1350
Epoch 4/1350
Epoch 5/1350
Epoch 6/1350
Epoch 7/1350
Epoch 8/1350
Epoch 9/1350
Epoch 10/1350
Epoch 11/1350
Epoch 12/1350
Epoch 13/1350
Epoch 14/1350
Epoch 15/1350
Epoch 16/1350
Epoch 17/1350
Epoch 18/1350
Epoch 19/1350
Epoch 20/1350
Epoch 21/1350
Epoch 22/1350
Epoch 23/1350
Epoch 24/1350
Epoch 25/1350
Epoch 26/1350
Epoch 27/1350
Epoch 28/1350
Epoch 29/1350
Epoch 30/1350
Epoch 31/1350
Epoch 32/1350
Epoch 33/1350
Epoch 34/1350
Epoch 35/1350
Epoch 36/1350
Epoch 37/1350
Epoch 38/1350
Epoch 39/1350
Epoch 40/1350
Epoch 41/1350
Epoch 42/1350
Epoch 43/1350
Epoch 44/1350
Epoch 45/1350
Epoch 46/1350
Epoch 47/1350
Epoch 48/1350
Epoch 49/1350
Epoch 50/1350
Epoch 51/1350
Epoch 52/1350
Epoch 53/1350
Epoch 54/1350
Epoch 55/1350
Epoch 56/1350
Epoch 57/1350
Epoch 58/1350
Epoch 59/1350
Epoch 60/1350
Epoch 61/1350
Epoch 62/1350
Epoch 63/1350
Epoch 64/1350
Epoch 65/1350
Epoch 66/1350
Epoch 67/1350
Epoch 68/1350
Epoch 69/1350
Epoch 70/1350
Epoch 71/1350
Epoch 72/1350
E

<keras.src.callbacks.History at 0x219aa59e790>

In [80]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   


 lstm_6 (LSTM)               (None, 5, 64)             48896     
                                                                 
 lstm_7 (LSTM)               (None, 5, 128)            98816     
                                                                 
 lstm_8 (LSTM)               (None, 64)                49408     
                                                                 
 dense_6 (Dense)             (None, 62)                4030      
                                                                 
 dense_7 (Dense)             (None, 32)                2016      
                                                                 
 dense_8 (Dense)             (None, 3)                 99        
                                                                 
Total params: 203265 (794.00 KB)
Trainable params: 203265 (794.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# MAKE PREDICTION

In [81]:
model.predict(X_test)



array([[1.0000000e+00, 1.3437465e-19, 4.9248127e-26]], dtype=float32)

In [108]:
actions[np.argmax(res[2])]

'hello'

In [110]:
actions[np.argmax(y_test[0])]

'hello'

In [85]:
model.save('action.h5')

  saving_api.save_model(


In [86]:
del model

In [89]:
model.load_weights('action.h5')

# EVALUATE THE MODEL USING CONFUSION MATRIX AND ACCURACY

In [90]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [91]:
yhat = model.predict(X_test)



In [92]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [93]:
ytrue

[0]

In [94]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[0, 0],
        [0, 1]]], dtype=int64)

In [95]:
accuracy_score(ytrue, yhat)

1.0

# TEXT TO VOICE

In [96]:
from gtts import gTTS
from  pygame import mixer

In [97]:
def sound(text, language):
    output = gTTS(text=text, lang=language, slow=True)
    output.save(f'{text}.mp3')
    mixer.init()
    mixer.music.load(f'{text}.mp3')
    mixer.music.play()
    time.sleep(1)
    mixer.music.unload()
    os.remove(f'{text}.mp3')

# TEST IN REAL TIME

In [98]:
#new detection variables
sequence = []
sentence = []
temp= 0
threshold = 0.7

cap = cv2.VideoCapture(0)
#set mediapipe model
with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic :
    while True:
        #Read Feed
        ret, frame = cap.read()
        #make detection
        image, results = mediapipe_detection(frame, holistic)
        # print(results)
        #draw landmarks
        draw_styled_landmarks(image, results)
        #prediction logic
        keypoints = extract_keypoint(results)
        sequence.insert(0,keypoints)
        sequence = sequence[:30]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            temp += 1
            if (temp > 50):
                temp = 0
                sound(actions[np.argmax(res)], 'en')
            
        #visualization logic
        if res[np.argmax(res)] > threshold:
            if len(sentence) > 0:
                if actions[np.argmax(res)] != sentence[-1]:
                    sentence.append(actions[np.argmax(res)])
            else:
                sentence.append(actions[np.argmax(res)])
        if len(sentence) > 5:
            sentence = sentence[-5:]
            
        cv2.rectangle(image, (0,0), (640,40), (245,117,16), -1)
        cv2.putText(image, actions[np.argmax(res)], (3,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        #Show to screen
        cv2.imshow('OpenCV Feed', image)
        
        time.sleep(0.1)
        
        #break gracefully
        key = cv2.waitKey(10)
        if key == 27 :
            break
    cap.release()
    cv2.destroyAllWindows()

hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello
hello


In [36]:
cap.release()
cv2.destroyAllWindows()