## 1. Import and Install Dependencies

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# 2. Keypoints using MP Holistic

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [3]:
def mediapipe_detection(image , model):
    image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                     # Image is no longer writable
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                     # Image is now writable
    image = cv2.cvtColor(image , cv2.COLOR_RGB2BGR)   # COLOR CONVERSION RGB 2 BGR
    return image , results

In [4]:
# Function to draw styled landmarks to the frame
def draw_styled_landmarks(image, results):
    # Drawing face landmarks 
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=1, circle_radius=1)
                             )
    
    #Drawing pose landmarks
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=2, circle_radius=2)
                             )
    
    # Drawing left hand landmarks
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             )
    
    # Drawing right hand landmarks
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             )

# 3. Extract Keypoint Values

In [5]:
def extract_holistic_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

def extract_left_hand_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    return np.concatenate([lh])

def extract_right_hand_keypoints(results):
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([rh])

def extract_face_keypoints(results):
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([face])

def extract_pose_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    return np.concatenate([pose])

def extract_both_hand_keypoints(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([lh, rh])

# 4. Setup Folders for Collection

In [6]:
DATA_PATH_FOR_BOTH_HAND_KEYPOINTS = os.path.join('Dataset')

# Action that we try to detect
actions = np.array(['Hey' , 'There' , 'Hello' , 'How' , 'Are' , 'You' , 'I am' , 'Good' , 'Fine' , 'I Love You' , 'Please' , 'Stop This' , 'Stay Here' , 'Help' , 'No' , 'Victory' , 'Walk' , 'Pray'])

# Thirty videos worth of data
no_sequences = 10000

# Each video will be going to 30 frames in length
sequence_length = 1

In [10]:
# Creating folder to store data
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH_FOR_BOTH_HAND_KEYPOINTS, action, str(sequence))) # Directory to store BOTH HAND keypoint
        except:
            pass

# 5. Collection Keypoint Values for Training and Testing

In [9]:
# Accessing the camera
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FPS , 23)

# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence = 0.5 , min_tracking_confidence = 0.5) as holistic:
    
    #looping through action
    for action in actions:
        # looping through sequences of videos
        for sequence in range(no_sequences):
            #looing through video length sequence length
            for frame_num in range(sequence_length):
                
                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame , holistic)

                # Draw landmarks
                draw_styled_landmarks(image , results)
                
                # Apply wait logic
                if frame_num == 0 :
                    cv2.putText(image, 'START COLLECTION', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} video number {}'.format(action, sequence), (15, 12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Feed' , image)
                    # cv2.waitKey(2000)
                else:
                    cv2.putText(image, 'Collecting frames for {} video number {}'.format(action, sequence), (15, 12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1, cv2.LINE_AA)
                    cv2.imshow('OpenCV Feed' , image)
                
                # Export BOTH HAND keypoints
                both_hand_keypoints = extract_both_hand_keypoints(results)
                npy_both_hand_path = os.path.join(DATA_PATH_FOR_BOTH_HAND_KEYPOINTS, action, str(sequence), str(frame_num))
                np.save(npy_both_hand_path, both_hand_keypoints)
                
                # Show to the screen
                # cv2.imshow('OpenCV Feed' , frame)
                
    
                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
        cv2.waitKey(10000)
        
# Releasing the web camera
cap.release()

cv2.destroyAllWindows()

KeyboardInterrupt: 

In [10]:
# Releasing the web camera
cap.release()

cv2.destroyAllWindows()

# 6. Preprocess Data and Create Lables and Features

In [7]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [8]:
label_map = {lable:num for num, lable in enumerate(actions)}

In [9]:
label_map

{'Hey': 0,
 'There': 1,
 'Hello': 2,
 'How': 3,
 'Are': 4,
 'You': 5,
 'I am': 6,
 'Good': 7,
 'Fine': 8,
 'I Love You': 9,
 'Please': 10,
 'Stop This': 11,
 'Stay Here': 12,
 'Help': 13,
 'No': 14,
 'Victory': 15,
 'Walk': 16,
 'Pray': 17}

In [10]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH_FOR_BOTH_HAND_KEYPOINTS, action, str(sequence) , "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [11]:
# defining the value of X cordinates
x = np.array(sequences)

In [12]:
# Defining the value of Y cordinates
y = to_categorical(labels).astype(int)

In [13]:
#split the data for the train and testing
# We are testing with 5% of the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

# 7. Built and Traing LSTM Neural Network

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM , Dense
from tensorflow.keras.callbacks import TensorBoard

In [11]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [12]:
# Creating the DEEP LEARNING model
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(1,126)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [13]:
# Compiling the model
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [18]:
model.fit(x_train, y_train, epochs=2000, callbacks=[tb_callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
 107/3938 [..............................] - ETA: 29s - loss: 0.0055 - categorical_accuracy: 0.9980

KeyboardInterrupt: 

In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 64)             48896     
                                                                 
 lstm_1 (LSTM)               (None, 1, 128)            98816     
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 18)                594       
                                                                 
Total params: 203,954
Trainable params: 203,954
Non-trai

# 8. Make predictions

In [20]:
res=model.predict(x_test)



In [21]:
actions[np.argmax(res[2])]

'Stay Here'

In [22]:
actions[np.argmax(y_test[2])]

'Stay Here'

# 9. Save Weights

In [14]:
model.save('18_action(word_sentance)_both_hand.h5')

In [24]:
# del model

In [14]:
model.load_weights('16_action(word_sentance)_both_hand.h5')

# 10. Evaluation using Confussion Matrix and Accuracy 

In [16]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [27]:
yhat = model.predict(x_test)



In [28]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [29]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[50950,     0],
        [    5,  3045]],

       [[50938,     2],
        [    0,  3060]],

       [[51049,     1],
        [   13,  2937]],

       [[51083,     2],
        [    5,  2910]],

       [[50990,     3],
        [   14,  2993]],

       [[51025,    12],
        [   21,  2942]],

       [[51001,     1],
        [    5,  2993]],

       [[51004,     1],
        [    4,  2991]],

       [[50925,     0],
        [    0,  3075]],

       [[50964,     3],
        [    0,  3033]],

       [[50900,    69],
        [    0,  3031]],

       [[51056,    10],
        [    8,  2926]],

       [[51013,     5],
        [   15,  2967]],

       [[51044,     3],
        [    6,  2947]],

       [[50953,     4],
        [   13,  3030]],

       [[50990,     1],
        [    1,  3008]],

       [[50956,     5],
        [    1,  3038]],

       [[51037,     0],
        [   11,  2952]]], dtype=int64)

In [30]:
accuracy_score(ytrue, yhat)

0.9977407407407407

# 11. Test in Reallife

In [15]:
# Function to display probability in the realtiome
colors = [(245,117,16), (117,245,16), (16,117,245) , (210,100,20), (100,190,50), (245,117,16), (117,245,16), (16,117,245) , (210,100,20), (100,190,50), (245,117,16), (117,245,16), (16,117,245) , (210,100,20), (100,190,50), (245,117,16), (117,245,16), (16,117,245) , (210,100,20), (100,190,50), (245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

In [16]:
# Import library to convert text into speech
import pyttsx3
engine = pyttsx3.init()

# Import libraries to make async call to the speech function
import multiprocessing
import os

# Import library to perform multithreadting
import threading

# Function that will convert text to the speak
def texToSpeech(word):
    engine.say("hii")
    engine.runAndWait()
    

# 1. New detection variables
sequence = []
sentence = []
threshold = 0.95

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_both_hand_keypoints(results)
#         sequence.insert(0,keypoints)
#         sequence = sequence[:30]
        sequence.append(keypoints)
        sequence = sequence[-1:]
        
        if len(sequence) == 1:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])            
            
        #3. Viz logic
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
#                         p = multiprocessing.Process(target=texToSpeech , args = actions[np.argmax(res)])
#                         p = multiprocessing.Process(target=texToSpeech)
#                         p.start()
#                         t = threading.Thread(target = texToSpeech , args = actions[np.argmax(res)])
#                         t.start()
#                         engine.say(actions[np.argmax(res)])
#                         engine.runAndWait()
                else:
                    sentence.append(actions[np.argmax(res)])
#                     p = multiprocessing.Process(target=texToSpeech , args = actions[np.argmax(res)])
#                     p = multiprocessing.Process(target=texToSpeech )
#                     p.start()
#                     t = threading.Thread(target = texToSpeech , args = actions[np.argmax(res)])
#                     t.start()
#                     engine.say(actions[np.argmax(res)])
#                     engine.runAndWait()

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        scale_percent = 150 # percent of original size
        width = int(image.shape[1] * scale_percent / 100)
        height = int(image.shape[0] * scale_percent / 100)
        dim = (width, height)

        # resize image
        image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Hey
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Please
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Please
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Please
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Please
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Please
<class 'm

In [228]:
np.array(sequence).shape

(1, 1662)

In [35]:
from multiprocessing import Pool
def f(x):
    print(x*x)
    return x*x

if __name__ == '__main__':
    pool = Pool(processes=1)              # Start a worker processes.
    result = pool.apply_async(f, [10]) # Evaluate "f(10)" asynchronously calling callback when finished.

print("I am smit patel")

I am smit patel


In [38]:
from multiprocessing import Pool
def f(x):
    print("hello")
    return x*x

pool = Pool(processes=1)              # Start a worker processes.
result = pool.apply_async(f, [10] , callback) # Evaluate "f(10)" asynchronously calling callback when finished.

print("I am smit patel")

NameError: name 'callback' is not defined

In [56]:
import multiprocessing
  
def print_cube():
    print("In the cube")

def print_square(num):
    print("In the square")
    
if __name__ == "__main__":
    p1 = Process(target = print_square)
    p2 = Process(target = print_cube)
    
    p1.start()
    p2.start()
    
    p1.join()
    p2.join()
    
    print("done")
    
  

NameError: name 'Process' is not defined

In [41]:
# importing the multiprocessing module
import multiprocessing
import os
  
def worker1():
    # printing process id
    print("ID of process running worker1: {}".format(os.getpid()))
  
def worker2():
    # printing process id
    print("ID of process running worker2: {}".format(os.getpid()))
  
if __name__ == "__main__":
    # printing main program process id
    print("ID of main process: {}".format(os.getpid()))
  
    # creating processes
    p1 = multiprocessing.Process(target=worker1)
    p2 = multiprocessing.Process(target=worker2)
  
    # starting processes
    p1.start()
    p2.start()
  
    # process IDs
    print("ID of process p1: {}".format(p1.pid))
    print("ID of process p2: {}".format(p2.pid))
  
    # wait until processes are finished
    p1.join()
    p2.join()
  
    # both processes finished
    print("Both processes finished execution!")
  
    # check if processes are alive
    print("Process p1 is alive: {}".format(p1.is_alive()))
    print("Process p2 is alive: {}".format(p2.is_alive()))

ID of main process: 10264
ID of process p1: 27292
ID of process p2: 4808
Both processes finished execution!
Process p1 is alive: False
Process p2 is alive: False


In [2]:
from multiprocessing import Process
import threading

import pyttsx3
engine = pyttsx3.init()

def textTospeech(word):
    engine.say(word)
    engine.runAndWait()

def f(name):
    print('hello', name)

if __name__ == '__main__':
#     p = Process(target=f, args=('bob',))
#     p = threading.Thread(target=f, args=('bob',))
    p = threading.Thread(target=textTospeech, args=('THis is the smit patel',))
    p.start()
    p.endLoop()
    print("After thread abcd")
#     p.join()

Exception in thread Thread-6 (textTospeech):
Traceback (most recent call last):
  File "C:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "C:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\DELL\AppData\Local\Temp\ipykernel_12456\3061053150.py", line 9, in textTospeech
  File "C:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\pyttsx3\engine.py", line 177, in runAndWait
    raise RuntimeError('run loop already started')
RuntimeError: run loop already started


AttributeError: 'Thread' object has no attribute 'endLoop'

In [3]:
p.endLoop()

AttributeError: 'Thread' object has no attribute 'endLoop'