!pip install tensorflow tensorflow-gpu opencv-python mediapipe sklearn matplotlib

!pip install pyttsx3

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [6]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [2]:
mpHands = mp.solutions.hands
mpDraw = mp.solutions.drawing_utils

In [3]:
def extract_keypts(result):
    lms = []
    if result.multi_hand_landmarks:
        myhand = result.multi_hand_landmarks[0]
        lms=np.array([[res.x, res.y, res.z] for res in myhand.landmark]).flatten()
    else:
        lms=np.zeros(21*3)
    return lms

Create Folders for Landmarks

In [4]:
data_path = os.path.join('mp_data')
letters = np.array(['A', 'B', 'C','D','E'])
no_seq = 30
seq_len = 30

In [5]:
for l in letters:
    for seq in range(no_seq):
        try:
            os.makedirs(os.path.join(data_path,l,str(seq)))
        except:
            pass

Data Collection

In [None]:
cap = cv2.VideoCapture(0)
with mpHands.Hands(static_image_mode=False, max_num_hands=2, model_complexity=1, min_detection_confidence=0.5, min_tracking_confidence=0.5, ) as hands:
    
    for let in letters:
        for seq in range(no_seq):
            for frame_num in range(seq_len):
                ret, img = cap.read()
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                result= hands.process(img)
                
                if result.multi_hand_landmarks:
                    for handLms in result.multi_hand_landmarks:
                        mpDraw.draw_landmarks(img,handLms,mpHands.HAND_CONNECTIONS, mpDraw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),mpDraw.DrawingSpec(color=(80256,121),thickness=1, circle_radius=1))
                
                if frame_num == 0:
                    cv2.putText(img,"start collection {}",(130,200),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),4,cv2.LINE_AA)
                    cv2.putText(img,"collecting frame for {l} video {s:d}".format(l=let,s=seq),(15,12),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),4,cv2.LINE_AA)
                    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    cv2.imshow("feed",img)
                    cv2.waitKey(2000)
                else:
                    cv2.putText(img,"collecting frame for {l} video {s:d}".format(l=let,s=seq),(15,12),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,255),4,cv2.LINE_AA)
                    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    cv2.imshow("feed",img)
                keypoints  =  extract_keypts(result)
                np_path = os.path.join(data_path,let,str(seq),str(frame_num))
                np.save(np_path, keypoints)
                
               

        
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
    cap.release()
    cv2.destroyAllWindows()

In [7]:
labelmap = {label:num for num,label in enumerate(letters)}

In [8]:
sequences,label = [], []
for let in letters:
    for seq in range(no_seq):
        window = []
        for fn in range(seq_len):
            res = np.load(os.path.join(data_path, let,str(seq),"{}.npy".format(fn)))
            window.append(res)
        
        sequences.append(window)
      
        label.append(labelmap[let])

In [9]:
X = np.array(sequences)

In [10]:
Y  = to_categorical(label).astype(int)

In [11]:
import tensorflow as tf

In [12]:
x_train, x_test, y_train, y_test  = train_test_split(X,Y,test_size = 0.2)

LSTM

In [13]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from datetime import datetime

In [14]:
log_dir = os.path.join('Logs',datetime.now().strftime("%Y%m%d-%H%M%S"))
tb_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

In [15]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,63))) 
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(letters.shape[0], activation='softmax'))



In [16]:
model.compile(optimizer = 'Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [27]:
model.fit(x_train,y_train,epochs=500,callbacks=[tb_callback])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

KeyboardInterrupt: 

In [28]:
re = model.predict(x_test)



In [56]:
letters[np.argmax(re[5])]

'E'

In [57]:
letters[np.argmax(y_test[5])]

'E'

In [31]:
model.save('l1.h5')

In [21]:
model.load_weights('l1.h5')

In [32]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [33]:
yhat = model.predict(x_test)



In [34]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [35]:
multilabel_confusion_matrix(ytrue, yhat)

array([[[25,  0],
        [ 0,  5]],

       [[24,  0],
        [ 0,  6]],

       [[26,  0],
        [ 0,  4]],

       [[25,  0],
        [ 0,  5]],

       [[20,  0],
        [ 0, 10]]], dtype=int64)

In [36]:
accuracy_score(ytrue,yhat)

1.0

Real time

In [1]:
import pyttsx3 as ptx

In [63]:
sequence = []
let_p = ''
threshold = 0.7

cap = cv2.VideoCapture(0)
with mpHands.Hands(static_image_mode=False, max_num_hands=2, model_complexity=1, min_detection_confidence=0.5, min_tracking_confidence=0.5, ) as hands:
    
    while cap.isOpened():
        #cap.set(cv2.CAP_PROP_FPS,5)
        ret, img = cap.read()
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        result= hands.process(img)
        #print(result)
        if result.multi_hand_landmarks:
            for handLms in result.multi_hand_landmarks:
                 mpDraw.draw_landmarks(img,handLms,mpHands.HAND_CONNECTIONS, mpDraw.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),mpDraw.DrawingSpec(color=(80256,121),thickness=1, circle_radius=1))
        
        key_pts = extract_keypts(result)
        sequence.append(key_pts)
        #sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            sequence.clear()
            #print(res)
        
        if res[np.argmax(res)] > threshold:
            if letters[np.argmax(res)] != let_p:
                let_p = letters[np.argmax(res)]
                engine = ptx.init()
                engine.say(let_p)
                engine.runAndWait()
        cv2.rectangle(img, (0,0), (100, 80), (200,168,50), -1)
        cv2.putText(img, let_p, (20,30), cv2.FONT_HERSHEY_SIMPLEX,1, (40,255,90),2,cv2.LINE_AA)
        
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imshow("Image",img)
        #let_p = ''
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()