# 1. Import Dependencies


In [None]:
!pip install mediapipe opencv-python matplotlib tensorflow

In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp


# 2. Keypoints using MP Holistic

In [None]:
# Holistic solution module: provides the Holistic model class and the
# landmark connection sets (POSE_CONNECTIONS, HAND_CONNECTIONS, ...) used below
mp_holistic = mp.solutions.holistic # Holistic model
# Drawing helpers (draw_landmarks, DrawingSpec) used to render landmarks on a frame
mp_drawing = mp.solutions.drawing_utils # drawing utilities

In [None]:
def mp_detection(image : np.ndarray, model):
    """
        Run the holistic model on one OpenCV frame.

        image : opencv frame (BGR channel order)
        model : holistic model from mediapipe

        Returns a tuple (frame converted back to BGR, results of model.process).
    """
    # The model is fed an RGB copy of the frame
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only while the model processes it, then restore it
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand back a BGR frame so it can be drawn on / displayed with OpenCV
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, detection

In [None]:
def draw_styled_landmarks(image, results):
    """
        Draw the face, pose and hand landmarks of ``results`` onto ``image``.

        image : opencv frame / image, drawn on in place
        results : the results of the holistic model from mediapipe
                  (use mp_detection to get them)

        Returns nothing. Drawing stays best effort: a part without landmarks is
        skipped, and a part that fails to draw is reported through a warning
        (instead of being silently swallowed) so the other parts still render.
    """
    import warnings  # local import: keeps this definition self-contained

    # NOTE(review): newer mediapipe releases appear to expose the face contour
    # set as FACEMESH_CONTOURS instead of FACE_CONNECTIONS - confirm against the
    # installed version. Without a connection set only the points are drawn.
    face_connections = getattr(mp_holistic, "FACE_CONNECTIONS", None)
    if face_connections is None:
        face_connections = getattr(mp_holistic, "FACEMESH_CONTOURS", None)

    spec = mp_drawing.DrawingSpec
    # (label, attribute on results, connection set, landmark style, connection style)
    parts = (
        ("face", "face_landmarks", face_connections,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         spec(color=(80,255,121), thickness=1, circle_radius=1)),  # was 256: channels are 0-255
        ("pose", "pose_landmarks", mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        ("left hand", "left_hand_landmarks", mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        ("right hand", "right_hand_landmarks", mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )

    for label, attr, connections, landmark_style, connection_style in parts:
        landmarks = getattr(results, attr, None)
        if landmarks is None:
            # This part is not in the frame: nothing to draw
            continue
        try:
            mp_drawing.draw_landmarks(image, landmarks, connections,
                                      landmark_style, connection_style)
        except Exception as err:
            warnings.warn(f"could not draw {label} landmarks: {err}", RuntimeWarning)

In [None]:
# Test access to the webcam: run the holistic model on every frame until the
# stream ends or 'q' is pressed.

cap = cv2.VideoCapture(0) # device 0

try:
    # min_detection_confidence: confidence for the initial detection,
    # min_tracking_confidence: confidence for tracking those detections
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:

        while cap.isOpened():
            ret, grabbed = cap.read() # read one frame at this point in time
            if not ret:
                # No frame delivered (camera unplugged / busy): stop instead of
                # handing None to the colour conversion
                break
            frame = grabbed # `frame` always holds the last valid frame

            # detection of the keypoints; `resuts` (sic) is read by the next cell
            image, resuts = mp_detection(frame, holistic_model)

            cv2.imshow("openCV feed cam", frame) # window

            if cv2.waitKey(10) & 0xFF == ord('q'): # quit when 'q' is pressed
                break
finally:
    # Release the resources even when the loop above raises
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Draw the landmarks on the last webcam frame and display it inline.
# `resuts` (sic) is the name assigned by the webcam loop.
draw_styled_landmarks(frame,resuts)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) # convert BGR -> RGB before plotting

### 2.1 Test reading Keypoint from downloaded video

In [None]:
# Test access to a video file: run the holistic model on every frame and show
# the annotated result until the video ends or 'q' is pressed.

cap = cv2.VideoCapture(r"C:\Users\Kokou\Documents\Project\Sign-Language-Detection\data\subset\cont\videos\CLSFB - 01 ok\CLSFBI0103A_S002_B.mp4") # video file

try:
    # min_detection_confidence: confidence for the initial detection,
    # min_tracking_confidence: confidence for tracking those detections
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:

        while cap.isOpened():
            ret, grabbed = cap.read() # read one frame at this point in time
            if not ret:
                # End of the video (or unreadable file): the capture stays
                # "opened", so the loop has to be left explicitly
                break
            frame = grabbed # `frame` always holds the last valid frame

            # detection of the keypoints
            image, results = mp_detection(frame, holistic_model)
            draw_styled_landmarks(image, results)

            cv2.imshow("openCV feed cam", image) # window

            if cv2.waitKey(10) & 0xFF == ord('q'): # quit when 'q' is pressed
                break
finally:
    # Release the resources even when the loop above raises
    cap.release()
    cv2.destroyAllWindows()



In [None]:
# Draw the landmarks on the last frame read from the video and display it inline
draw_styled_landmarks(frame,results)
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) # convert BGR -> RGB before plotting

## 3. Extract Keypoint Values

In [None]:
def extract_keypoints(results):
    """
    Flatten every landmark group of ``results`` into one 1-D numpy array.

    The groups are concatenated in the order pose, face, left hand, right hand.
    Pose landmarks contribute (x, y, z, visibility), the others (x, y, z).
    A group that is missing from ``results`` is replaced by zeros
    (33*4 for the pose, 468*3 for the face, 21*3 for each hand).
    """
    def _flat(group, fields, count):
        # No landmarks for this part => it is not in the frame => zeros
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ("x", "y", "z")
    chunks = [
        _flat(results.pose_landmarks, xyz + ("visibility",), 33),
        _flat(results.face_landmarks, xyz, 468),
        _flat(results.left_hand_landmarks, xyz, 21),
        _flat(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(chunks)


In [None]:

# Sanity check: 468*3 face + 33*4 pose + 2 * 21*3 hand values per frame
extract_keypoints(results).shape == (468*3+33*4+21*3*2,)

## 4. Setup Folders for collection

In [255]:
# Use the keypoints to decode the sign language.
# Set up the folders that store the keypoints associated with each frame.

DATA_PATH = os.path.join("MP_Data") # path for the data -> numpy arrays

actions = np.array(['aussi', 'oui', 'quoi']) # actions that we try to detect
no_sequences = 100 # 100 videos (sequences) of data for each action
sequence_length = 20 # 20 frames per sequence

# We collect 100 videos per action and use 20 frames of each video;
# each frame holds 1662 landmark values => (3*100 sequences, 20 frames, 1662 values)


In [256]:
# Create one folder per action containing one folder per sequence index.
for action in actions:
    for seq in range(no_sequences):
        # exist_ok makes re-running the cell harmless while still surfacing real
        # failures (permissions, invalid path) that a blanket `except OSError`
        # would have hidden
        os.makedirs(os.path.join(DATA_PATH, action, str(seq)), exist_ok=True)

## 5.  Collect Keypoint Values for testing and training

In [257]:
def visio_function():
    """
    Record keypoint sequences for every action from the webcam (device 0).

    For each action, ``no_sequences`` sequences of ``sequence_length`` frames are
    captured. Every frame is run through the holistic model, displayed with its
    landmarks, and its keypoints are saved under
    DATA_PATH/<action>/<sequence>/<frame_num>.npy.

    Pressing 'q' stops the whole collection. The camera and the preview window
    are released when the function exits, whether it finishes, is interrupted
    or fails.

    Raises RuntimeError when the webcam stops delivering frames.
    """
    window_name = "openCV feed cam"
    cap = cv2.VideoCapture(0) # device 0

    try:
        # min_detection_confidence: confidence for the initial detection,
        # min_tracking_confidence: confidence for tracking those detections
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:

            for action in actions:
                for sequence in range(no_sequences):
                    for frame_num in range(sequence_length):

                        ret, frame = cap.read() # read one frame at this point in time
                        if not ret:
                            raise RuntimeError("could not read a frame from the webcam")

                        # detection of the keypoints
                        image, results = mp_detection(frame, holistic_model)
                        draw_styled_landmarks(image, results)

                        first_frame = frame_num == 0
                        if first_frame:
                            cv2.putText(image, "STARTING COLLECTION", (120,200), cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),4,cv2.LINE_AA)
                        cv2.putText(image, f"Collection frames for {action} video Number {sequence}", (15,12), cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)
                        cv2.imshow(window_name, image) # window
                        if first_frame:
                            # Pause so the signer can get ready for the next sequence
                            cv2.waitKey(2000)

                        # save the keypoints of this frame
                        np_file_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                        np.save(np_file_path, extract_keypoints(results))

                        if cv2.waitKey(10) & 0xFF == ord('q'):
                            # Leave all three loops, not just the frame loop
                            return
    finally:
        # Release once, after every action has been recorded (releasing inside
        # the action loop closed the camera after the first action)
        cap.release()
        cv2.destroyAllWindows()



In [258]:
# Read the downloaded video files (isol files) and save the landmarks
# associated with each of their first `sequence_length` frames.
BASE_PATH = "./../data/subset/isol/videos/"

# min_detection_confidence: confidence for the initial detection,
# min_tracking_confidence: confidence for tracking those detections
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:

    for action in actions:
        action_dir = os.path.join(BASE_PATH, "_" + str(action).upper() + "_")
        # os.listdir returns entries in arbitrary order; sort so that a given
        # sequence index always maps to the same video
        video_files = sorted(os.listdir(action_dir))
        if len(video_files) < no_sequences:
            # Fail before writing anything rather than with an IndexError midway
            raise ValueError(
                f"{action_dir} contains {len(video_files)} videos, "
                f"{no_sequences} are required"
            )

        # loop through the videos, i.e. the sequences
        for sequence, video_name in enumerate(video_files[:no_sequences]):
            cap = cv2.VideoCapture(os.path.join(action_dir, video_name))
            try:
                # Zeros until the first frame is read, so an unreadable video
                # never reuses the keypoints of the previous one
                keypoints = np.zeros(1662)

                for frame_num in range(sequence_length):
                    ret, frame = cap.read() # read one frame at this point in time

                    if ret and frame is not None:
                        image, results = mp_detection(frame, holistic_model)
                        keypoints = extract_keypoints(results)
                    # else: the video is shorter than sequence_length, so the
                    # last keypoints are saved again as padding

                    if keypoints.shape != (1662,): # debug
                        print(keypoints.shape)
                    np_file_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    np.save(np_file_path, keypoints)
            finally:
                # Release every capture, not only the last one
                cap.release()

cv2.destroyAllWindows()



In [259]:
# Release the current capture handle and close any OpenCV window still open
cap.release()
cv2.destroyAllWindows()

## 6. Preprocess Data and Create Labels and Features

In [260]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical #convert data into one hot encoded data

In [261]:
# Map every action name to its integer class index
label_map = {name: index for index, name in enumerate(actions)}
sequences = [] # feature data X: one list of frames per sequence
labels = [] # target data y: class index of each sequence

for action in actions:
    for sequence in range(no_sequences):
        subfolder = os.path.join(DATA_PATH, action, str(sequence))
        # Load the saved keypoints of every frame of this sequence, in order
        window = [
            np.load(os.path.join(subfolder, f"{frame_num}.npy"))
            for frame_num in range(sequence_length)
        ]
        for res in window:
            if res.shape != (1662,): # debug
                print(res.shape)
        sequences.append(window) # len(actions) * no_sequences windows in total
        labels.append(label_map[action])



In [262]:
# Expected: (len(actions) * no_sequences, sequence_length, 1662)
np.array(sequences).shape

(300, 20, 1662)

In [263]:
X = np.array(sequences) # feature array built from all loaded sequences
y = to_categorical(labels).astype(int) # one-hot encode the integer class labels

In [264]:
# Hold out 5% of the sequences for testing; no random_state is passed here
X_train, X_test, y_train, y_test, = train_test_split(X,y,test_size=0.05)

In [265]:
# (number of training sequences, number of actions)
y_train.shape

(285, 3)

## 7. Build and Train LSTM Neural Network

In [266]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard

In [267]:
# TensorBoard callback writing its logs to the "Logs" directory
log_dir = os.path.join("Logs")
tb_callback = TensorBoard(log_dir=log_dir)

In [268]:
# Three stacked LSTM layers followed by a dense classifier head.
# Input: sequences of `sequence_length` frames with 1662 keypoint values each;
# output: one softmax probability per action.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(sequence_length,1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),  # last LSTM returns only its final output
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])


In [269]:
# categorical_crossentropy => loss for a multi-class classification model with one-hot labels
# metrics => track the categorical accuracy during training
model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['categorical_accuracy'])

In [270]:
# Print the layers, their output shapes and their parameter counts
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_24 (LSTM)              (None, 20, 64)            442112    
                                                                 
 lstm_25 (LSTM)              (None, 20, 128)           98816     
                                                                 
 lstm_26 (LSTM)              (None, 64)                49408     
                                                                 
 dense_24 (Dense)            (None, 64)                4160      
                                                                 
 dense_25 (Dense)            (None, 32)                2080      
                                                                 
 dense_26 (Dense)            (None, 3)                 99        
                                                                 
Total params: 596,675
Trainable params: 596,675
Non-tr

In [272]:
model.fit(X_train,y_train,epochs=100,callbacks=[tb_callback])
# A much larger epoch count (e.g. 2000) may be too high for this small amount of data.
# Stop the training if the accuracy is acceptable and the loss has stopped consistently decreasing.

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x1c35ee29780>

## 8. Make Prediction

In [273]:
res = model.predict(X_test) # one row of class probabilities per test sequence
actions[np.argmax(res[0])] # predicted action for the first test sequence



'aussi'

In [274]:
# Ground-truth action of the first test sequence, for comparison with the prediction
actions[np.argmax(y_test[0])]

'aussi'

In [275]:
# Persist the trained model, then reload its weights from the same path.
# NOTE(review): the '.h1' suffix looks like a typo for '.h5' - confirm the intended format.
model.save('action.h1')
model.load_weights('action.h1')



INFO:tensorflow:Assets written to: action.h1\assets


INFO:tensorflow:Assets written to: action.h1\assets


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1c34d42b7c0>

## 9. Evaluation and accuracy

In [276]:
from sklearn.metrics import multilabel_confusion_matrix,accuracy_score

# Turn the predicted probabilities and the one-hot targets into class indices
predicted_probabilities = model.predict(X_test)
ytrue = y_test.argmax(axis=1).tolist()
yhat = predicted_probabilities.argmax(axis=1).tolist()

# One 2x2 confusion matrix per class
multilabel_confusion_matrix(ytrue, yhat)



array([[[ 8,  1],
        [ 4,  2]],

       [[11,  1],
        [ 3,  0]],

       [[ 3,  6],
        [ 1,  5]]], dtype=int64)

In [277]:
# Fraction of test sequences whose predicted class equals the true class
accuracy_score(ytrue,yhat)

0.4666666666666667

## 10. Test in Real Time

In [278]:

colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """
    Draw one horizontal probability bar per action on a copy of the frame.

    res : iterable of probabilities, one per action
    actions : action labels, indexed like ``res``
    input_frame : opencv frame; it is copied, not modified
    colors : bar colours; the palette is cycled when there are more actions
             than colours, so adding an action no longer raises IndexError

    Returns the annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40 # each bar is 30 px high, one bar every 40 px
        bar_color = colors[num % len(colors)]
        # Bar length is the probability scaled to 0-100 px
        cv2.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        cv2.putText(output_frame, str(actions[num]), (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame



In [None]:
sequence = [] # sliding window of the most recent keypoint vectors
sentence = [] # recognised words, most recent last
threshold = 0.4 # minimum probability to accept a sign

cap = cv2.VideoCapture(0) # device 0

try:
    # min_detection_confidence: confidence for the initial detection,
    # min_tracking_confidence: confidence for tracking those detections
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic_model:

        while cap.isOpened():

            ret, frame = cap.read() # read one frame at this point in time
            if not ret:
                # No frame delivered: stop instead of crashing in mp_detection
                break

            # detection of the keypoints
            image, results = mp_detection(frame, holistic_model)
            draw_styled_landmarks(image, results)

            # The window must match the sequence length the model was built
            # with (input_shape=(sequence_length, 1662)), not a fixed 30 frames
            sequence.append(extract_keypoints(results))
            sequence = sequence[-sequence_length:]

            if len(sequence) == sequence_length:
                # prediction using the LSTM model
                res = model.predict(np.expand_dims(sequence, axis=0))[0]
                best = np.argmax(res)
                word = actions[best]
                print(word)

                # Accept a confident prediction only when it differs from the
                # last accepted word
                if res[best] > threshold and (not sentence or word != sentence[-1]):
                    sentence.append(word)

                sentence = sentence[-5:] # keep the five most recent words

                # visualise the probabilities
                image = prob_viz(res, actions, image, colors)

            # Banner across the full frame width showing the recognised words
            cv2.rectangle(image, (0,0), (image.shape[1], 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            cv2.imshow("openCV feed cam", image) # window

            if cv2.waitKey(10) & 0xFF == ord('q'): # quit when 'q' is pressed
                break
finally:
    # Release the resources even when the loop above raises
    cap.release()
    cv2.destroyAllWindows()

