In [None]:
####### TO RUN A PRE-TRAINED MODEL, FOLLOW THE INSTRUCTIONS BELOW #######
    # import necessary basic libraries (STEP 1)
    # establish holistic and drawing mediapipe variables (STEP 2)
    # run required functions (STEPS 3 - 6)
    # import tensorflow and keras packages (STEP 7)
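    # establish model variable (STEP 8); the `actions` array must already be defined (run the cells that create actions1, actions2 and actions)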
    # load pre-trained model (STEP 9)
    # import scipy stats (STEP 10)
    # run real-time testing (STEP 11)

In [None]:
!pip install tensorflow==2.12.0 opencv-python scikit-learn matplotlib mediapipe

In [None]:
### STEP 1 ###
from matplotlib import pyplot as plt
import numpy as np
import mediapipe as mp
import cv2
import os
import time

In [None]:
### STEP 2 ###
# Short aliases for the MediaPipe modules used throughout the notebook:
# the holistic solution (source of the pose/hand landmarks and connection sets)
# and the drawing helpers used to render them onto frames.
mpHolistic = mp.solutions.holistic
mpDrawing = mp.solutions.drawing_utils

In [None]:
### STEP 3 ###
def mp_detection(image, model):
    """Run a MediaPipe model on one BGR frame.

    Args:
        image: frame in OpenCV's BGR channel order.
        model: object exposing ``process(rgb_image)`` (here a Holistic instance).

    Returns:
        (bgr_image, results): a BGR copy of the frame (after the round trip
        through RGB) and whatever ``model.process`` returned.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Mark the buffer read-only while the model processes it, then unlock it again.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    return bgr, results

In [None]:
### STEP 4 ###
def draw_landmarks(image, results):
    """Draw pose and both hands' landmarks on ``image`` with default styling.

    ``image`` is modified in place; nothing is returned.
    """
    # (landmark list, connection set) pairs, drawn in order: pose, left hand, right hand
    parts = (
        (results.pose_landmarks, mpHolistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in parts:
        mpDrawing.draw_landmarks(image, landmark_list, connections)

In [None]:
### STEP 5 ###
def draw_styled_landmarks(image, results):
    """Draw pose and hand landmarks on ``image`` using a distinct colour pair per body part.

    ``image`` is modified in place; nothing is returned.
    """
    make_spec = mpDrawing.DrawingSpec
    # (landmark list, connection set, landmark colour, connection colour)
    parts = (
        (results.pose_landmarks, mpHolistic.POSE_CONNECTIONS, (80,22,10), (80,44,121)),
        (results.left_hand_landmarks, mpHolistic.HAND_CONNECTIONS, (121,22,76), (121,44,250)),
        (results.right_hand_landmarks, mpHolistic.HAND_CONNECTIONS, (245,117,66), (245,66,230)),
    )
    for landmark_list, connections, point_colour, line_colour in parts:
        mpDrawing.draw_landmarks(
            image,
            landmark_list,
            connections,
            make_spec(color=point_colour, thickness=2, circle_radius=4),
            make_spec(color=line_colour, thickness=2, circle_radius=2),
        )

In [None]:
# Live preview: show the webcam feed with styled landmarks until 'q' is pressed.
# `frame` and `results` from the last successfully processed frame stay in the
# namespace because later cells reuse them.
cap = cv2.VideoCapture(0)
try:
    # Set model
    with mpHolistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, grabbed = cap.read()
            if not ret:
                # No frame delivered (camera busy/unplugged): stop instead of
                # passing None to cv2.cvtColor, and keep the last good `frame`.
                break
            frame = grabbed
            image, results = mp_detection(frame, holistic)

            draw_styled_landmarks(image, results)

            cv2.imshow('OpenCV Feed', image)

            # Close window
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Overlay the default-styled landmarks on the last captured frame
# (uses `frame` and `results` left in the namespace by the webcam cell).
draw_landmarks(frame, results)

In [None]:
# Display the annotated frame inline; it is converted from BGR to RGB before plotting.
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

In [None]:
# Scratch version of the keypoint extraction for the last `results`:
# each part becomes a flat vector, or zeros of the same length when it was not detected.

# Pose: (x, y, z, visibility) per landmark, 132 values
if results.pose_landmarks:
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten()
else:
    pose = np.zeros(132)

# Left hand: (x, y, z) per landmark, 21*3 values
if results.left_hand_landmarks:
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten()
else:
    lh = np.zeros(21*3)

# Right hand: (x, y, z) per landmark, 21*3 values
if results.right_hand_landmarks:
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten()
else:
    rh = np.zeros(21*3)

In [None]:
### STEP 6 ###
def extract_keypoints(results):
    """Flatten pose and hand landmarks into a single 1-D feature vector.

    Layout of the returned array (258 values in total):
        pose       -> 33 * 4 values (x, y, z, visibility)
        left hand  -> 21 * 3 values (x, y, z)
        right hand -> 21 * 3 values (x, y, z)
    A part that was not detected (falsy attribute on ``results``) is
    replaced by zeros of the matching length.
    """
    def _flatten(landmark_list, fields, fallback_size):
        # Zeros keep the vector length constant when a part is missing.
        if not landmark_list:
            return np.zeros(fallback_size)
        rows = [[getattr(point, name) for name in fields] for point in landmark_list.landmark]
        return np.array(rows).flatten()

    pose = _flatten(results.pose_landmarks, ('x', 'y', 'z', 'visibility'), 33*4)
    lh = _flatten(results.left_hand_landmarks, ('x', 'y', 'z'), 21*3)
    rh = _flatten(results.right_hand_landmarks, ('x', 'y', 'z'), 21*3)
    return np.concatenate([pose, lh, rh])

In [None]:
# Sanity check: build the feature vector for the last processed frame
result_test = extract_keypoints(results)

In [None]:
# Write the test vector to disk under the name '0' in the current working directory
np.save('0', result_test)

In [None]:
# --- Data-collection configuration ---

# Root folder for the exported keypoint files.
# NOTE(review): this is an absolute, machine-specific path; adjust it before running elsewhere.
DATA_PATH = '/Users/User/Desktop/SignScore/MP_Data'

# Signs to train, split into two groups that are created/collected separately
actions1 = np.array([
    'hello',
    'goodbye',
    'thankyou',
])
actions2 = np.array([
    'how',
    'are you',
    'take care',
])

# Number of videos recorded per sign
no_sequences = 50

# Number of frames in each video
sequence_length = 10

In [None]:
def makeFiles(actions, data_path=None, n_sequences=None):
    """Create the folder tree ``<root>/<action>/<sequence>`` for data collection.

    Args:
        actions: iterable of sign names; one sub-folder is created per name.
        data_path: root folder; defaults to the notebook-level ``DATA_PATH``.
        n_sequences: number of numbered sequence folders (1..n) per action;
            defaults to the notebook-level ``no_sequences``.

    Folders that already exist are left untouched. Any other failure
    (e.g. missing permissions) raises ``OSError`` instead of being silently
    swallowed.
    """
    root = DATA_PATH if data_path is None else data_path
    count = no_sequences if n_sequences is None else n_sequences
    for action in actions:
        for sequence in range(1, count + 1):
            # exist_ok makes re-running the cell harmless without hiding real errors
            os.makedirs(os.path.join(root, action, str(sequence)), exist_ok=True)

In [None]:
# Create the per-sign / per-video folders for both groups of signs
makeFiles(actions1)
makeFiles(actions2)

In [None]:
def collectData(actions):
    """Record keypoint sequences for every sign in ``actions`` from the default webcam.

    For each action, ``no_sequences`` videos of ``sequence_length`` frames are
    captured. The keypoints of every frame (see ``extract_keypoints``) are saved
    under ``DATA_PATH/<action>/<sequence>/<frame_num>`` via ``np.save``; the
    target folders must already exist. On the first frame of each video a
    'STARTING COLLECTION' banner is shown and the feed pauses for 500 ms.
    Pressing 'q' stops the whole collection run.

    Args:
        actions: iterable of sign names to record.

    Raises:
        RuntimeError: if the camera stops delivering frames.
    """
    cap = cv2.VideoCapture(0)
    try:
        # Set mediapipe model
        with mpHolistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            # Loop through actions
            for action in actions:
                # Loop through sequences aka videos
                for sequence in range(1, no_sequences + 1):
                    # Loop through video length aka sequence length
                    for frame_num in range(1, sequence_length + 1):

                        # Read feed
                        ret, frame = cap.read()
                        if not ret:
                            # Fail loudly: silently continuing would leave an incomplete dataset
                            raise RuntimeError('Could not read a frame from the camera')

                        # Make detections
                        image, results = mp_detection(frame, holistic)

                        # Draw landmarks
                        draw_styled_landmarks(image, results)

                        # Frames are numbered from 1, so the first frame of a video is 1
                        is_first_frame = frame_num == 1
                        if is_first_frame:
                            cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                        cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                        # Show to screen
                        cv2.imshow('OpenCV Feed', image)
                        if is_first_frame:
                            # Short pause between videos
                            cv2.waitKey(500)

                        # Export keypoints
                        keypoints = extract_keypoints(results)
                        npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                        np.save(npy_path, keypoints)

                        # Quit the entire collection (not just the current video) on 'q'
                        if cv2.waitKey(10) & 0xFF == ord('q'):
                            return
    finally:
        # Always free the camera and close the window, including on 'q' or on errors
        cap.release()
        cv2.destroyAllWindows()

In [None]:
# Record the first group of signs (actions1) from the webcam
collectData(actions1)

In [None]:
# Record the second group of signs (actions2) from the webcam
collectData(actions2)

In [None]:
# `actions` is the full label vocabulary; its order defines the class indices used below
actions = np.concatenate((actions1, actions2), axis=None) #combine all signs into one array

In [None]:
# Map each sign name to its integer class index (its position in `actions`)
label_map = {label:num for num, label in enumerate(actions)} #set labels
label_map

In [None]:
# Accumulators for the dataset: one window of frames per video, and that video's class index
sequences, labels = [], []

In [None]:
# Load every recorded video into `sequences` (a list of frame windows) and `labels`.
# Start from empty lists so re-running this cell does not append duplicate samples.
sequences, labels = [], []
for action in actions:
    action_dir = os.path.join(DATA_PATH, action)
    # Keep only the numbered sequence folders; stray entries (e.g. .DS_Store) are skipped
    # instead of crashing the integer conversion. Sorting makes the load order deterministic.
    sequence_ids = sorted(int(name) for name in os.listdir(action_dir) if name.isdigit())
    for sequence in sequence_ids:
        window = []
        for frame_num in range(1,sequence_length+1):
            res = np.load(os.path.join(action_dir, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [None]:
# Shape check of the loaded windows
np.array(sequences).shape

In [None]:
# Shape check of the labels (one entry per loaded video)
np.array(labels).shape

In [None]:
# Feature array for training, built from the loaded windows
X = np.array(sequences)
X.shape

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# One-hot encode the integer labels (the model is trained with categorical cross-entropy)
y = to_categorical(labels).astype(int)
# Hold out 5% of the videos for testing. The fixed random_state keeps the split identical
# across kernel restarts, so a model reloaded from disk is evaluated on the same held-out
# samples rather than on data it may have been trained on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42) #split data into training and testing sets
y_test.shape

In [None]:
### STEP 7 ###
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization, Flatten
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# Directory passed to the TensorBoard callback for training logs (relative to the working directory)
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
### STEP 8 ###
# Three stacked LSTM layers followed by a dense classifier head.
# Input: windows of 10 frames x 258 keypoint values; output: one softmax score per sign,
# so `actions` must be defined before this cell runs.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(10,258)),
    LSTM(128, return_sequences=True, activation='relu'),
    # Last recurrent layer returns only its final output, which feeds the dense layers
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [None]:
# Compile with the Adam optimizer and categorical cross-entropy, then train for 1500 epochs
# on the training split with the TensorBoard callback attached; finally print the layer summary.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.fit(X_train, y_train, epochs=1500, callbacks=[tb_callback]) #run training
model.summary()

In [None]:
# Model outputs for every sample in the test split
res = model.predict(X_test)

In [None]:
# Predicted sign for test sample 14 (hard-coded index: needs at least 15 test samples)
actions[np.argmax(res[14])] #preliminary testing

In [None]:
# True sign for the same test sample, for comparison with the prediction above
actions[np.argmax(y_test[14])]

In [None]:
# Persist the trained model to 'action.h5' in the working directory
model.save('action.h5') #to be used in case of corruption

In [None]:
### STEP 9 ###
# Restore the trained weights into the architecture built in STEP 8.
# Keras models have no `load` method; `load_weights` reads the weights from the saved file.
model.load_weights('action.h5') #use to load in pre-trained model

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [None]:
# Model outputs for the test split, used for the evaluation metrics
yhat = model.predict(X_test)

In [None]:
# Convert the one-hot targets and the model outputs to class indices.
# The predictions are recomputed here rather than read from `yhat`, so this cell stays
# re-runnable: it no longer applies argmax to a `yhat` it has already overwritten with labels.
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(model.predict(X_test), axis=1).tolist()

In [None]:
# Confusion matrices comparing true and predicted class indices on the test split
multilabel_confusion_matrix(ytrue, yhat)

In [None]:
# Accuracy score of the predicted class indices against the true ones on the test split
accuracy_score(ytrue, yhat)

In [None]:
### STEP 10 ###
from scipy import stats

In [None]:
### STEP 11 ###
## Testing in real time
# Real-time recognition: classify a rolling window of webcam frames and show the
# recognised sign in a banner at the top of the feed. Press 'q' to quit.
sequence = []       # rolling window of per-frame keypoint vectors
sentence = ""       # sign currently displayed in the banner
predictions = []    # most recent predicted class indices
threshold = 0.99    # minimum softmax score required to display a sign
window_size = 10    # frames per model input; matches the model's input_shape of (10, 258)

cap = cv2.VideoCapture(0)
try:
    # Set model
    with mpHolistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame delivered: stop instead of passing None to cv2.cvtColor
                break

            # Make detections
            image, results = mp_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Prediction logic
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-window_size:]

            if len(sequence) == window_size:
                # verbose=0 suppresses the progress bar that would otherwise print every frame
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = int(np.argmax(res))
                predictions.append(best)
                # Only the recent history is needed; trimming keeps the list from growing forever
                predictions = predictions[-window_size:]

                # Update the banner only when the recent predictions all agree.
                # (Comparing against np.unique(...)[0] only checked the smallest class index seen.)
                if len(set(predictions)) == 1:
                    if res[best] > threshold:
                        sentence = actions[best]
                    else:
                        sentence = ""

            # Display word on screen
            cv2.rectangle(image, (0,0), (640, 40), (0, 0, 0), -1)
            # `sentence` is a single string; ' '.join() on it would put a space between every character
            cv2.putText(image, str(sentence), (3,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if prediction raises
    cap.release()
    cv2.destroyAllWindows()