1. Importing libraries

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as pltV
import time
import mediapipe as mp
from processing_utils import draw_styled_landmarsks , mediapipe_detection , extract_keypoints 
from processing_utils import make_dir , collecting_data , explore_model
from model_utils import create_model

2. Keypoints using MP Holistic

In [2]:
# MediaPipe module handles: `holistic` is used further down to build the
# Holistic detector, and `drawing_utils` is passed to the project helpers.
mp_holistic_model = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils 

3. Explore the model

In [3]:
# Run the project helper from processing_utils against the holistic model.
# NOTE(review): judging by the printed output it reports the shape of the
# per-frame keypoint vector; confirm what else it does in processing_utils.
explore_model(mp_holistic_model , mp_drawing)

Data shape is (1662,)


4. Setup Folders for Collection

In [4]:
# Root folder for the exported keypoint data
DATA_PATH = os.path.join('MP_Data')
# Actions we are trying to predict
actions = np.array(['open the bag' , 'put the book' , 'close the bag' , 'none'])
# Number of videos (sequences) recorded per action
no_sequences = 40
# Each video is 30 frames long
sequences_length = 30

In [5]:
# Create the output folders under DATA_PATH (one per action, per the log lines
# it prints). NOTE(review): the loader further down reads
# DATA_PATH/<action>/<sequence>/<frame>.npy, so presumably the per-sequence
# sub-folders are created here as well; confirm in processing_utils.
make_dir(DATA_PATH , actions , no_sequences)

MP_Data\open the bag is created
MP_Data\put the book is created
MP_Data\close the bag is created
MP_Data\none is created


5. Collect Keypoint Values for Training and Testing

In [6]:
# Record the training data with the project helper.
# NOTE(review): presumably this captures `no_sequences` videos of
# `sequences_length` frames for every action and stores the keypoints as
# .npy files under DATA_PATH; confirm in processing_utils.
collecting_data(mp_holistic_model , mp_drawing,actions , no_sequences , sequences_length , DATA_PATH)

6. Preprocess Data and Create Labels and Features

In [None]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

In [None]:
# Give every action label its position in `actions` as the integer class id.
label_map = dict(zip(actions , range(len(actions))))

In [None]:
# Display the label -> class-id mapping as the cell output.
label_map

In [None]:
# Load every saved frame back from disk: one window (list of per-frame arrays)
# per recorded video, paired with the integer class id of its action.
sequences , labels = [] , []
for action_name in actions:
    for video_idx in range(no_sequences):
        video_dir = os.path.join(DATA_PATH , action_name , str(video_idx))
        frames = [
            np.load(os.path.join(video_dir , '{}.npy'.format(frame_idx)))
            for frame_idx in range(sequences_length)
        ]
        sequences.append(frames)
        labels.append(label_map[action_name])


In [None]:
# Stack the windows into one feature array and one-hot encode the class ids.
# Presumably X is (videos, frames, keypoints) and y is (videos, classes); TODO confirm.
X = np.array(sequences)
y = to_categorical(labels).astype(int)

In [8]:
# Build the network with the project helper imported from model_utils.
model = create_model()

In [None]:
# Train on all of X / y for 300 epochs in batches of 10, with logging silenced
# (verbose=0). No validation data is passed, so nothing is held out here.
model.fit(X, y, epochs=300,  batch_size=10 , verbose=0)

In [None]:
# Persist the trained model to 'action.h5' in the working directory.
model.save('action.h5')

7. Load the model and start testing

In [None]:
# Reload the model saved as 'action.h5'; the cells below run inference
# against this loaded copy.
from tensorflow import keras
model = keras.models.load_model('action.h5')

In [None]:
colors = (16 , 117 , 245)
def prob_viz(res , actions , input_frame , colors):
    """Overlay one horizontal probability bar per class on a copy of the frame.

    Args:
        res: iterable of per-class probabilities; `res[i]` belongs to `actions[i]`.
        actions: indexable collection of label strings drawn on top of each bar.
        input_frame: image to annotate. It is copied first and all drawing
            happens on the copy.
        colors: fill colour handed to `cv2.rectangle` for every bar.

    Returns:
        The annotated copy of `input_frame`.
    """
    annotated = input_frame.copy()
    for idx , probability in enumerate(res):
        # Rows are stacked 40 px apart, starting 60 px from the top.
        offset = idx * 40
        top_left = (0 , 60 + offset)
        # Bar length is the probability scaled to 0..100 px.
        bottom_right = (int(probability * 100) , 90 + offset)
        cv2.rectangle(annotated , top_left , bottom_right , colors , -1)

        label_origin = (0 , 85 + offset)
        cv2.putText(annotated , actions[idx] , label_origin ,
                    cv2.FONT_HERSHEY_SIMPLEX , 1 , (255,255,255) , 2 , cv2.LINE_AA)
    return annotated


In [None]:
# Real-time detection state. These live at notebook level so they can be
# inspected after the loop has ended.
sequence = []      # rolling window of the newest per-frame keypoint vectors
sentence = []      # recently accepted actions, consecutive repeats collapsed
predictions = []   # argmax class id of every prediction that was made
threshold = 0.95   # minimum class probability before an action is accepted

cap = cv2.VideoCapture(0)
# Getting the settings of our video capture
#frame_width = int(cap.get(3))
#frame_height = int(cap.get(4))

#writ = cv2.VideoWriter('keyPointRecognition.avi',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))

try:
    # Set mediapipe model
    with mp_holistic_model.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed (camera busy, unplugged or stream ended).
                # Stop instead of passing `frame=None` into the detection helper.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarsks(image, results)

            # Prediction logic: keep only the newest `sequences_length` frames so
            # the window always matches the length used for the training data.
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)
            sequence = sequence[-sequences_length:]

            if len(sequence) == sequences_length:
                # verbose=0 keeps Keras from printing a progress bar on every frame.
                res = model.predict(np.expand_dims(sequence, axis=0), verbose=0)[0]
                best = np.argmax(res)
                print(actions[best])
                predictions.append(best)

                # Accept the action only when the model is confident, and only
                # when it differs from the last accepted one.
                if res[best] > threshold:
                    if not sentence or actions[best] != sentence[-1]:
                        sentence.append(actions[best])

                # Show at most the five most recent actions in the banner.
                if len(sentence) > 5:
                    sentence = sentence[-5:]
                image = prob_viz(res , actions , image , colors)

            cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
            cv2.putText(image, ' '.join(sentence), (3,30),
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

            # Show to screen
            cv2.imshow('OpenCV Feed', image)
            #writ.write(image)

            # Break gracefully
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always give the camera back and close the preview window, even when the
    # loop is interrupted or one of the helpers raises.
    cap.release()
    #writ.release()
    cv2.destroyAllWindows()

In [None]:
# Manual cleanup, for when the detection cell was interrupted.
cap.release()
# `writ` only exists when the optional VideoWriter line in the detection cell
# is enabled; calling it unconditionally raised NameError and skipped the
# window teardown below.
if 'writ' in globals():
    writ.release()
cv2.destroyAllWindows()