In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [3]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [4]:
def mediapipe_detection(image,model):
  image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
  image.flags.writeable = False
  results = model.process(image)
  image.flags.writeable = True
  image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR)
  return image,results

In [5]:
def draw_landmarks(image,results):
  mp_drawing.draw_landmarks(image,results.face_landmarks,mp_holistic.FACEMESH_CONTOURS)
  mp_drawing.draw_landmarks(image,results.pose_landmarks,mp_holistic.POSE_CONNECTIONS)
  mp_drawing.draw_landmarks(image,results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS)
  mp_drawing.draw_landmarks(image,results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS)

In [6]:
def draw_styled_landmarks(image,results):
  mp_drawing.draw_landmarks(image,results.face_landmarks,mp_holistic.FACEMESH_CONTOURS,
                            mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circle_radius=1),
                            mp_drawing.DrawingSpec(color=(80,256,121),thickness=1,circle_radius=1)
                            )

  mp_drawing.draw_landmarks(image,results.pose_landmarks,mp_holistic.POSE_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(80,22,10),thickness=2,circle_radius=4),
                            mp_drawing.DrawingSpec(color=(80,44,121),thickness=2,circle_radius=2)
                            )

  mp_drawing.draw_landmarks(image,results.left_hand_landmarks,mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(121,22,76),thickness=2,circle_radius=4),
                            mp_drawing.DrawingSpec(color=(121,44,250),thickness=2,circle_radius=2)
                            )

  mp_drawing.draw_landmarks(image,results.right_hand_landmarks,mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(245,117,66),thickness=2,circle_radius=4),
                            mp_drawing.DrawingSpec(color=(245,66,230),thickness=2,circle_radius=2)
                            )

In [None]:
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
  while cap.isOpened():
    ret,frame  = cap.read()
    # frame = cv2.resize(frame,(800,600))

    image,results = mediapipe_detection(frame,holistic)

    draw_styled_landmarks(image,results)
    cv2.imshow('result',cv2.flip(image,1))

    if cv2.waitKey(10) & 0xFF == ord('q'):
      break
  cap.release()
  cv2.destroyAllWindows()

In [9]:
# plt.imshow(cv2.cvtColor(image,cv2.COLOR_BGR2RGB))

In [7]:
def extract_keypoints(results):
  pose = np.array([[res.x,res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
  face = np.array([[res.x,res.y,res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
  lh = np.array([[res.x,res.y,res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(63)
  rh = np.array([[res.x,res.y,res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(63)
  return np.concatenate([pose,face,lh,rh])

In [11]:
# test_result = extract_keypoints(results)[-10:]

In [12]:
# np.save('test',test_result)

In [13]:
# np.load('test.npy')

In [8]:
DATA_PATH = 'D:\\Final Year Project\\Express-U\\data\\vid_dataset'

actions = np.array(['hello','thanks','iloveyou'])

no_of_sequences = 30

sequence_length = 30

Folder creation for training data

In [9]:
for action in actions:
  for sequence in range(no_of_sequences):
    try:
      os.makedirs(os.path.join(DATA_PATH,action,str(sequence)))
    except:
      pass

Collecting Data for training

In [10]:
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:

  for action in actions:
    for sequence in range(no_of_sequences):
      for frame_num in range(sequence_length):

        ret,frame  = cap.read()
        image,results = mediapipe_detection(frame,holistic)
        draw_styled_landmarks(image,results)

        if frame_num == 0:
          cv2.putText(image,'STARTING COLLECTION',(120,200),cv2.FONT_HERSHEY_SIMPLEX,
                      1,(0,255,0),1,cv2.LINE_AA)

          cv2.putText(image,f'collectiong frames for {action} Video Number {sequence}',(15,12),cv2.FONT_HERSHEY_SIMPLEX,
                      0.5,(0,0,255),1,cv2.LINE_AA)

          cv2.imshow('result',image)


          cv2.waitKey(2000)
        else:
          cv2.putText(image,f'collectiong frames for {action} Video Number {sequence}',(15,12),cv2.FONT_HERSHEY_SIMPLEX,
                      0.5,(0,0,255),1,cv2.LINE_AA)

          cv2.imshow('result',image)


        keypoints = extract_keypoints(results)
        npy_path = os.path.join(DATA_PATH,action,str(sequence),str(frame_num))
        np.save(npy_path,keypoints)


        if cv2.waitKey(10) & 0xFF == ord('q'):
          break

  cap.release()
  cv2.destroyAllWindows()

In [12]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [13]:
label_map = {label:num for num,label in enumerate(actions)}

In [14]:
label_map

{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [15]:
sequences,labels = [],[]
for action in actions:
  for sequence in range(no_of_sequences):
    window = []
    for frame_num in range(sequence_length):
      res = np.load(os.path.join(DATA_PATH,action,str(sequence),f'{frame_num}.npy'))
      window.append(res)
    sequences.append(window)
    labels.append(label_map[action])

In [16]:
x = np.array(sequences)

In [17]:
x.shape

(90, 30, 1662)

In [18]:
y = to_categorical(labels).astype(int)

In [19]:
y.shape

(90, 3)

In [20]:
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.05)

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense
from tensorflow.keras.callbacks import TensorBoard

In [22]:
log_dir = 'D:\\Final Year Project\\Express-U\\data\\Logs'
tb_callback = TensorBoard(log_dir=log_dir)

In [23]:
model = Sequential()
model.add(LSTM(64,return_sequences=True,activation='relu',input_shape=(30,1662)))
model.add(LSTM(128,return_sequences=True,activation='relu'))
model.add(LSTM(64,return_sequences=False,activation='relu'))
model.add(Dense(64,activation='relu'))
model.add(Dense(32,activation='relu'))
model.add(Dense(actions.shape[0],activation='softmax'))

In [24]:
model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['categorical_accuracy'])

In [25]:
import tensorflow as tf
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss",patience=4, restore_best_weights=True)

In [26]:
model.fit(X_train,y_train,epochs=2000,callbacks=[callback])

Epoch 1/2000
Epoch 2/2000
Epoch 3/2000
Epoch 4/2000
Epoch 5/2000
Epoch 6/2000
Epoch 7/2000
Epoch 8/2000
Epoch 9/2000
Epoch 10/2000
Epoch 11/2000
Epoch 12/2000
Epoch 13/2000
Epoch 14/2000
Epoch 15/2000
Epoch 16/2000
Epoch 17/2000
Epoch 18/2000
Epoch 19/2000
Epoch 20/2000
Epoch 21/2000
Epoch 22/2000
Epoch 23/2000
Epoch 24/2000
Epoch 25/2000
Epoch 26/2000
Epoch 27/2000
Epoch 28/2000
Epoch 29/2000
Epoch 30/2000
Epoch 31/2000
Epoch 32/2000
Epoch 33/2000
Epoch 34/2000
Epoch 35/2000
Epoch 36/2000
Epoch 37/2000
Epoch 38/2000
Epoch 39/2000
Epoch 40/2000
Epoch 41/2000
Epoch 42/2000
Epoch 43/2000
Epoch 44/2000
Epoch 45/2000
Epoch 46/2000
Epoch 47/2000
Epoch 48/2000
Epoch 49/2000
Epoch 50/2000
Epoch 51/2000
Epoch 52/2000
Epoch 53/2000
Epoch 54/2000
Epoch 55/2000
Epoch 56/2000
Epoch 57/2000
Epoch 58/2000
Epoch 59/2000
Epoch 60/2000
Epoch 61/2000
Epoch 62/2000
Epoch 63/2000
Epoch 64/2000
Epoch 65/2000
Epoch 66/2000
Epoch 67/2000
Epoch 68/2000
Epoch 69/2000
Epoch 70/2000
Epoch 71/2000
Epoch 72/2000
E

KeyboardInterrupt: 

In [38]:
from sklearn.metrics import multilabel_confusion_matrix,accuracy_score

In [39]:
yhat = model.predict(X_test)

In [40]:
ytrue = np.argmax(y_test,axis=1).tolist()
yhat = np.argmax(yhat,axis=1).tolist()

In [41]:
multilabel_confusion_matrix(ytrue,yhat)

array([[[2, 2],
        [0, 1]],

       [[3, 0],
        [0, 2]],

       [[3, 0],
        [2, 0]]], dtype=int64)

In [42]:
accuracy_score(ytrue,yhat)

0.6

In [None]:
model.save('actions.h5')