In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from Data_Collection import definitions
from Data_Collection import instantiation
from Data_Collection import detection
from Data_Collection import draw_landmarks
from Data_Collection import extract_keypoints

train = False

### Storing all the imported data

In [2]:
width_cam, height_cam, fps, actions, num_sequence, sequence_len, path = definitions()
mp_holistic, mp_drawing = instantiation()

### Prepocess Data and Create Labels and Features

In [3]:
if train:
    label_map = {label:num for num, label in enumerate(actions)}

    # Storing the videos (sequences) and labeling it with the respective action labels in one single large array
    sequences, labels = [], []
    for action in actions:
        for sequence in range(num_sequence):
            # The array 'window' stores all the frames for a particular sequence (video). 
            # This will have size of 30
            window = []
            for frame_num in range(sequence_len):
                res = np.load(os.path.join(path, action, str(sequence), "{}.npy".format(frame_num)))
                window.append(res)
            # The array 'sequences' stores all the videos for a particular action. 
            # This will have size of 90
            sequences.append(window)
            labels.append(label_map[action])

In [4]:
# The array 'sequences' stores all the videos and all the frames in that video for a particular action. 
# This will have shape of (90,30,1662) which means 90 videos, 30 frames per video, 1662 keypoints.

# [
#     [
#         [0th frame kepoints for 0th video of action 1], [1st frame keypoints for 0th video of action 1] ... [29th frame keypoint for 0th video of action 1]
#     ],
#     .
#     .
#     .
#     [
#         [0th frame kepoints for 29th video of action 1], [1st frame keypoints for 29th video of action 1] ... [29th frame keypoint for 29th video of action 1]
#     ]
    
#     This repeats 90 times total (30 videos of 3 actions)
# ]
# (np.array(sequences)).shape
# np.array(sequences)

In [5]:
if train:
    x = np.array(sequences)
    y = to_categorical(labels).astype(int)
    print("Shapes x and y")
    print(x.shape, y.shape)

    # Split the data into train-test data
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, shuffle=True)

    print("Shapes of x_train and x_test")
    print(x_train.shape, x_test.shape)
    print("Shapes of y_test")
    print(y_test)

Shapes x and y
(120, 30, 258) (120, 3)
Shapes of x_train and x_test
(108, 30, 258) (12, 30, 258)
Shapes of y_test
[[0 1 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 1 0]
 [1 0 0]]


### Build and Train LSTM Neural Network

In [6]:
from tensorflow.keras.models import Sequential # Build a sequential NN
from tensorflow.keras.layers import LSTM, Dense # LSTM and the dense layers 
from tensorflow.keras.callbacks import TensorBoard # To monitor NN training
import tensorflow
# LSTM requires lesser data, is faster to train and is able to detect faster

In [7]:
if train:
    log_dir = os.path.join('Training_Logs')
    tb_callback = TensorBoard(log_dir=log_dir)

    model = Sequential()
    model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(x.shape[1],x.shape[2])))
    model.add(LSTM(128, return_sequences=True, activation='relu'))
    model.add(LSTM(64, return_sequences=False, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(actions.shape[0], activation='softmax'))

    opt = tensorflow.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.8,
                                           beta_2=0.899,
                                           epsilon=1e-07)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    model.fit(x_train, y_train, epochs=50, callbacks=[tb_callback])

    print()
    model.summary()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 30, 64)            82688     
                                                                 
 lstm_1 (LSTM)               (None, 30, 128)           98816     
                                                          

### Save Weights

In [8]:
if train:
    model.save('action.h5')

### Testing a test-case

In [9]:
if train:
    res = model.predict(x_test)
    prediction_label = actions[np.argmax(res[3])]
    true_label = actions[np.argmax(y_test[3])]

    print(" Predicted Label: {}, \n True Label: {}".format(prediction_label, true_label))
    
    print(actions[np.argmax(res[0])], actions[np.argmax(y_test[0])])
    print (" y_test data ")
    print(y_test)

 Predicted Label: Hello, 
 True Label: Hello
Thanks Thanks
 y_test data 
[[0 1 0]
 [0 0 1]
 [0 1 0]
 [1 0 0]
 [1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]
 [1 0 0]
 [0 1 0]
 [1 0 0]]


### Evaluation using Confusion Matrix and Accuracy

In [10]:
if train:
    from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

    y_predict = model.predict(x_test)

    # Converting the true labels from one-hot-encoded version to a list version with actual label numbers
    y_true = np.argmax(y_test, axis=1).tolist()

    # Converting the prdicted labels from one-hot-encoded version to a list version with actual label numbers
    y_predict = np.argmax(y_predict, axis=1).tolist()

    print("Confusion Matrix")
    print(multilabel_confusion_matrix(y_true, y_predict))
    print("The Accuracy Score is: ",accuracy_score(y_true, y_predict))

Confusion Matrix
[[[ 7  0]
  [ 0  5]]

 [[ 7  0]
  [ 0  5]]

 [[10  0]
  [ 0  2]]]
The Accuracy Score is:  1.0
