In [3]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import mediapipe as mp

NameError: name 'tf' is not defined

In [2]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 
    return image, results

In [4]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z, res.visibility] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*4)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    
    return np.concatenate([pose, face, lh, rh])

In [13]:
import warnings
def extractPostitionFromVideo(filepath, number_of_frames=30, path_to_save="Training npy"):
    try:
        cap = cv2.VideoCapture(filepath)
        
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        num_frames_to_extract = number_of_frames
        
        if num_frames_to_extract > total_frames:
            warnings.warn(f"Requested {num_frames_to_extract} frames, but the video only has {total_frames} frames. Defaulting to max.")
            num_frames_to_extract = total_frames 
        
        frame_interval = total_frames // num_frames_to_extract
        
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
            frame_num = 0
            extracted_frame_count = 0
            keypoints_list = []

            while cap.isOpened() and extracted_frame_count < num_frames_to_extract:
                ret, frame = cap.read()
                
                if not ret:
                    break
                
                # Only process frames at the defined interval
                if frame_num % frame_interval == 0:
                    image, results = mediapipe_detection(frame, holistic)
                    keypoints = extract_keypoints(results)
                    keypoints_list.append(keypoints)
                    
                    # Increment extracted frame count
                    extracted_frame_count += 1

                frame_num += 1

        cap.release()

        # Optionally save the keypoints list as a .npy file or process as needed
        np.save(f'{path_to_save}/{filepath}.npy', keypoints_list)
        print("Saved Features from Video: ", filepath)
        print("Saved .npy at: ", f'{path_to_save}/{filepath}.npy')

    except Exception as e:
        print(e)
        cap.release()


In [14]:
# extractPostitionFromVideo('help me (1).MP4')

In [15]:
# data = np.load('help me (1).MP4.npy')

In [16]:
# print(data)
# data.shape

In [2]:
def extract_labels(filepath):
    list_of_names = []
    for _, name in enumerate(os.listdir(filepath)):
        list_of_names.append(name)
    
    return list_of_names

In [3]:
def one_hot_encode_labels(labels):
    label_map = {label:num for num, label in enumerate(labels)}
    return label_map

In [4]:
def normalize_sequences(sequences):
    min_values = np.min(sequences, axis=(0, 1)) 
    max_values = np.max(sequences, axis=(0, 1))  

    normalized_sequences = []
    for sequence in sequences:
        normalized_sequence = (sequence - min_values) / (max_values - min_values)
        normalized_sequences.append(normalized_sequence)

    return normalized_sequences

In [25]:
def prepareData(rootfolder):
    listofnames = os.listdir(rootfolder)
    dictofnames = one_hot_encode_labels(listofnames)
    
    sequences, labels = [], []
    
    for sign in os.listdir(rootfolder):
        for file in os.listdir(rootfolder+"/"+sign):
            sequences.append(np.load(rootfolder+"/"+sign+"/"+file))
            labels.append(dictofnames[sign])
            
    return sequences, labels


In [27]:
# #Extracting Sequences
# rootdir
# for sign in os.listdir('Training npy/Training Data'):

sequences, labels = prepareData('Training npy/Training Data')
sequences = np.array(sequences)
sequences.shape

(754, 30, 2130)

In [28]:
#Sequences should have a shape of (Total Number of videos, 30, 2130)
#labels should have a shape of total number of videos

In [29]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [30]:
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,


In [31]:
y = to_categorical(labels)

In [32]:
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [33]:
x_train, x_test, y_train, y_test = train_test_split(sequences, y, test_size=0.05)

In [34]:
x_train.shape

(716, 30, 2130)

In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard
import tensorflow as tf
import keras

In [36]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [37]:
log_dir = os.path.join('Logs')
tb_callbacks = TensorBoard(log_dir=log_dir)

In [4]:
actions = np.array(os.listdir('Training Data'))
actions.shape

(15,)

In [5]:
actions

array(['Are you free today', 'Can you repeat that please',
       'Congratulations', 'help me please', 'how are you', 'I am fine',
       'I love you', 'no', 'Please come,Welcome', 'Talk slower please',
       'Thank You', 'What are you doing', 'What do you do',
       'What Happened', 'yes'], dtype='<U26')

In [49]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,2130)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(256, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

  super().__init__(**kwargs)


In [50]:
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss=keras.losses.CategoricalCrossentropy(), metrics=['accuracy'])

In [51]:
model.summary()

In [52]:
model.fit(x_train, y_train, epochs=2000, callbacks=[tb_callbacks])

Epoch 1/2000








[1m21/23[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 32ms/step - accuracy: 0.0806 - loss: 2.7082






[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 193ms/step - accuracy: 0.0800 - loss: 2.7082
Epoch 2/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 38ms/step - accuracy: 0.0753 - loss: 2.7074
Epoch 3/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0904 - loss: 2.7030
Epoch 4/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - accuracy: 0.0977 - loss: 2.6709
Epoch 5/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0676 - loss: 2.7065
Epoch 6/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0896 - loss: 2.6948
Epoch 7/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0950 - loss: 2.7110
Epoch 8/2000
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.0876 - loss: 2.6934
Epoch 9/2000
[1m23/23[0m [32m━━━━━━━━━━━

KeyboardInterrupt: 

In [53]:
res = model.predict(x_test)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1s/step  


In [54]:
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,


In [55]:
for i in range(x_test.shape[0]):
    print(f"Original Video: {actions[labels[i]]}")
    print(f"Predicted Output: {actions[np.argmax(res[i])]}")

Original Video: Are you free today
Predicted Output: Thank You
Original Video: Are you free today
Predicted Output: Please come,Welcome
Original Video: Are you free today
Predicted Output: Can you repeat that please
Original Video: Are you free today
Predicted Output: how are you
Original Video: Are you free today
Predicted Output: no
Original Video: Are you free today
Predicted Output: Congratulations
Original Video: Are you free today
Predicted Output: Thank You
Original Video: Are you free today
Predicted Output: Can you repeat that please
Original Video: Are you free today
Predicted Output: Please come,Welcome
Original Video: Are you free today
Predicted Output: Congratulations
Original Video: Are you free today
Predicted Output: Congratulations
Original Video: Are you free today
Predicted Output: Are you free today
Original Video: Are you free today
Predicted Output: What Happened
Original Video: Are you free today
Predicted Output: no
Original Video: Are you free today
Predicted 

In [56]:
model.save('VideoRecognitionV2_Bekar.keras')