In [1]:
import os
import cv2
import mediapipe as mp
import numpy as np
import mediapipe as mp

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

import feedforward

## Set up the data folder and register the training data

In [2]:
# Folder (relative to the notebook's working directory) that the
# feedforward helpers below are pointed at.
DATA_PATH = 'dist'

In [None]:
# Ask the project helper to create DATA_PATH when it is missing.
# NOTE(review): behaviour inferred from the helper's name; its implementation
# lives in the local `feedforward` module and is not visible here.
feedforward.create_folder_if_not_exists(DATA_PATH)

In [None]:
# NOTE(review): presumably removes everything currently stored in DATA_PATH,
# i.e. this cell looks destructive -- confirm against feedforward.clear_folder
# and skip it (e.g. on "Run All") when existing recordings must be kept.
feedforward.clear_folder(DATA_PATH)

In [None]:
# Recording parameters handed to feedforward.register_videos in the next cell.
# NOTE(review): judging by the names these are the number of clips, the frames
# captured per clip and the file-name prefix -- confirm against the helper.
prefix, num_videos, num_frames_per_video = 'arrow_wrist', 20, 100

In [None]:
# Register the training recordings under DATA_PATH using the settings chosen
# in the previous cell.
# NOTE(review): presumably captures from the camera and writes files named
# with `prefix` -- confirm against feedforward.register_videos.
feedforward.register_videos(DATA_PATH, prefix, num_videos, num_frames_per_video)

## Augment Data and Build Model

In [None]:
# Prefix passed to the augmentation helper; the cleanup cell further down
# passes the same prefix to delete_files_with_prefix.
prefix_for_augmented = "augmented_"
# NOTE(review): presumably writes augmented copies of the recordings into
# DATA_PATH, named with the prefix above -- confirm against
# feedforward.augment_dataset.
feedforward.augment_dataset(DATA_PATH, prefix_for_augmented)

In [None]:
# Cleanup cell for the augmentation step.
# NOTE(review): presumably deletes every file in DATA_PATH whose name starts
# with `prefix_for_augmented`; running it right after the augmentation cell
# (e.g. via "Run All") would discard the augmented data before training --
# confirm against feedforward.delete_files_with_prefix.
feedforward.delete_files_with_prefix(DATA_PATH, prefix_for_augmented)

In [None]:
# Print every pose landmark next to its position in the enum, as a lookup
# table for choosing the joint to track.
pose_landmarks = mp.solutions.holistic.PoseLandmark
for landmark_idx, landmark in enumerate(pose_landmarks):
    print(landmark_idx, '=>', landmark)

In [3]:
# Window length and tracked joint used to turn the recordings in DATA_PATH
# into training arrays.
window_len = 20
target_joint = mp.solutions.holistic.PoseLandmark.LEFT_WRIST

# The helper returns the model inputs and the matching targets as a pair.
training_arrays = feedforward.build_training_data(DATA_PATH, window_len, target_joint)
X_training, y_training = training_arrays

# Show both shapes as the cell output.
(X_training.shape, y_training.shape)

NameError: name 'mp' is not defined

In [None]:
# Sequence-to-point regressor: two stacked LSTM layers followed by a linear
# Dense layer that emits the two predicted values (read as x and y by the
# live-test cell).
model = Sequential([
    # The first LSTM returns the whole sequence so the second one can consume it.
    LSTM(
        64,
        input_shape=(X_training.shape[1], X_training.shape[2]),  # (timesteps, features)
        activation='relu',
        return_sequences=True,
    ),
    # The second LSTM keeps only its final state for the prediction.
    LSTM(64, activation='relu', return_sequences=False),
    # Linear output layer with 2 units.
    Dense(2),
])

# This is a regression on continuous values, so 'accuracy' (a classification
# metric) carries no information here; report mean absolute error next to
# the MSE loss instead.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Hold out the last 20% of the samples for validation.
history = model.fit(X_training, y_training, batch_size=32, epochs=100, validation_split=0.2)

model.summary()

## Test

In [None]:
# Live test: track the trained joint on the camera feed, feed its most recent
# normalised (x, y) positions to the model and draw the predicted position.
# Press 'q' in the video window to stop.

# Number of timesteps the model was built with, so that the sequences fed at
# inference time have the same length as the training sequences.
sequence_len = model.input_shape[1]
frame_buffer = []

cap = cv2.VideoCapture(feedforward.CAMERA_INDEX)
try:
    # The context manager releases the MediaPipe graph when the loop ends.
    with mp.solutions.holistic.Holistic() as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe expects RGB input; OpenCV delivers BGR frames.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(frame_rgb)

            if results.pose_landmarks:
                # Track the same joint the training data was built for.
                wrist = results.pose_landmarks.landmark[target_joint]

                height, width, _ = frame.shape
                # Landmark coordinates are scaled by the frame size to get pixels.
                wrist_x, wrist_y = int(wrist.x * width), int(wrist.y * height)
                cv2.circle(frame, (wrist_x, wrist_y), 5, (255, 0, 0), -1)

                # Only x and y are used as features (z is left out).
                frame_buffer.append([wrist.x, wrist.y])

                if len(frame_buffer) >= sequence_len:
                    # Shape (1, sequence_len, 2): a batch holding one sequence.
                    prediction_input = np.array(frame_buffer)[np.newaxis, :, :]
                    # verbose=0 keeps Keras from printing a progress bar per frame.
                    prediction = model.predict(prediction_input, verbose=0)

                    prediction_x = int(prediction[0][0] * width)
                    prediction_y = int(prediction[0][1] * height)

                    cv2.circle(frame, (prediction_x, prediction_y), 5, (255, 0, 255), -1)
                    cv2.arrowedLine(frame, (wrist_x, wrist_y), (prediction_x, prediction_y), (255, 0, 255), 2)

                    # Slide the window: drop the oldest sample.
                    frame_buffer.pop(0)

            cv2.imshow('Video', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()