In [None]:
import os
import cv2
import mediapipe as mp
import numpy as np
import mediapipe as mp

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

import feedforward

## Setup folder and register the training data

In [None]:
# Folder handed to the feedforward helpers in the cells below.
# The path is built from its components so os.path.join actually picks the
# separator; with a single 'dist/videos' argument the join was a no-op.
VIDEOS_PATH = os.path.join('dist', 'videos')

In [None]:
# Make sure the videos folder is available before anything is recorded into it.
# Judging by the helper's name this is a no-op when the folder already exists — confirm in feedforward.
feedforward.create_folder_if_not_exists(VIDEOS_PATH)

In [None]:
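# Disabled on purpose: uncomment to call feedforward.clear_folder on the videos folder.
# NOTE(review): the original argument, DATA_PATH, is not defined anywhere in the visible notebook;
# VIDEOS_PATH is the only folder constant here, so it is used instead — confirm.
# feedforward.clear_folder(VIDEOS_PATH)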

In [None]:
# Recording parameters passed to feedforward.register_videos in the next cell.
# Presumably: how many clips to record, and how many frames each clip holds — confirm against the helper.
NUM_VIDEOS, NUM_FRAMES_PER_VIDEO = 20, 100

In [None]:
# Register the training clips through the feedforward helper, using the folder
# and the recording parameters defined in the cells above.
# NOTE(review): the meaning of the last argument ('beautiful_gesture') is not visible from this notebook —
# check feedforward.register_videos before changing it.
video_prefix = 'arrow'
feedforward.register_videos(VIDEOS_PATH, video_prefix, NUM_VIDEOS, NUM_FRAMES_PER_VIDEO, 'beautiful_gesture')

## Classification

### Data Augmentation

In [None]:
# Run the rotation augmentation helper over the videos folder.
# The same prefix is passed to feedforward.delete_files_with_prefix in the next cell,
# so it presumably tags the generated files — confirm in feedforward.
augmented_prefix = "augmented_rotation_"
feedforward.augment_dataset_rotation(VIDEOS_PATH, augmented_prefix)

In [None]:
# Remove the files in the videos folder that carry the augmentation prefix.
# NOTE(review): when the notebook is run top to bottom this executes right after the augmentation cell
# and before the training set is built, so the augmented clips would presumably be gone by then.
# If this cell is only meant for manual clean-up, consider guarding or disabling it.
feedforward.delete_files_with_prefix(VIDEOS_PATH, augmented_prefix)

### Building the training set

In [None]:
# Window length handed to the training-set builder; the live test cell starts
# predicting once WINDOW_LEN / 2 frames have been buffered.
WINDOW_LEN = 20
# Pose landmark whose motion is tracked (the right wrist).
# NOTE(review): "TERGET" is a typo for "TARGET"; the name is kept because the test cell references it.
TERGET_LANDMARK = mp.solutions.holistic.PoseLandmark.RIGHT_WRIST
# Build the model inputs and targets from the recorded videos.
X_training, y_training = feedforward.build_training_arrow_data(VIDEOS_PATH, WINDOW_LEN, TERGET_LANDMARK)
# Display both shapes as the cell output; the model below reads X_training.shape[1] and [2].
X_training.shape, y_training.shape

### Build and Train the model

In [None]:
# Two stacked LSTM layers followed by a linear Dense head that regresses
# NUM_OUTPUT_UNITS values per input window.
# The test cell reads prediction[0][0] and prediction[0][1] as x and y, hence 2 outputs.
NUM_OUTPUT_UNITS = 2

model = Sequential()

# First LSTM returns the whole sequence so the second LSTM can consume it.
# The input shape (time steps, features) is taken from the training array.
model.add(LSTM(64, input_shape=(X_training.shape[1], X_training.shape[2]), activation='relu', return_sequences=True))
# Second LSTM keeps only its final output, which feeds the Dense head.
model.add(LSTM(64, activation='relu', return_sequences=False))
# No activation on the output layer: raw regression values.
model.add(Dense(NUM_OUTPUT_UNITS))

# The model is trained as a regressor (MSE loss, linear output), so classification
# accuracy carries no useful signal; mean absolute error is tracked instead.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# The last 20% of the samples are held out for validation by Keras.
history = model.fit(X_training, y_training, batch_size=32, epochs=100, validation_split=0.2)

model.summary()

## Test

In [None]:
# Live webcam test: track the target landmark, feed a sliding window of
# (target x, target y, nose x, nose y) rows to the trained model and draw an
# arrow from the current landmark position to the predicted position.
# Press 'q' in the video window to stop.
cap = cv2.VideoCapture(feedforward.CAMERA_INDEX)
frame_buffer = []

try:
    # The context manager closes the Holistic graph when the loop ends or fails.
    with mp.solutions.holistic.Holistic() as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; the frame is converted to RGB before processing.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            results = holistic.process(frame_rgb)

            if results.pose_landmarks:
                target_coords = results.pose_landmarks.landmark[TERGET_LANDMARK]
                nose_coords = results.pose_landmarks.landmark[mp.solutions.holistic.PoseLandmark.NOSE]

                target_input = [target_coords.x, target_coords.y]
                nose_input = [nose_coords.x, nose_coords.y]

                # One row per frame: [target x, target y, nose x, nose y].
                frame_buffer.append(target_input + nose_input)

                height, width, _ = frame.shape

                # Landmark coordinates are scaled by the frame size to get pixel positions.
                pixel_coords_x, pixel_coords_y = int(target_coords.x * width), int(target_coords.y * height)
                cv2.circle(frame, (pixel_coords_x, pixel_coords_y), 5, (255, 0, 0), -1)

                if len(frame_buffer) >= WINDOW_LEN/2:
                    # Add the batch axis: (frames, features) -> (1, frames, features).
                    prediction_input = np.array(frame_buffer)[np.newaxis, :, :]
                    # verbose=0 keeps Keras from printing a progress bar on every frame.
                    prediction = model.predict(prediction_input, verbose=0)

                    prediction_x = int(prediction[0][0] * width)
                    prediction_y = int(prediction[0][1] * height)

                    cv2.arrowedLine(frame, (pixel_coords_x, pixel_coords_y), (prediction_x, prediction_y), (255, 0, 255), 2)

                    # Slide the window: drop the oldest frame.
                    frame_buffer = frame_buffer[1:]

            cv2.imshow('Video', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always give the camera back and close the preview window, even if the
    # loop raised or the kernel was interrupted.
    cap.release()
    cv2.destroyAllWindows()