In [1]:
import tensorflow as tf
import cv2
import numpy as np

import threading
from time import time

from pathlib import Path
from dotenv import dotenv_values

from yesc.utils import open_video, mp_hands

In [2]:
# Read settings from the local `.config` dotenv file. Both MODEL_PATH and
# DATA_PATH must be defined there; a missing key raises KeyError right here
# instead of failing later in the notebook.
config = dotenv_values('.config')
model_path = Path(config['MODEL_PATH'])
data_path = Path(config['DATA_PATH'])

# Snapshots and the optional recording are written under <DATA_PATH>/out.
# OpenCV's writers do not create missing directories and fail without raising,
# so make sure the folder exists before anything tries to write into it.
output_path = data_path/'out'
output_path.mkdir(parents=True, exist_ok=True)

In [3]:
# Load the saved Keras model that the capture loop uses to score hand landmarks.
model = tf.keras.models.load_model(
    str(model_path.joinpath('landmark-classifier_v2'))
)

In [4]:
def parse_landmark(hand_landmark):
    """Turn a hand-landmark container into a NumPy array of coordinates.

    Args:
        hand_landmark: object whose ``landmark`` attribute is an iterable of
            points, each exposing ``x``, ``y`` and ``z`` attributes.

    Returns:
        ``np.ndarray`` with one ``(x, y, z)`` row per point, in iteration order.
    """
    coords = [[point.x, point.y, point.z] for point in hand_landmark.landmark]
    return np.array(coords)

In [5]:
import enum
@enum.unique
class CAMERA(enum.Enum):
    """Phases the capture loop moves through for every snapshot."""

    # Frames are scanned for hands and scored by the classifier.
    DETECT = 0
    # The remaining countdown value is drawn on each frame.
    COUNTDOWN = 1
    # The current frame is saved to disk; the loop then returns to DETECT.
    CAPTURE = 2

In [6]:
# ------------------------------ Settings ------------------------------
# Title of the live preview window.
WIN_NAME: str = "Camera"
# Minimum classifier score that starts the countdown.
THRES: float = 0.7
# When True, every displayed frame is also appended to an AVI file.
RECORD_VIDEO: bool = False

# ============================= THE SYSTEM ===========================
# Mutable state shared between the frame loop and the timer callback.
camera_state = CAMERA.DETECT
countdown = 3
timer = None

if RECORD_VIDEO:
    # MJPG-encoded AVI at 15 fps with a 640x480 frame size.
    video_writer = cv2.VideoWriter(
        str(output_path.joinpath('the_app.avi')),
        cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
        15,
        (640, 480),
    )
else:
    video_writer = None


def on_count():
    """Timer callback: advance the countdown by one tick.

    Decrements the module-level ``countdown``. When it reaches zero the
    module-level ``camera_state`` is switched to ``CAMERA.CAPTURE`` and
    ``countdown`` is reset to 3 for the next round.
    """
    global camera_state, countdown

    countdown = countdown - 1
    if countdown != 0:
        return

    camera_state = CAMERA.CAPTURE
    countdown = 3


def put_text(text, fontScale=12., **kwargs):
    """Draw ``text`` in green onto the module-level ``frame``.

    Args:
        text: string to render.
        fontScale: scale factor passed through to ``cv2.putText``.
        **kwargs: extra keyword arguments forwarded to ``cv2.putText``.
    """
    anchor = (240, 320)   # fixed text origin passed to cv2.putText
    green = (0, 255, 0)   # frames are BGR, so this is pure green
    cv2.putText(frame, text, anchor, cv2.FONT_HERSHEY_PLAIN,
                fontScale=fontScale, color=green, thickness=4, **kwargs)


# Main capture loop, driven by `camera_state`:
#   DETECT    -> look for hands; a classifier score >= THRES starts the countdown
#   COUNTDOWN -> overlay the current countdown value while timer threads run on_count
#   CAPTURE   -> save the current frame, show the saved file, return to DETECT
# NOTE(review): this loop never calls cv2.waitKey; presumably open_video pumps
# the HighGUI event loop and decides when iteration stops -- confirm.
timers = []
try:
    with open_video(0) as video, mp_hands.Hands(static_image_mode=True,
                                                max_num_hands=2,
                                                min_detection_confidence=0.7) as hands:
        for frame in video:
            # Flip horizontally so the preview behaves like a mirror.
            frame = cv2.flip(frame.copy(), 1)

            if camera_state == CAMERA.DETECT:
                # The hand detector is fed RGB, while the frame itself is BGR.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(image)
                # multi_hand_landmarks is falsy when no hand was found.
                for hand_landmarks in results.multi_hand_landmarks or ():
                    landmark = parse_landmark(hand_landmarks)
                    # verbose=0 keeps Keras from printing a progress bar on
                    # every frame, which would flood the notebook output.
                    pred = model.predict(
                        landmark.reshape((1, -1)), verbose=0).ravel()[0]
                    if pred >= THRES:
                        camera_state = CAMERA.COUNTDOWN
                        # One tick per second; the number of ticks must match
                        # the value `countdown` starts from (3).
                        timers = [threading.Timer(delay, on_count)
                                  for delay in range(1, 4)]
                        for timer in timers:
                            timer.start()
                        break

            elif camera_state == CAMERA.COUNTDOWN:
                put_text(str(countdown))
            elif camera_state == CAMERA.CAPTURE:
                filename = str(output_path/f'capture_{int(time())}.jpg')

                # Read the file back to confirm that the picture was stored.
                # imwrite reports failure through its return value and imread
                # returns None for an unreadable file, so check both instead
                # of letting imshow crash on a missing image.
                image = cv2.imread(filename) if cv2.imwrite(filename, frame) else None
                if image is None:
                    print(f'Could not save capture to {filename}')
                else:
                    cv2.imshow('Result', image)
                camera_state = CAMERA.DETECT

            cv2.imshow(WIN_NAME, frame)
            if RECORD_VIDEO:
                video_writer.write(frame)
finally:
    # Runs on normal exit, on errors and on a kernel interrupt alike.
    # Cancel ticks that have not fired yet so they cannot change the shared
    # state after the loop is gone (cancel() is a no-op for finished timers).
    for timer in timers:
        timer.cancel()
    cv2.destroyAllWindows()
    if video_writer:
        video_writer.release()
