In [1]:
import cv2
import ctypes
import numpy as np

import matplotlib.pyplot as plt
from tqdm import tqdm
import torch
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

import sys
sys.path.append("../")

from Utils.utils import (
    COLORMAP,
    heatmaps_to_coordinates,
    N_KEYPOINTS,
    RAW_IMG_SIZE,
    MODEL_IMG_SIZE,
    show_batch_predictions,
    DATASET_MEANS,
    DATASET_STDS,
)
from Utils.model import ShallowUNet
from Utils.dataset import FreiHAND

In [2]:
config = {
    "data_dir": "FreiHAND_pub_v2",
    "model_path": "model_final",
    "test_batch_size": 4,
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
}

In [3]:
model = ShallowUNet(3, 21)
model.load_state_dict(
    torch.load(config["model_path"], map_location=torch.device(config["device"]))
)
model.to(config["device"])
model.eval()
print("Model loaded")

Model loaded


In [4]:
image_transform = transforms.Compose(
            [   
                transforms.CenterCrop(1080),
                transforms.Resize(MODEL_IMG_SIZE),
                transforms.ToTensor(),
                transforms.Normalize(mean=DATASET_MEANS, std=DATASET_STDS),
            ]
        )

In [5]:
def pre_process(image):
    image = Image.fromarray(image)
    print(image)
    image = image_transform(image)
    image = image.float()
    image = image.cuda()
    image = image.unsqueeze(0)
    return image

In [29]:
cap = cv2.VideoCapture(0)

cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

ret, frame = cap.read()

# Change colour format from BGR to RGB
image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# Flip image along y axis
image = cv2.flip(image, 1)

# Set flag to false
image.flags.writeable = False

image = pre_process(image)

image.shape

<PIL.Image.Image image mode=RGB size=1920x1080 at 0x158AB424A90>


torch.Size([1, 3, 128, 128])

In [36]:
cap.release()
cv2.destroyAllWindows()  

In [7]:
WINDOW_NAME = 'Demo'

cap = cv2.VideoCapture(0)

user32 = ctypes.windll.user32
screen_width, screen_height = user32.GetSystemMetrics(0), user32.GetSystemMetrics(1)

cap.set(cv2.CAP_PROP_FRAME_WIDTH, screen_width)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, screen_height)


while cap.isOpened():

    ret, frame = cap.read()

    # Change colour format from BGR to RGB
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Flip image along y axis
    image = cv2.flip(image, 1)

    # Set flag to false
    image.flags.writeable = False

    image_tensor = pre_process(image)
    pred_heatmaps = model(image_tensor)
    pred_heatmaps = pred_heatmaps.detach().cpu().numpy()
    pred_keypoints = heatmaps_to_coordinates(pred_heatmaps).squeeze(0)

    for joint in pred_keypoints:
        joint = tuple(np.multiply(joint, [1920,1080]).astype(int))
        cv2.circle(image, joint, 2, (255,255,255), 2)

    # Set flag back to true
    image.flags.writeable = True

    # Change colour format from RGB to BGR
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    
    cv2.imshow(WINDOW_NAME, image)

    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()  

<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mode=RGB size=1920x1080 at 0x1A4341DFE20>
<PIL.Image.Image image mo

In [None]:
cap = cv2.VideoCapture(0)

with mp_hands.Hands(min_detection_confidence=0.8, min_tracking_confidence=0.5) as hands:
    while cap.isOpened():
        ret, frame = cap.read()

        # Change colour format from BGR to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Flip image along y axis
        image = cv2.flip(image, 1)

        # Set flag to false
        image.flags.writeable = False
        
        # Detections
        results = hands.process(image)

        # Set flag back to true
        image.flags.writeable = True

        # Change colour format from RGB to BGR
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # print(results)
        
        # Rendering results
        if results.multi_hand_landmarks:
            for idx, hand in enumerate(results.multi_hand_landmarks):
                mp_drawing.draw_landmarks(image, hand, mp_hands.HAND_CONNECTIONS)

            # draw_finger_angle(image, results, joint_list)
            draw_drumstick(image, results, drum_joint_list)


        cv2.imshow('Hand Tracking', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()  