In [None]:
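# Optional one-time setup (uncomment to install the pinned dependencies):
#!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python matplotlib
# Download page of the MoveNet SinglePose Lightning TFLite model used in this notebook:
#https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/3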

In [2]:
# Libraries importieren
%matplotlib inline
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

In [4]:
# Preview the webcam feed to verify that the camera works.

# Open the default camera using the default API backend.
cap = cv2.VideoCapture(0)

try:
    # Fail early with a readable message instead of an opaque OpenCV error
    # raised by cv2.imshow when it is handed an empty frame.
    if not cap.isOpened():
        raise RuntimeError('Could not open the default camera (index 0).')

    while True:
        # Read into a temporary so that `frame` always holds the last valid
        # image; later cells inspect and reuse `frame`.
        ret, grabbed = cap.read()
        if not ret:
            # No frame could be grabbed (camera unplugged, stream ended, ...).
            break
        frame = grabbed

        # Show the current frame.
        cv2.imshow('MoveNet Lightning', frame)

        # Wait 10 ms for a key press; pressing 'q' ends the preview.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close all windows, even after an
    # exception or a kernel interrupt, so the device is not left locked.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
"""https://www.tensorflow.org/api_docs/python/tf/lite/Interpreter"""
# Load the MoveNet model file into a TensorFlow Lite interpreter.
MODEL_PATH = 'lite-model_movenet_singlepose_lightning_3.tflite'
interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)

# TensorFlow Lite pre-plans tensor allocations to optimize inference, so
# allocate_tensors() has to be called before any inference is run.
interpreter.allocate_tensors()

In [5]:
"""A list with a dictionary containing details about an input tensor."""
# Display the input tensor metadata (name, index, shape, dtype, quantization)
# as the cell output.
interpreter.get_input_details()

[{'name': 'serving_default_input:0',
  'index': 0,
  'shape': array([  1, 192, 192,   3]),
  'shape_signature': array([  1, 192, 192,   3]),
  'dtype': numpy.float32,
  'quantization': (0.0, 0),
  'quantization_parameters': {'scales': array([], dtype=float32),
   'zero_points': array([], dtype=int32),
   'quantized_dimension': 0},
  'sparsity_parameters': {}}]

In [6]:
# 'shape': array([  1, 192, 192,   3]) raises the question:
# what resolution does our camera actually produce?
# `frame` is the last image grabbed by the webcam preview loop.
frame.shape

(480, 640, 3)

In [None]:
# The camera delivers (480, 640) but the model wants (192, 192), and the
# aspect ratios differ --> resize_with_pad scales the image and pads the rest.

img = frame.copy()
resized = tf.image.resize_with_pad(img, 192, 192)
# Cast back to 8-bit and swap BGR -> RGB so matplotlib shows true colours.
padded = cv2.cvtColor(np.uint8(resized), cv2.COLOR_BGR2RGB)
plt.imshow(padded)

In [25]:
def preprocess_frame(frame, höhe, breite):
    """Resize a frame with padding and prepend a batch axis.

    Intended to turn a camera frame into the model input, e.g. a tensor of
    shape (1, 192, 192, 3) for höhe = breite = 192 and a 3-channel frame.

    Args:
        frame: Image handed unchanged to ``tf.image.resize_with_pad``.
        höhe: Target height passed to ``resize_with_pad``.
        breite: Target width passed to ``resize_with_pad``.

    Returns:
        The resized and padded image with a new axis inserted at position 0.
    """
    resized = tf.image.resize_with_pad(frame, höhe, breite)
    batched = tf.expand_dims(resized, 0)
    return batched

In [33]:
# Fetch the tensor metadata once; the inference loop relies on both lists.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

input_spec = input_details[0]
output_spec = output_details[0]

# Index at which the input tensor has to be written:
print(input_spec['index'])
# Index at which the output tensor can be read after evaluating the network:
print(output_spec['index'])

# Height of the model input
print(input_spec['shape'][1])
# Width of the model input
print(input_spec['shape'][2])

0
312
192
192


In [39]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Draw a filled green dot on `frame` for every confident keypoint.

    Args:
        frame: Image with a three-element shape (rows, columns, channels);
            it is drawn on in place.
        keypoints: Array of (y, x, score) triples; y and x are multiplied by
            the frame's row and column count to obtain pixel positions.
        confidence_threshold: A keypoint is drawn only if its score is
            strictly greater than this value.
    """
    height, width, _channels = frame.shape
    scale = [height, width, 1]  # leave the score column untouched
    pixel_points = np.squeeze(np.multiply(keypoints, scale))

    for row, col, score in pixel_points:
        if score > confidence_threshold:
            # OpenCV expects the centre as (x, y), i.e. (column, row).
            center = (int(col), int(row))
            cv2.circle(frame, center, 5, (0, 255, 0), -1)

# Skeleton definition: every key is a pair of keypoint indices that
# draw_connections joins with a line segment.
# The single-letter values look like matplotlib colour codes (m/c/y) —
# TODO confirm. draw_connections reads them but currently draws every
# edge in the same fixed colour.
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on `frame` for every edge whose two endpoints are confident.

    Args:
        frame: Image with a three-element shape (rows, columns, channels);
            it is drawn on in place.
        keypoints: Array of (y, x, score) triples; y and x are multiplied by
            the frame's row and column count to obtain pixel positions.
        edges: Mapping from (index_a, index_b) keypoint pairs to a colour
            value. The colour value is not used; all lines share one colour.
        confidence_threshold: An edge is drawn only if both endpoint scores
            are strictly greater than this value.
    """
    height, width, _channels = frame.shape
    pixel_points = np.squeeze(np.multiply(keypoints, [height, width, 1]))

    for (start_idx, end_idx), _color in edges.items():
        start_y, start_x, start_score = pixel_points[start_idx]
        end_y, end_x, end_score = pixel_points[end_idx]

        both_confident = (start_score > confidence_threshold
                          and end_score > confidence_threshold)
        if both_confident:
            # OpenCV expects points as (x, y), i.e. (column, row).
            start_point = (int(start_x), int(start_y))
            end_point = (int(end_x), int(end_y))
            cv2.line(frame, start_point, end_point, (0, 0, 255), 2)

In [45]:
# Tie it all together: run MoveNet on the live webcam feed and draw the pose.

def _undo_resize_padding(keypoints, frame_shape, input_height, input_width):
    """Re-express padded-input keypoints relative to the original frame.

    preprocess_frame uses tf.image.resize_with_pad, which shrinks the frame
    until it fits into the model input and centres it between padding bars.
    The model's y/x values are therefore fractions of the padded input, not
    of the camera frame. This helper converts them to fractions of the frame
    so that the drawing functions, which scale by the frame size, hit the
    right pixels. (resize_with_pad rounds to whole pixels internally, so the
    result can be off by a fraction of a model-input pixel.)

    Args:
        keypoints: Array whose last axis holds (y, x, score).
        frame_shape: Shape of the camera frame; only the first two entries
            (rows, columns) are used.
        input_height: Height of the model input.
        input_width: Width of the model input.

    Returns:
        A float32 copy of `keypoints` with y and x rescaled; scores unchanged.
    """
    frame_height, frame_width = frame_shape[:2]
    # Factor by which the frame was shrunk to fit inside the model input.
    ratio = max(frame_width / input_width, frame_height / input_height)
    # Size of the whole padded input, measured in frame pixels.
    padded_height = input_height * ratio
    padded_width = input_width * ratio

    adjusted = np.array(keypoints, dtype=np.float32)
    adjusted[..., 0] = (adjusted[..., 0] * padded_height
                        - (padded_height - frame_height) / 2) / frame_height
    adjusted[..., 1] = (adjusted[..., 1] * padded_width
                        - (padded_width - frame_width) / 2) / frame_width
    return adjusted


CONFIDENCE_THRESHOLD = 0.4

# These lookups do not change between frames, so do them once.
model_input = input_details[0]
model_output = output_details[0]
input_height = model_input['shape'][1]
input_width = model_input['shape'][2]

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            # No frame available (camera unplugged, stream ended, ...);
            # continuing would pass None into the preprocessing.
            break

        # OpenCV delivers BGR, while the MoveNet model card specifies RGB
        # input, so swap the channels for inference only.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_tensor = preprocess_frame(rgb_frame, höhe=input_height, breite=input_width)

        # Make predictions
        interpreter.set_tensor(model_input['index'], input_tensor)
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(model_output['index'])

        # Compensate for the padding added during preprocessing before
        # scaling the keypoints onto the (unpadded) camera frame.
        frame_keypoints = _undo_resize_padding(
            keypoints_with_scores, frame.shape, input_height, input_width)

        # Rendering (on the original BGR frame)
        draw_connections(frame, frame_keypoints, EDGES, CONFIDENCE_THRESHOLD)
        draw_keypoints(frame, frame_keypoints, CONFIDENCE_THRESHOLD)

        cv2.imshow('MoveNet Lightning', frame)

        # Wait 10 ms for a key press; pressing 'q' ends the loop.
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even after an exception
    # or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()