# 0. Single Pose Estimation of Live Camera and Alien Video

In [None]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python matplotlib

- source: https://github.com/nicknochnack/MoveNetLightning/blob/main/MoveNet%20Tutorial.ipynb
- source: https://www.youtube.com/watch?v=SSW9LzOJSus

Important: download the model file and place it in the Jupyter notebook's working directory so that it can be referenced by its file name.

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np # draw keypoints

In [2]:
from matplotlib import pyplot as plt

In [3]:
# Optional: only does anything when TensorFlow reports at least one GPU.
# Switch on memory growth for each GPU device that TensorFlow lists.
gpus = tf.config.experimental.list_physical_devices('GPU')
for device in gpus:
    tf.config.experimental.set_memory_growth(device, True)

# 3. Draw Keypoints

### Get the frame parameters (shape): height, width, channels
    Denormalize the output coordinates by multiplying the keypoints by these parameters
    Iterate through the denormalized keypoints and draw a circle wherever the confidence score is higher than the preset threshold


In [4]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark every sufficiently confident keypoint on ``frame`` with a filled red dot.

    Args:
        frame: Image array with three axes (rows, columns, channels); it is
            drawn on in place.
        keypoints: Array whose last axis holds (y, x, score). y and x are
            multiplied by the frame's row and column counts, so they are
            presumably normalised to [0, 1] -- confirm against the model output.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value.
    """
    rows, cols, _ = frame.shape

    # Scale y by the row count and x by the column count (the score is left
    # untouched), then drop any singleton axes.
    scaled = np.squeeze(np.multiply(keypoints, [rows, cols, 1]))

    for row_pos, col_pos, score in scaled:
        if not score > confidence_threshold:
            continue
        # OpenCV wants (x, y) integer centres; radius 6, BGR (0, 0, 255) = red,
        # thickness -1 = filled circle.
        cv2.circle(frame, (int(col_pos), int(row_pos)), 6, (0, 0, 255), -1)


# 4. Draw Edges

In [5]:
# Skeleton description: every entry joins two keypoint indices with a line and
# carries a one-letter colour tag ('m', 'c' or 'y').
_EDGE_SPECS = (
    (0, 1, 'm'), (0, 2, 'c'),
    (1, 3, 'm'), (2, 4, 'c'),
    (0, 5, 'm'), (0, 6, 'c'),
    (5, 7, 'm'), (7, 9, 'm'),
    (6, 8, 'c'), (8, 10, 'c'),
    (5, 6, 'y'),
    (5, 11, 'm'), (6, 12, 'c'),
    (11, 12, 'y'),
    (11, 13, 'm'), (13, 15, 'm'),
    (12, 14, 'c'), (14, 16, 'c'),
)

# Keyed by the (start, end) index pair; insertion order follows _EDGE_SPECS.
EDGES = {(start, end): tag for start, end, tag in _EDGE_SPECS}

In [6]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on ``frame`` for every edge whose two endpoints are confident.

    Args:
        frame: Image array with three axes (rows, columns, channels); it is
            drawn on in place.
        keypoints: Array whose last axis holds (y, x, score); y and x are
            scaled by the frame's row and column counts.
        edges: Mapping from a pair of keypoint indices to a colour tag. The tag
            is not used here -- every line is drawn with the same colour.
        confidence_threshold: Both endpoint scores must be strictly greater
            than this value for the line to be drawn.
    """
    rows, cols, _ = frame.shape
    scaled = np.squeeze(np.multiply(keypoints, [rows, cols, 1]))

    for (start_idx, end_idx), _tag in edges.items():
        start_y, start_x, start_score = scaled[start_idx]
        end_y, end_x, end_score = scaled[end_idx]

        both_confident = (start_score > confidence_threshold) & (end_score > confidence_threshold)
        if not both_confident:
            continue

        # OpenCV points are (x, y); colour (255, 0, 0), line thickness 4.
        start_point = (int(start_x), int(start_y))
        end_point = (int(end_x), int(end_y))
        cv2.line(frame, start_point, end_point, (255, 0, 0), 4)

In [7]:
# The .tflite file is looked up relative to the notebook's working directory.
SINGLEPOSE_MODEL_PATH = 'lite-model_movenet_singlepose_lightning_3.tflite'

# Build the TFLite interpreter and allocate its tensors before any inference call.
interpreter = tf.lite.Interpreter(model_path=SINGLEPOSE_MODEL_PATH)
interpreter.allocate_tensors()

In [None]:
#interpreter = tf.lite.Interpreter(model_path='lite-model_movinet_a0_stream_kinetics-600_classification_tflite_float16_2.tflite')
#interpreter.allocate_tensors()

In [8]:
# Live webcam capture with the single-pose overlay.
# Quit with the 'q' key.

cap = cv2.VideoCapture(0)

# The tensor descriptors do not change between frames, so fetch them once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

try:
    while cap.isOpened():
        ret, frame = cap.read()

        # A failed grab (camera missing, busy or unplugged) returns ret=False
        # with frame=None; stop instead of crashing on the None frame.
        if not ret:
            break

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 192x192.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(np.expand_dims(rgb, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        # Make predictions
        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Rendering (drawn on the original BGR frame that is displayed)
        draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
        draw_keypoints(frame, keypoints_with_scores, 0.4)

        cv2.imshow('MoveNet Lightning', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if inference raised.
    cap.release()
    cv2.destroyAllWindows()

In [9]:
# Play the alien video in an endless loop with the single-pose overlay.
# Quit with the 'q' key.
cap = cv2.VideoCapture('alien.mkv')

# Without this guard a missing/unreadable file makes every read() fail, and the
# rewind-and-continue branch below would spin forever without reaching waitKey.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Could not open video file 'alien.mkv'.")

# The tensor descriptors do not change between frames, so fetch them once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

frames_since_rewind = 0
try:
    while True:
        ret, frame = cap.read()

        # No more frames to read (end of the video)
        if not ret:
            if frames_since_rewind == 0:
                # Not a single frame since the last rewind: rewinding again
                # cannot help, so stop rather than loop forever.
                break
            # Reset the video capture object to the beginning of the video
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            frames_since_rewind = 0
            continue
        frames_since_rewind += 1

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 192x192.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(np.expand_dims(rgb, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        # Make predictions
        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Rendering (drawn on the original BGR frame that is displayed)
        draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
        draw_keypoints(frame, keypoints_with_scores, 0.4)

        cv2.imshow('MoveNet Lightning', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if inference raised.
    cap.release()
    cv2.destroyAllWindows()


### Save the video with the prediction without a loop

In [None]:

# Open the input video file
cap = cv2.VideoCapture('alien.mkv')

# Raise instead of calling exit(): exit() would shut down the notebook kernel.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open video file.")

# Get the original video's width and height (named properties instead of 3 / 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write at the source frame rate so the saved clip keeps the input's duration;
# fall back to the previous fixed 30 fps when the container reports none.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

# Define the output video file name and codec
output_file = 'output_video.mkv'  # Change the file name and extension as needed
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec (adjust as needed)

# Create a VideoWriter object to save the video
out = cv2.VideoWriter(output_file, fourcc, fps, (frame_width, frame_height))

# The tensor descriptors do not change between frames, so fetch them once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

try:
    while True:
        ret, frame = cap.read()

        # End of the video (or a read failure): stop recording.
        if not ret:
            break

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 192x192.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(np.expand_dims(rgb, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        # Make predictions
        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Rendering (drawn on the original BGR frame that is shown and saved)
        draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
        draw_keypoints(frame, keypoints_with_scores, 0.4)

        cv2.imshow('MoveNet Lightning', frame)

        # Write the processed frame to the output video
        out.write(frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release video objects even if inference raised, so the output file is
    # finalised and the input is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


### Save the video with prediction with a loop

In [None]:

# Open the input video file
cap = cv2.VideoCapture('alien.mkv')

# Raise instead of calling exit(): exit() would shut down the notebook kernel.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open video file.")

# Get the original video's width and height (named properties instead of 3 / 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write at the source frame rate so playback speed matches the input;
# fall back to the previous fixed 30 fps when the container reports none.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30.0

# Define the output video file name and codec
output_file = 'output_video.mkv'  # Change the file name and extension as needed
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec (adjust as needed)

# Create a VideoWriter object to save the video
out = cv2.VideoWriter(output_file, fourcc, fps, (frame_width, frame_height))

# The tensor descriptors do not change between frames, so fetch them once.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# NOTE: the input is replayed endlessly, so frames keep being appended to the
# output file until 'q' is pressed.
frames_since_rewind = 0
try:
    while True:
        ret, frame = cap.read()

        if not ret:
            if frames_since_rewind == 0:
                # Not a single frame since the last rewind: rewinding again
                # cannot help, so stop rather than loop forever.
                break
            # Reset the video capture object to the beginning of the video
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            frames_since_rewind = 0
            continue
        frames_since_rewind += 1

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 192x192.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(np.expand_dims(rgb, axis=0), 192, 192)
        input_image = tf.cast(img, dtype=tf.float32)

        # Make predictions
        interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
        interpreter.invoke()
        keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

        # Rendering (drawn on the original BGR frame that is shown and saved)
        draw_connections(frame, keypoints_with_scores, EDGES, 0.4)
        draw_keypoints(frame, keypoints_with_scores, 0.4)

        cv2.imshow('MoveNet Lightning', frame)

        # Write the processed frame to the output video
        out.write(frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release video objects even if inference raised, so the output file is
    # finalised and the input is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()


# 1. First attempt at multi-pose detection
### single pose detection is not ideal with multiple agents that are not humanoid

- Source: https://www.youtube.com/watch?v=KC7nJtBHBqg
- Model: https://tfhub.dev/google/movenet/multipose/lightning/1

Calling `hub.load` with the model URL did not work for me, so instead:
- I downloaded the model's tar file
- extracted it, moved the resulting folder into the working directory, and specified its path below

In [6]:
# Load the multi-pose model from the locally extracted copy of the TF Hub download.
import tensorflow as tf
import tensorflow_hub as hub

# Folder name, relative to the working directory, passed to hub.load.
MULTIPOSE_MODEL_DIR = 'movenet_multipose_lightning_1'

# Load the model
model = hub.load(MULTIPOSE_MODEL_DIR)


In [7]:
# Look up the model's 'serving_default' signature by name; the video loops
# further down call the result as movenet(input_img).
movenet = model.signatures['serving_default']

In [8]:

def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Render the skeleton of each detected person onto ``frame``.

    Args:
        frame: Image passed through to the drawing helpers.
        keypoints_with_scores: Iterable with one keypoint set per person.
        edges: Edge mapping forwarded to ``draw_connections``.
        confidence_threshold: Score cut-off forwarded to both drawing helpers.
    """
    for detection in keypoints_with_scores:
        # Connections are drawn first and keypoints second, in that order.
        draw_connections(frame, detection, edges, confidence_threshold)
        draw_keypoints(frame, detection, confidence_threshold)



In [9]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark every sufficiently confident keypoint on ``frame`` with a filled green dot.

    Args:
        frame: Image array with three axes (rows, columns, channels); it is
            drawn on in place.
        keypoints: Array whose last axis holds (y, x, score); y and x are
            scaled by the frame's row and column counts.
        confidence_threshold: A keypoint is drawn only when its score is
            strictly greater than this value.
    """
    rows, cols, _ = frame.shape

    # Scale y by rows and x by cols (score untouched), then drop singleton axes.
    scaled = np.squeeze(np.multiply(keypoints, [rows, cols, 1]))

    for row_pos, col_pos, score in scaled:
        if not score > confidence_threshold:
            continue
        # (x, y) integer centre, radius 6, colour (0, 255, 0), -1 = filled.
        cv2.circle(frame, (int(col_pos), int(row_pos)), 6, (0, 255, 0), -1)


In [10]:
# Pairs of keypoint indices to connect, each with a one-letter colour tag.
# Built from an ordered list of (pair, tag) items so the order stays explicit.
EDGES = dict([
    ((0, 1), 'm'),
    ((0, 2), 'c'),
    ((1, 3), 'm'),
    ((2, 4), 'c'),
    ((0, 5), 'm'),
    ((0, 6), 'c'),
    ((5, 7), 'm'),
    ((7, 9), 'm'),
    ((6, 8), 'c'),
    ((8, 10), 'c'),
    ((5, 6), 'y'),
    ((5, 11), 'm'),
    ((6, 12), 'c'),
    ((11, 12), 'y'),
    ((11, 13), 'm'),
    ((13, 15), 'm'),
    ((12, 14), 'c'),
    ((14, 16), 'c'),
])

In [11]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw a line on ``frame`` for every edge whose two endpoints are confident.

    Args:
        frame: Image array with three axes (rows, columns, channels); it is
            drawn on in place.
        keypoints: Array whose last axis holds (y, x, score); y and x are
            scaled by the frame's row and column counts.
        edges: Mapping from a pair of keypoint indices to a colour tag. The tag
            is not used here -- every line is drawn with the same colour.
        confidence_threshold: Both endpoint scores must be strictly greater
            than this value for the line to be drawn.
    """
    rows, cols, _ = frame.shape
    scaled = np.squeeze(np.multiply(keypoints, [rows, cols, 1]))

    for (start_idx, end_idx), _tag in edges.items():
        start_y, start_x, start_score = scaled[start_idx]
        end_y, end_x, end_score = scaled[end_idx]

        both_confident = (start_score > confidence_threshold) & (end_score > confidence_threshold)
        if not both_confident:
            continue

        # OpenCV points are (x, y); colour (0, 0, 255), line thickness 4.
        start_point = (int(start_x), int(start_y))
        end_point = (int(end_x), int(end_y))
        cv2.line(frame, start_point, end_point, (0, 0, 255), 4)

In [23]:
# Play the alien video once with the multi-pose overlay. Quit with the 'q' key.
cap = cv2.VideoCapture('alien.mkv')

try:
    while cap.isOpened():
        ret, frame = cap.read()

        # At the end of the file read() returns ret=False with frame=None while
        # isOpened() is still true; stop here instead of crashing on None.
        if not ret:
            break

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 384x640.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(tf.expand_dims(rgb, axis=0), 384, 640)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section: keep the first 51 values per detection and view
        # them as 6 detections x 17 keypoints x (y, x, score).
        results = movenet(input_img)
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Render keypoints (drawn on the original BGR frame that is displayed)
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)

        cv2.imshow('Movenet Multipose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if inference raised.
    cap.release()
    cv2.destroyAllWindows()


## Save the multi-pose video with predictions
 Problem: the saved video plays faster and is shorter than the input video!

In [29]:

# Open the input video file
cap = cv2.VideoCapture('alien.mkv')

# Raise instead of calling exit(): exit() would shut down the notebook kernel.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open video file.")

# Get the original video's width and height (named properties instead of 3 / 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write at the source frame rate: a fixed rate that differs from the input's
# changes the playback speed and duration of the saved clip. Fall back to the
# previous fixed 8 fps only when the container reports no rate.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 8.0

# Define the codec and create a VideoWriter object to save the output video
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec (adjust as needed)
output_video = cv2.VideoWriter('output_video.mkv', fourcc, fps, (frame_width, frame_height))

try:
    while cap.isOpened():
        ret, frame = cap.read()

        # End of the video (or a read failure): stop recording.
        if not ret:
            break

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 384x640.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(tf.expand_dims(rgb, axis=0), 384, 640)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section: keep the first 51 values per detection and view
        # them as 6 detections x 17 keypoints x (y, x, score).
        results = movenet(input_img)
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Render keypoints (drawn on the original BGR frame that is saved)
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)

        # Write the frame to the output video
        output_video.write(frame)

        cv2.imshow('Movenet Multipose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release video objects even if inference raised, so the output file is
    # finalised and the input is closed.
    cap.release()
    output_video.release()
    cv2.destroyAllWindows()


In [12]:

# Open the input video file
cap = cv2.VideoCapture('alien2.mkv')

# Raise instead of calling exit(): exit() would shut down the notebook kernel.
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open video file.")

# Get the original video's width and height (named properties instead of 3 / 4)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write at the source frame rate: a fixed rate that differs from the input's
# changes the playback speed and duration of the saved clip. Fall back to the
# previous fixed 8 fps only when the container reports no rate.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 8.0

# Define the codec and create a VideoWriter object to save the output video.
# NOTE: this writes to the same 'output_video.mkv' name used by the other
# saving cells in this notebook, so an earlier result is overwritten.
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # Codec (adjust as needed)
output_video = cv2.VideoWriter('output_video.mkv', fourcc, fps, (frame_width, frame_height))

try:
    while cap.isOpened():
        ret, frame = cap.read()

        # End of the video (or a read failure): stop recording.
        if not ret:
            break

        # OpenCV delivers BGR frames while MoveNet is documented to take RGB,
        # so convert before adding the batch axis and letterboxing to 384x640.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        img = tf.image.resize_with_pad(tf.expand_dims(rgb, axis=0), 384, 640)
        input_img = tf.cast(img, dtype=tf.int32)

        # Detection section: keep the first 51 values per detection and view
        # them as 6 detections x 17 keypoints x (y, x, score).
        results = movenet(input_img)
        keypoints_with_scores = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Render keypoints (drawn on the original BGR frame that is saved)
        loop_through_people(frame, keypoints_with_scores, EDGES, 0.1)

        # Write the frame to the output video
        output_video.write(frame)

        cv2.imshow('Movenet Multipose', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release video objects even if inference raised, so the output file is
    # finalised and the input is closed.
    cap.release()
    output_video.release()
    cv2.destroyAllWindows()
