# In [13]:
import tensorflow as tf
import numpy as np
from PIL import Image, ImageDraw
import cv2

class Pose:
    """Pose estimator that decodes heatmap/offset outputs of a TFLite model.

    The model is loaded once in ``__init__``; ``calc`` runs inference on one
    image and returns pixel coordinates plus a confidence score for each name
    in ``KEYPOINTS``; ``draw_pose`` renders such a result onto an image.
    """

    # Body-part names, in the order of the model's keypoint channels.
    KEYPOINTS = (
        'nose', 'left eye', 'right eye', 'left ear', 'right ear',
        'left shoulder', 'right shoulder', 'left elbow', 'right elbow',
        'left wrist', 'right wrist', 'left hip', 'right hip', 'left knee',
        'right knee', 'left ankle', 'right ankle'
    )

    # Pairs of keypoints joined by a line when the skeleton is drawn.
    EDGES = (
        ('nose', 'left eye'), ('nose', 'right eye'), ('nose', 'left ear'),
        ('nose', 'right ear'), ('left ear', 'left eye'), ('right ear', 'right eye'),
        ('left eye', 'right eye'), ('left shoulder', 'right shoulder'),
        ('left shoulder', 'left elbow'), ('left shoulder', 'left hip'),
        ('right shoulder', 'right elbow'), ('right shoulder', 'right hip'),
        ('left elbow', 'left wrist'), ('right elbow', 'right wrist'),
        ('left hip', 'right hip'), ('left hip', 'left knee'), ('right hip', 'right knee'),
        ('left knee', 'left ankle'), ('right knee', 'right ankle')
    )

    def __init__(self, model_path):
        """Load the TFLite model at ``model_path`` and allocate its tensors."""
        self.tflite_interpreter = tf.lite.Interpreter(model_path=model_path)
        self.tflite_interpreter.allocate_tensors()
        self.input_details = self.tflite_interpreter.get_input_details()
        self.output_details = self.tflite_interpreter.get_output_details()

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``."""
        return 1 / (1 + np.exp(-z))

    def calc(self, img):
        """Run the model on ``img`` and decode one position per keypoint.

        Args:
            img: Image convertible with ``np.asarray`` that exposes ``width``
                and ``height`` attributes (e.g. a PIL image). It is fed to
                the model as-is, so it must already have the model's input
                size.

        Returns:
            dict mapping each name in ``KEYPOINTS`` to
            ``{'x': int, 'y': int, 'score': float}`` (coordinates in pixels of
            ``img``, score = sigmoid of the heatmap peak), followed by a final
            ``'total_score'`` entry holding the mean keypoint score.
        """
        # Scale 0..255 pixel values to roughly [-1, 1) and add the batch axis.
        input_data = np.expand_dims(np.asarray(img).astype('float32') / 128.0 - 1.0, axis=0)

        # Ensure input_data is 4D (batch_size, height, width, channels)
        if input_data.ndim == 3:
            input_data = np.expand_dims(input_data, axis=-1)

        interpreter = self.tflite_interpreter
        interpreter.set_tensor(self.input_details[0]['index'], input_data)
        interpreter.invoke()

        output_tensor = [interpreter.get_tensor(detail['index']) for detail in self.output_details]

        # Drop the batch axis: heatmaps is (rows, cols, keypoints); offsets is
        # (rows, cols, 2 * keypoints) with the y offsets first, then the x offsets.
        heatmaps = np.asarray(output_tensor[0])[0]
        offsets = np.asarray(output_tensor[1])[0]

        height, width, num_keypoints = heatmaps.shape

        # Peak cell per keypoint. argmax over the row-major flattened grid
        # returns the first maximum, matching a row-by-row scan with '>'.
        flat_peaks = heatmaps.reshape(height * width, num_keypoints).argmax(axis=0)
        peak_rows, peak_cols = np.unravel_index(flat_peaks, (height, width))

        # A 1-cell axis would otherwise divide by zero; its only index is 0,
        # so any non-zero divisor yields the same coordinate.
        col_span = max(width - 1, 1)
        row_span = max(height - 1, 1)

        output_dic = {}
        total_score = 0.0
        peaks = zip(Pose.KEYPOINTS, peak_rows.tolist(), peak_cols.tolist())
        for idx, (bodypart, row, col) in enumerate(peaks):
            output_dic[bodypart] = {
                'x': int(col / col_span * img.width + offsets[row][col][idx + num_keypoints]),
                'y': int(row / row_span * img.height + offsets[row][col][idx]),
                'score': self._sigmoid(heatmaps[row][col][idx])
            }
            total_score += output_dic[bodypart]['score']

        output_dic['total_score'] = total_score / len(Pose.KEYPOINTS)

        return output_dic

    def draw_pose(self, pose, img, threshold=0.5, marker_color='green', color='yellow', marker_size=5, thickness=2):
        """Draw the skeleton and keypoint markers of ``pose`` onto ``img``.

        Args:
            pose: Result dict in the format returned by ``calc``.
            img: PIL image to draw on; it is modified in place.
            threshold: Keypoints scoring below this are not drawn, and an edge
                is drawn only if both of its endpoints reach it.
            marker_color: Fill colour of the keypoint markers.
            color: Colour of the skeleton lines.
            marker_size: Diameter of each keypoint marker, in pixels.
            thickness: Width of the skeleton lines, in pixels.

        Returns:
            The same ``img`` object, after drawing.
        """
        draw = ImageDraw.Draw(img)

        for p1, p2 in Pose.EDGES:
            if (pose[p1]['score'] < threshold) or (pose[p2]['score'] < threshold):
                continue
            draw.line((pose[p1]['x'], pose[p1]['y'], pose[p2]['x'], pose[p2]['y']), fill=color, width=thickness)

        half = marker_size / 2
        for label, keypoint in pose.items():
            # 'total_score' is a summary value, not a keypoint. Skip it rather
            # than stop, so markers do not depend on where that key sits.
            if label == 'total_score':
                continue
            if keypoint['score'] < threshold:
                continue
            draw.ellipse(
                (int(keypoint['x'] - half), int(keypoint['y'] - half),
                 int(keypoint['x'] + half), int(keypoint['y'] + half)),
                fill=marker_color)

        return img

# Initialize the Pose class with your model path
pose = Pose(r"C:\Users\Renzo\Documents\Project Design\PoseNet\posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite")

# Load the input video
input_video_path = r'C:\Users\Renzo\Documents\Project Design\Datasets\test\jumping_jacks\jump jacks_39.mp4'
output_video_path = r'C:\Users\Renzo\Documents\Project Design\PoseNet\Pose Output\output_video.mp4'

# Frames are resized to this (width, height) before inference; the size
# matches the "257x257" in the model file name above.
model_input_size = (257, 257)

cap = cv2.VideoCapture(input_video_path)
# Fail loudly instead of reporting success for a video that never opened.
if not cap.isOpened():
    raise IOError("Could not open input video: {}".format(input_video_path))

out = None
try:
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # output play at a different speed than the input.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps <= 0:
        # The backend could not report a frame rate; fall back to a common
        # default so the writer still gets a usable value.
        fps = 30.0

    out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
    if not out.isOpened():
        raise IOError("Could not open output video for writing: {}".format(output_video_path))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the frame to PIL Image (OpenCV delivers BGR, PIL expects RGB)
        img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).resize(model_input_size)

        # Perform keypoint detection
        output_dict = pose.calc(img)

        # Draw pose on the image
        img_with_pose = pose.draw_pose(output_dict, img, threshold=0.5, marker_color='green', color='yellow', marker_size=10, thickness=2)

        # Convert the image with pose back to OpenCV format at the original frame size
        frame_with_pose = cv2.cvtColor(np.array(img_with_pose.resize((frame_width, frame_height))), cv2.COLOR_RGB2BGR)

        # Write the frame to the output video
        out.write(frame_with_pose)
finally:
    # Release the capture and writer even if processing a frame raised.
    cap.release()
    if out is not None:
        out.release()

print("Pose estimation video saved successfully.")


# Output: Pose estimation video saved successfully.
