In [None]:
import cv2

# Open the clip whose frame rate we want to inspect.
video_path = 'video_data/stick-figure.gif'  # Replace with your video path
cap = cv2.VideoCapture(video_path)

if cap.isOpened():
    # Report the frame rate stored in the container, then free the handle.
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"FPS: {fps}")
    cap.release()
else:
    print("Error: Could not open video.")

MediaPipe observations:
- Ran it on a few videos and realised that the MediaPipe Pose Landmarker cannot detect a person if the head is not visible in the frame, likely because of its top-down approach to pose detection.
- Pose landmarkers are trained on close-up cases, so it is difficult to detect a person who is far away in the frame.

In [None]:
!pip install ultralytics
!pip install opencv-python
!pip install ipywidgets
!pip install matpotlib
!pip install moviepy

In [None]:
from ultralytics import YOLO

# Checkpoint name handed to the YOLO constructor.
MODEL_NAME = 'yolo11s-pose.pt'
model = YOLO(MODEL_NAME)

# Video handed to the tracker. A local file such as 'video_data/fight_1.mp4'
# (previously tried with conf=0.7) can be substituted here.
track_source = 'https://www.youtube.com/watch?v=Sa8gm2F0r8g&ab_channel=SufihanKhanMahabub'
result = model.track(source=track_source, conf=0.5)


In [None]:
# How many result entries the tracking call returned.
len(result)

In [None]:
import numpy as np
from moviepy import ImageSequenceClip
import json

# Per-frame outputs collected while walking the tracking results:
#   annotated_frames    - source frame with the pose overlay (RGB)
#   keypoints_frames    - pose overlay on a black canvas (RGB)
#   keypoints_per_frame - JSON-serialisable keypoint lists, one dict per frame
annotated_frames = []
keypoints_frames = []
keypoints_per_frame = []

for index, r in enumerate(result):

    height, width = r.orig_shape

    # Black canvas of the frame's size so only the skeleton is visible.
    blank_image = np.zeros((height, width, 3), dtype=np.uint8)

    # r.plot() returns an OpenCV-style BGR array, while ImageSequenceClip reads
    # frames as RGB. Reverse the channel axis so the GIF colours are not swapped.
    img = r.plot(img=blank_image, boxes=False)
    keypoints_frames.append(np.ascontiguousarray(img[..., ::-1]))

    img = r.plot(boxes=False)
    annotated_frames.append(np.ascontiguousarray(img[..., ::-1]))

    # Move the keypoint tensor to plain Python lists so json can serialise it.
    keypoints_per_frame.append({"Frame": index, "Keypoints": r.keypoints.data.detach().cpu().tolist()})

# Fixed playback rate for the exported GIFs; it is not read from the source video.
fps = 25
clip = ImageSequenceClip(keypoints_frames, fps=fps)  # Adjust fps as needed
clip.write_gif("keypoints.gif", fps=fps)

clip = ImageSequenceClip(annotated_frames, fps=fps)  # Adjust fps as needed
clip.write_gif("annotated.gif", fps=fps)

# Specify the file path
file_path = "keypoints_per_frame.json"

# Write data to the JSON file
with open(file_path, "w", encoding="utf-8") as json_file:
    json.dump(keypoints_per_frame, json_file, ensure_ascii=False, indent=4)


In [None]:
import torch
import json

# Read back the per-frame keypoints, using the same encoding the file was
# written with.
with open('keypoints_per_frame.json', 'r', encoding='utf-8') as f:
    data_list = json.load(f)

# Show a bounded summary instead of dumping every frame into the cell output.
print(f"Loaded {len(data_list)} frames; first entry: {data_list[0]}")
# Convert list back to tensor
tensor_restored = torch.tensor(data_list[0]["Keypoints"])


In [None]:
from ultralytics.engine.results import Results, Keypoints
import numpy as np

# Placeholder background image; the array is laid out as (height, width, channels).
orig_img = np.zeros((640, 480, 3), dtype=np.uint8)
orig_shape = orig_img.shape[:2]  # (height, width)

# Prediction fields that are left empty for this reconstruction.
boxes = masks = probs = obb = None
save_dir = None

# Keypoints wrapper built from the restored tensor.
# NOTE(review): this object is not what gets passed to Results below (the raw
# tensor is) - confirm whether it is still needed.
keypoints = Keypoints(tensor_restored, orig_shape)

# Remaining metadata for the Results constructor.
speed = {'preprocess': 0.0, 'inference': 0.0, 'postprocess': 0.0}
names = {0: 'person', 1: 'dog'}
path = 'dummy.jpg'

# Assemble the constructor arguments in one place, then build the object.
results_kwargs = dict(
    orig_img=orig_img,
    boxes=boxes,
    masks=masks,
    probs=probs,
    keypoints=tensor_restored,
    obb=obb,
    speed=speed,
    names=names,
    path=path,
)
results = Results(**results_kwargs)

# Cell output: the keypoint data held by the rebuilt object, as nested lists.
results.keypoints.data.detach().cpu().tolist()

In [None]:
import matplotlib.pyplot as plt
import cv2

# Draw the Results object built above onto its image.
img = results.plot()

# The plotted array is in OpenCV's BGR order; matplotlib expects RGB.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Show the image inline on the current axes, without ticks or labels.
ax = plt.gca()
ax.imshow(img_rgb)
ax.axis('off')
plt.show()
