In [2]:
import cv2
import numpy as np
import mediapipe as mp
import pickle
import time
from scipy.interpolate import interp1d
from fastdtw import fastdtw
from scipy.spatial.distance import cosine

In [3]:
mp_drawing=mp.solutions.drawing_utils
mp_pose=mp.solutions.pose

##### Visualizing Landmarks from WebCam (RealTime)
**This just shows the Landmarks on screen, not stores them.**

In [12]:
cap=cv2.VideoCapture(0)
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    while cap.isOpened():
        ret, frame = cap.read()
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

            # Make detection
        results = pose.process(image)

            # Recolor back to BGR
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # print(results)
        
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        cv2.imshow('Look at Yourself', image)
        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1717434218.633829   16043 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717434218.678597   68407 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)
W0000 00:00:1717434218.760470   68399 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1717434218.782136   68398 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.

XSMP error:  Offending minor opcode    = 5 (InteractRequest)
             Offending sequence number = 24
             Error class               = BadState
             Severity                  = CanContinue


XSMP error:  Offending minor opcode    = 7 (InteractDone)
             Offending sequence number = 25
             Error class               = BadSta

##### Extracting Landmarks from Video using Pickle

In [9]:
cap=cv2.VideoCapture("tornado_kick_demo.mp4")
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    frame_number=0
    landmarks_data = []
    while cap.isOpened():
        ret, frame = cap.read()
        # Breaks loop when there are no more frames to read
        if not ret:
            break
        
        
        
        # Coloring to RGB
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Make detection
        results = pose.process(image)

        # Recolor back to BGR
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # print(results)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            # Save the landmarks data to the list
            landmarks_data.append({
                "frame": frame_number,
                "landmarks": [{
            "id": i,
            "x": landmark.x,
            "y": landmark.y,
            "z": landmark.z,
            "visibility": landmark.visibility
        } for i, landmark in enumerate(landmarks)]
            })
        
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
        cv2.imshow('Reference video', image)
        frame_number += 1

    
        
        if cv2.waitKey(10) & 0xFF==ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
    # Save the landmarks data to a pickle file
    with open('landmarks_reference.pkl', 'wb') as file:
        pickle.dump(landmarks_data, file)


[aac @ 0x3f68480] channel element 3.9 is not allocated
I0000 00:00:1717417577.936643   16043 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717417577.940031   33919 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)
W0000 00:00:1717417578.068904   33909 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1717417578.110294   33915 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


#### Code for comparing reference video with Realtime video

Defining Functions

In [4]:
def load_landmarks(file_path):
    with open(file_path, 'rb') as f:
        return pickle.load(f)

def extract_landmarks_from_frame(frame, pose):
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = pose.process(image)
    # image.flags.writeable = True
    if results.pose_landmarks:
        landmarks = results.pose_landmarks.landmark
        return [{
            "id": i,
            "x": landmark.x,
            "y": landmark.y,
            "z": landmark.z,
            "visibility": landmark.visibility
        } for i, landmark in enumerate(landmarks)]
    return None


def calculate_landmark_distances(landmarks1, landmarks2):
    distances = []
    for lm1, lm2 in zip(landmarks1, landmarks2):
        sum=[]
        for i,j in zip(lm1,lm2):
            distance = np.sqrt((i['x'] - j['x']) ** 2 + (i['y'] - j['y']) ** 2 + (i['z'] - j['z']) ** 2)
            sum.append(distance)
        distances.append(np.mean(sum))
    #     print(lm1)
    return distances

def interpolate_landmarks(real_time_landmarks, target_frame_count):
    original_frame_count = len(real_time_landmarks)
    # print("Original FrameCount",original_frame_count)
    frame_indices = np.arange(original_frame_count)
    # print(frame_indices)
    target_indices = np.linspace(0, original_frame_count - 1, target_frame_count)
    # print(target_indices)
    
    interpolated_landmarks = []
    for i in range(33):  # Number of landmarks
        xs = [frame['landmarks'][i]['x'] for frame in real_time_landmarks]
        # print(xs)
        ys = [frame['landmarks'][i]['y'] for frame in real_time_landmarks]
        zs = [frame['landmarks'][i]['z'] for frame in real_time_landmarks]
        
        interp_x = interp1d(frame_indices, xs, kind='cubic')(target_indices)
        interp_y = interp1d(frame_indices, ys, kind='cubic')(target_indices)
        interp_z = interp1d(frame_indices, zs, kind='cubic')(target_indices)
        for j, idx in enumerate(target_indices):
            if len(interpolated_landmarks) <= j:
                interpolated_landmarks.append({
                    "frame": j,
                    "landmarks": []
                })
            interpolated_landmarks[j]['landmarks'].append({
                "id": i,
                "x": interp_x[j],
                "y": interp_y[j],
                "z": interp_z[j],
                "visibility": 1.0  # Assuming visibility is 1.0 for interpolated landmarks
            })
    
    return interpolated_landmarks


Loading Landmarks from Pickle File

In [5]:

# Load the pre-recorded landmarks
reference_landmarks = load_landmarks('landmarks_reference.pkl')
# for i in reference_landmarks:
#     print(i['frame'])
 

Recording realtime video and comparing landmarks. Testing needs to be done. Male sure all landmarks are visible.

In [49]:

# Calculate the frame rate of the reference video
cap = cv2.VideoCapture("tornado_kick_demo.mp4")
if not cap.isOpened():
    print("Error: Could not open video.")

# Get the frames per second (fps)
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)

# Get the total number of frames
frame_count = len(reference_landmarks)

# Calculate duration in seconds
video_duration = frame_count / fps
print(video_duration)

cap.release()  # seconds
# reference_frame_count = len(reference_landmarks)


# Start capturing real-time footage
frame_number = 0
real_time_landmarks = []
cap_realtime = cv2.VideoCapture(0)
print(cap_realtime.get(cv2.CAP_PROP_FPS))
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    start_time = time.time()
    

    while cap_realtime.isOpened() and (time.time() - start_time) < video_duration:
        ret, frame = cap_realtime.read()
        if not ret:
            break
        
        landmarks = extract_landmarks_from_frame(frame, pose)
        if landmarks:
            real_time_landmarks.append({
                "frame": frame_number,
                "landmarks": landmarks
            })
            frame_number += 1

            # Draw landmarks on the real-time frame
            mp_drawing.draw_landmarks(frame, pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Display the real-time frame
        cv2.imshow('Real-Time Footage', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("Time is", time.time()-start_time)
    cap_realtime.release()
    cv2.destroyAllWindows()
    ref_landmarks=[]
    realtime_landmarks=[]
    # for l in real_time_landmarks:
    #     for i in l['landmarks']:
    #         print(i['id'])
    if len(real_time_landmarks) < frame_count:
        print(f"Real-time footage has {len(real_time_landmarks)} frames; reference has {frame_count} frames.")
        real_time_landmarks = interpolate_landmarks(real_time_landmarks, frame_count)
    # for i in real_time_landmarks:
    #     print(i)
    for i in range(frame_count):
        ref_landmarks.append(reference_landmarks[i]['landmarks'])
        realtime_landmarks.append(real_time_landmarks[i]['landmarks'])
    print(len(ref_landmarks), len(real_time_landmarks))
    distances = calculate_landmark_distances(realtime_landmarks, ref_landmarks)
    avg_distance = np.mean(distances)
    print(f"Frame {i}: Average distance: {avg_distance}")



[aac @ 0x4959840] channel element 3.9 is not allocated
I0000 00:00:1717485095.212037   23303 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717485095.222699   27748 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)
W0000 00:00:1717485095.356143   27741 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1717485095.389589   27743 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


30.0
1.8333333333333333
30.0




Time is 1.8971562385559082
Real-time footage has 18 frames; reference has 55 frames.
55 55
Frame 54: Average distance: 0.5518935855845324


In [18]:

# Calculate the frame rate of the reference video
cap = cv2.VideoCapture("tornado_kick_demo.mp4")
if not cap.isOpened():
    print("Error: Could not open video.")

# Get the frames per second (fps)
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)

# Get the total number of frames
frame_count = len(reference_landmarks)

# Calculate duration in seconds
video_duration = frame_count / fps
print(video_duration)

cap.release()  # seconds
# reference_frame_count = len(reference_landmarks)


# Start capturing real-time footage
frame_number = 0
real_time_landmarks = []
cap_realtime = cv2.VideoCapture(0)
print(cap_realtime.get(cv2.CAP_PROP_FPS))
with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
    start_time = time.time()
    

    while cap_realtime.isOpened() and (time.time() - start_time) < video_duration:
        ret, frame = cap_realtime.read()
        if not ret:
            break
        
        landmarks = extract_landmarks_from_frame(frame, pose)
        if landmarks:
            real_time_landmarks.append({
                "frame": frame_number,
                "landmarks": landmarks
            })
            frame_number += 1

            # Draw landmarks on the real-time frame
            mp_drawing.draw_landmarks(frame, pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).pose_landmarks, mp_pose.POSE_CONNECTIONS)

        # Display the real-time frame
        cv2.imshow('Real-Time Footage', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("Time is", time.time()-start_time)
    cap_realtime.release()
    cv2.destroyAllWindows()
    ref_landmarks=[]
    realtime_landmarks=[]
    # for l in real_time_landmarks:
    #     for i in l['landmarks']:
    #         print(i['id'])
    if len(real_time_landmarks) < frame_count:
        print(f"Real-time footage has {len(real_time_landmarks)} frames; reference has {frame_count} frames.")
        real_time_landmarks = interpolate_landmarks(real_time_landmarks, frame_count)
    # for i in real_time_landmarks:
    #     print(i)
    for i in range(frame_count):
        ref_landmarks.append(reference_landmarks[i]['landmarks'])
        realtime_landmarks.append(real_time_landmarks[i]['landmarks'])
    print(len(ref_landmarks), len(real_time_landmarks))
    distances = calculate_landmark_distances(realtime_landmarks, ref_landmarks)
    avg_distance = np.mean(distances)
    avg_cosine_distance=[]
    for i in range(frame_count):
        cosine_distances = [[realtime_landmarks[i][j]['x'],realtime_landmarks[i][j]['y'],realtime_landmarks[i][j]['z']] for j in range(33)]
        avg_cosine_distance.append(np.mean(cosine_distances))
    acc=np.mean(avg_cosine_distance)
    print(f"Frame {i}: Average distance: {(1-acc/2)*100}")
    # print([[realtime_landmarks[1][j]['x'],realtime_landmarks[1][j]['y'],realtime_landmarks[1][j]['z']] for j in range(33)])

[aac @ 0x5383280] channel element 3.9 is not allocated
I0000 00:00:1717571742.174386   39833 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1717571742.176268   41505 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)
W0000 00:00:1717571742.298132   41495 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1717571742.340071   41496 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


30.0
1.8333333333333333
30.0
Time is 1.8407742977142334
Real-time footage has 0 frames; reference has 55 frames.


ValueError: cannot reshape array of size 0 into shape (0,newaxis)