In [12]:
# Imports
import pandas as pd
import tensorflow as tf
import cv2 as cv
import numpy as np

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import PoseLandmarkerResult

In [2]:
# Absolute Path to the Model
modelPath = '/home/matt/Documents/projects/swingAnalysis/swing-analysis-prototyping/models/pose_landmarker_full.task'

# Set up MediaPipe base options
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

In [4]:
# Read in the swing segmentation data
df = pd.read_pickle('../data/golfDB.pkl')
# df['events'][0]
df = df[df['sex'] == 'm']

# Get the faceOn Data
df_faceOn = df[df['view'] == 'face-on']
df_faceOn = df_faceOn[df_faceOn['slow'] == 0]
df_faceOn

Unnamed: 0,id,youtube_id,player,sex,club,view,slow,events,bbox,split
10,10,gOBVh7fzyZo,STEVE STRICKER,m,driver,face-on,0,"[395, 475, 488, 492, 499, 504, 507, 509, 524, ...","[0.10625, 0.0006944444444444445, 0.80078125, 1.0]",1
14,14,vN3Uc_EhnnY,GREG NORMAN,m,driver,face-on,0,"[457, 497, 513, 515, 523, 527, 531, 533, 546, ...","[0.05234375000000001, 0.0006944444444444445, 0...",2
21,21,xD6KDqPF9cc,TIGER WOODS,m,iron,face-on,0,"[211, 248, 261, 264, 272, 275, 278, 280, 299, ...","[0.09062500000000001, 0.0006944444444444445, 0...",2
29,29,B1uIW4LN16Q,BRANFORD MARSALIS,m,driver,face-on,0,"[245, 285, 303, 305, 317, 322, 325, 327, 373, ...","[0.08203125000000001, 0.03333333333333333, 0.6...",1
41,41,j6h-3Ez0714,COLIN MONTGOMERIE,m,driver,face-on,0,"[277, 298, 311, 315, 325, 328, 332, 334, 358, ...","[0.08046875, 0.0006944444444444445, 0.79765625...",2
...,...,...,...,...,...,...,...,...,...,...
1293,1293,c2zzaiV49fE,DANIEL BERGER,m,driver,face-on,0,"[451, 514, 526, 531, 539, 545, 548, 549, 562, ...","[0.06953125000000002, 0.0006944444444444445, 0...",3
1303,1303,oGTKW_UbusM,TIGER WOODS,m,iron,face-on,0,"[871, 1082, 1094, 1097, 1105, 1108, 1111, 1113...","[0.1203125, 0.0006944444444444445, 0.71015625,...",4
1348,1348,Yigxi0Oc5V4,BEN ROETHLISBERGER,m,driver,face-on,0,"[331, 566, 581, 586, 594, 600, 603, 605, 620, ...","[0.06640625000000001, 0.0006944444444444445, 0...",2
1388,1388,GXn3A0IuWsE,ROGER CLEMENS,m,driver,face-on,0,"[211, 255, 265, 270, 276, 280, 284, 286, 297, ...","[0.12109375, 0.09583333333333334, 0.65703125, ...",1


In [5]:
# Create a pose landmarker instance with video mode on
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=modelPath),
    running_mode=VisionRunningMode.IMAGE
)

In [32]:
full_dataset = []

# Run the Landmarker
with PoseLandmarker.create_from_options(options) as landmarker:
    # Iterate through each player in the dataframe
    for row in df_faceOn.iterrows():
        # Set up OpenCV Video Capture
        cap = cv.VideoCapture(f'../data/videos_160/{row[1]["id"]}.mp4')
        
        events = row[1]['events']
        events = (events - events[0])[1:-1]

        for id, index in enumerate(events):
            # Jump to the frames where each event happens
            cap.set(cv.CAP_PROP_POS_FRAMES, index)
            success, frame = cap.read()
            if not success:
                break

            # Convert the image to mp image
            mpImage = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

            # Get the landmarks for the image
            pose_landmarker_result = landmarker.detect(mpImage)

            if pose_landmarker_result.pose_landmarks:
                # Convert the landmarks to format that can be passed into model
                landmarks = pose_landmarker_result.pose_landmarks
                flat_landmarks = np.array([[l.x, l.y, l.z] for l in landmarks[0]]).flatten()
                
                # Append their classification
                flat_landmarks = np.append(flat_landmarks, [id])
    
                # Append to the dataset
                full_dataset.append(flat_landmarks)
            
            # cv.imshow('Pose Detection', frame)
            # if cv.waitKey(1) == ord('q'): # Change wait time to play video slower or faster
            #     break
    cap.release()
    cv.destroyAllWindows()

I0000 00:00:1750649004.973743    7246 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1750649004.975829   15869 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.3-1ubuntu2), renderer: zink Vulkan 1.4(NVIDIA GeForce RTX 4070 (NVIDIA_PROPRIETARY))
W0000 00:00:1750649005.031716   15870 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1750649005.069405   15884 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [39]:
# Convert Dataset Into Full Numpy
segmented_pose = np.array(full_dataset)
np.save('../data/segmentedPose.npy', segmented_pose)

In [40]:
segmented_pose

array([[ 0.44609961,  0.39145032, -0.564803  , ...,  0.80848098,
        -0.05791443,  0.        ],
       [ 0.43232977,  0.37883836, -0.35494781, ...,  0.79941154,
        -0.05981613,  1.        ],
       [ 0.43555912,  0.37950003, -0.22691868, ...,  0.80286318,
         0.00945691,  2.        ],
       ...,
       [ 0.392369  ,  0.38292986, -0.61023331, ...,  0.83600318,
         0.14274071,  5.        ],
       [ 0.40628374,  0.36938253, -0.40240481, ...,  0.83668685,
         0.01543261,  6.        ],
       [ 0.51647913,  0.29077929, -0.01061871, ...,  0.8372432 ,
        -0.20644729,  7.        ]])