In [63]:
# Imports
import pandas as pd
import tensorflow as tf
import cv2 as cv
import numpy as np
import random

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.tasks.python.vision import PoseLandmarkerResult

In [64]:
def get_angles(landmarks):
    """Compute three joint angles (in degrees) from the first detected pose.

    Args:
        landmarks: Indexable collection of poses; only ``landmarks[0]`` is
            read. Each landmark used must expose ``.x``, ``.y`` and ``.z``.
            Indices 11/13/15 are treated as the left shoulder/elbow/wrist and
            12/14/16 as the right shoulder/elbow/wrist.

    Returns:
        ``np.array([shoulderAngle, leftArmAngle, rightArmAngle])`` so the
        values can be appended to a flattened landmark vector.
    """
    pose = landmarks[0]

    def xyz(index):
        # Pack a single landmark's coordinates into a 3-vector.
        point = pose[index]
        return np.array([point.x, point.y, point.z])

    left_shoulder, right_shoulder = xyz(11), xyz(12)
    left_elbow, right_elbow = xyz(13), xyz(14)
    left_wrist, right_wrist = xyz(15), xyz(16)

    # Reference point for the "flat" shoulder line: the left shoulder's x and
    # z combined with the right shoulder's y. The angle is measured at the
    # right shoulder between the real left shoulder and this reference.
    level_reference = np.array([left_shoulder[0], right_shoulder[1], left_shoulder[2]])
    shoulder_tilt = angle_between_points(left_shoulder, right_shoulder, level_reference)

    # Arm angles are measured at the elbow between shoulder and wrist.
    left_arm = angle_between_points(left_shoulder, left_elbow, left_wrist)
    right_arm = angle_between_points(right_shoulder, right_elbow, right_wrist)

    return np.array([shoulder_tilt, left_arm, right_arm])
    
def angle_between_points(p1, p2, p3):
    """Return the angle at ``p2`` formed by ``p1`` and ``p3``, in degrees.

    Args:
        p1: Coordinates of the first end point (array-like).
        p2: Coordinates of the vertex (array-like).
        p3: Coordinates of the second end point (array-like).

    Returns:
        The angle between the vectors ``p1 - p2`` and ``p3 - p2`` in degrees,
        or ``0.0`` when either vector has zero length.
    """
    vertex = np.array(p2)
    arm_a = np.array(p1) - vertex
    arm_b = np.array(p3) - vertex

    length_a = np.linalg.norm(arm_a)
    length_b = np.linalg.norm(arm_b)

    # A degenerate arm has no direction; report 0 rather than divide by zero.
    if length_a == 0 or length_b == 0:
        return 0.0

    # Clip to [-1, 1] so rounding error cannot push arccos out of its domain.
    cos_theta = np.clip(np.dot(arm_a, arm_b) / (length_a * length_b), -1.0, 1.0)

    return np.degrees(np.arccos(cos_theta))

In [1]:
def process_frame_and_add(lm, frame_to_use, id, other_features, view):
    """Run pose detection on one frame and build a flat feature row.

    Args:
        lm: Pose landmarker exposing ``detect(mp_image)``.
        frame_to_use: Image array wrapped in an ``mp.Image`` tagged as SRGB.
            NOTE(review): OpenCV decodes frames as BGR, so callers passing
            raw OpenCV frames presumably need to convert first - confirm.
        id: Class label stored as the last element of the row.
        other_features: Extra per-sample values placed after the landmarks.
        view: Currently unused by this function.

    Returns:
        A 1-D numpy array laid out as: flattened (x, y, z) of every landmark
        of the first detected pose, then ``other_features``, then the three
        values from ``get_angles``, then ``id``. Returns ``None`` when no
        pose is detected, so callers must handle that case.
    """
    detection = lm.detect(mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_to_use))

    landmarks = detection.pose_landmarks
    if not landmarks:
        # No pose found in this frame: there is nothing to add.
        return None

    # Start with the landmark coordinates of the first pose, flattened ...
    row = np.array([[point.x, point.y, point.z] for point in landmarks[0]]).flatten()

    # ... then tack on the static features, the joint angles and the label.
    for extra in (other_features, get_angles(landmarks), [id]):
        row = np.append(row, extra)

    return row

In [66]:
def augment_frame(frame):
    """Return a randomly perturbed copy of ``frame`` for data augmentation.

    The frame is rotated about its centre by an angle drawn uniformly from
    [-5, 5] degrees (scale 1.0, reflected borders, output size unchanged) and
    then mirrored horizontally with probability 0.5.

    Args:
        frame: Image array whose first two dimensions are (height, width).

    Returns:
        The augmented image.
    """
    angle = random.uniform(-5, 5)
    height, width = frame.shape[:2]

    # Rotate around the (integer) image centre without rescaling.
    rotation = cv.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
    augmented = cv.warpAffine(
        frame,
        rotation,
        (width, height),
        flags=cv.INTER_LINEAR,
        borderMode=cv.BORDER_REFLECT,
    )

    # Mirror left/right on a coin flip.
    mirror = random.random() < 0.5
    return cv.flip(augmented, 1) if mirror else augmented

In [67]:
# Short aliases for the MediaPipe Tasks classes used below
VisionRunningMode = mp.tasks.vision.RunningMode
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
BaseOptions = mp.tasks.BaseOptions

# Location of the pose landmarker model file.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run elsewhere after it is updated.
modelPath = '/home/matt/Documents/projects/swingAnalysis/swing-analysis-prototyping/models/pose_landmarker_full.task'

In [68]:
# Load the swing segmentation data, add integer codes for the categorical
# columns, and drop the columns that are not used further on.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
df = (
    pd.read_pickle('../data/GolfDB_Filtered.pkl')
    .astype({'sex': 'category', 'club': 'category', 'view': 'category'})
    .assign(
        sex_codes=lambda frame: frame['sex'].cat.codes,
        club_codes=lambda frame: frame['club'].cat.codes,
        view_codes=lambda frame: frame['view'].cat.codes,
    )
    .drop(columns=['youtube_id', 'handedness', 'bbox', 'player', 'split'])
)
# Optional filter, left disabled: keep only face-on swings
# df = df[df['view'] == 'face-on']
df

Unnamed: 0,id,sex,club,view,slow,events,sex_codes,club_codes,view_codes
0,0,f,driver,down-the-line,0,"[408, 455, 473, 476, 490, 495, 498, 501, 514, ...",0,0,0
2,2,m,driver,down-the-line,0,"[521, 659, 678, 683, 692, 696, 698, 701, 715, ...",1,0,0
4,4,f,driver,down-the-line,0,"[157, 170, 183, 188, 197, 201, 205, 207, 220, ...",0,0,0
6,6,m,driver,down-the-line,0,"[246, 298, 310, 314, 324, 329, 332, 334, 351, ...",1,0,0
8,8,f,driver,face-on,0,"[288, 317, 333, 335, 347, 352, 355, 357, 371, ...",0,0,1
...,...,...,...,...,...,...,...,...,...
1388,1388,m,driver,face-on,0,"[211, 255, 265, 270, 276, 280, 284, 286, 297, ...",1,0,1
1392,1392,m,driver,down-the-line,0,"[61, 251, 267, 273, 287, 290, 293, 295, 308, 340]",1,0,0
1394,1394,f,iron,down-the-line,0,"[301, 539, 555, 560, 568, 572, 575, 578, 595, ...",0,3,0
1396,1396,m,driver,face-on,0,"[121, 394, 407, 414, 427, 431, 435, 437, 452, ...",1,0,1


In [69]:
# Configure the pose landmarker in IMAGE running mode: frames are passed in
# one at a time through detect()
options = PoseLandmarkerOptions(
    running_mode=VisionRunningMode.IMAGE,
    base_options=BaseOptions(model_asset_path=modelPath),
)

In [70]:
# Feature rows collected across all videos; each entry is the 1-D array
# returned by process_frame_and_add for one (possibly augmented) frame.
full_dataset = []

# Run the Landmarker
with PoseLandmarker.create_from_options(options) as landmarker:
    # Iterate through each player in the dataframe
    for rowIdx, (_, row) in enumerate(df.iterrows()):
        print(f'Row {rowIdx+1}/{len(df)}')

        # Make the event frame numbers relative to the first entry, then drop
        # the first and last entries so only the interior events are labelled.
        events = row['events']
        events = (events - events[0])[1:-1]

        # Create other features for this row (consistent through time)
        other_features = [row['sex_codes'], row['club_codes'], row['view_codes']]

        # Set up OpenCV Video Capture; it is released in the finally block so
        # that every video's capture is closed, not only the last one.
        cap = cv.VideoCapture(f'../data/videos_160/{row["id"]}.mp4')
        try:
            for label_id, index in enumerate(events):
                # Jump to the frames where each event happens
                cap.set(cv.CAP_PROP_POS_FRAMES, index)
                success, frame = cap.read()
                if not success:
                    break

                # OpenCV decodes frames as BGR, while process_frame_and_add
                # wraps the data in an mp.Image tagged as SRGB.
                frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)

                # The original frame plus 0-4 augmented versions
                # (random.randint includes both end points).
                n_aug = random.randint(0, 4)
                frames = [frame]
                frames.extend(augment_frame(frame) for _ in range(n_aug))

                for image in frames:
                    sample = process_frame_and_add(landmarker, image, label_id, other_features, row['view'])
                    # None means no pose was detected; storing it would make
                    # the final array ragged.
                    if sample is not None:
                        full_dataset.append(sample)

                # cv.imshow('Pose Detection', frame)
                # if cv.waitKey(1) == ord('q'): # Change wait time to play video slower or faster
                #     break
        finally:
            cap.release()
    cv.destroyAllWindows()

I0000 00:00:1750874180.402359  110389 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1750874180.404207  172089 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 25.0.3-1ubuntu2), renderer: zink Vulkan 1.4(NVIDIA GeForce RTX 4070 (NVIDIA_PROPRIETARY))
W0000 00:00:1750874180.456423  172092 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1750874180.490729  172102 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Row 1/538
Row 2/538
Row 3/538
Row 4/538
Row 5/538
Row 6/538
Row 7/538
Row 8/538
Row 9/538
Row 10/538
Row 11/538
Row 12/538
Row 13/538
Row 14/538
Row 15/538
Row 16/538
Row 17/538
Row 18/538
Row 19/538
Row 20/538
Row 21/538
Row 22/538
Row 23/538
Row 24/538
Row 25/538
Row 26/538
Row 27/538
Row 28/538
Row 29/538
Row 30/538
Row 31/538
Row 32/538
Row 33/538
Row 34/538
Row 35/538
Row 36/538
Row 37/538
Row 38/538
Row 39/538
Row 40/538
Row 41/538
Row 42/538
Row 43/538
Row 44/538
Row 45/538
Row 46/538
Row 47/538
Row 48/538
Row 49/538
Row 50/538
Row 51/538
Row 52/538
Row 53/538
Row 54/538
Row 55/538
Row 56/538
Row 57/538
Row 58/538
Row 59/538
Row 60/538
Row 61/538
Row 62/538
Row 63/538
Row 64/538
Row 65/538
Row 66/538
Row 67/538
Row 68/538
Row 69/538
Row 70/538
Row 71/538
Row 72/538
Row 73/538
Row 74/538
Row 75/538
Row 76/538
Row 77/538
Row 78/538
Row 79/538
Row 80/538
Row 81/538
Row 82/538
Row 83/538
Row 84/538
Row 85/538
Row 86/538
Row 87/538
Row 88/538
Row 89/538
Row 90/538
Row 91/538
Row 92/5

In [71]:
# Convert Dataset Into Full Numpy
# Entries that are None (frames where no pose was detected) are dropped first;
# otherwise the rows would not stack into a regular 2-D array.
segmented_pose = np.array([sample for sample in full_dataset if sample is not None])
# np.save('../data/segmentedPose.npy', segmented_pose)
np.save('../data/segmentedPose_augmented.npy', segmented_pose)