In [1]:
video_ids = [2] # range(1, 6)
frame_skip = 60

In [2]:
# Load the JSON metadata for each video and store the paths, hits, and serves
import json

labels = {}
for id in video_ids:
    with open(f'data/json/video{id}.json') as f:
        video_data = json.load(f)

        video_start = video_data['match']['start']
        video_end = video_data['match']['end']
        
        # Create a dictionary to store the labels for each frame
        video_labels = {
            frame: {
                'shot': None,
                'player': None,
                'side': None,
                'type': None,
            }
            for frame in range(video_start, video_end)
        }

        # Add the labels for each hit
        for hit in video_data['hits']:
            hit_start = int(hit['start'])
            hit_end = int(hit['end'])
            hit_label = hit['custom']

            for frame in range(hit_start, hit_end):
                video_labels[frame] = {
                    'shot': 'Hit',
                    'player': hit_label['Player'], #near or far
                    'side': hit_label['Side'],
                    'type': hit_label['Type'],
                }

        # Add the labels for each serve
        for serve in video_data['serves']:
            serve_start = int(serve['start'])
            serve_end = int(serve['end'])
            serve_label = serve['custom']

            for frame in range(serve_start, serve_end):
                video_labels[frame] = {
                    'shot': 'Serve',
                    'player': serve_label['Player'], #near or far
                    'side': serve_label['Result'],
                    'type': None,
                }

        labels[id] = video_labels

In [3]:
import numpy as np

# Flatten the labels dictionary into a list of tuples
frame_labels = np.array([
    [id, frame, label['shot'], label['player'], label['side'], label['type']]
    for id, video_labels in labels.items()
    for frame, label in video_labels.items()
])

# Convert the shot, player, side, and type fields to ints
from sklearn.preprocessing import LabelEncoder

label_encoders = {}
for i in range(2, 6):
    label_encoders[i] = LabelEncoder()
    frame_labels[:, i] = label_encoders[i].fit_transform(frame_labels[:, i])

# Prune to every 10th frame
frame_labels = frame_labels[::frame_skip]

# Display the resulting NumPy array
print(frame_labels)

# Save the resulting NumPy array to a CSV file
np.savetxt('data/labels.csv', frame_labels, delimiter=',', fmt='%s')

[[2 14453 2 2 5 6]
 [2 14513 2 2 5 6]
 [2 14573 2 2 5 6]
 ...
 [2 134993 2 2 5 6]
 [2 135053 1 1 1 6]
 [2 135113 2 2 5 6]]


In [4]:
# Pull frames from the videos
import cv2

frames = {}
for id in [2]:
    video_path = f'data/videos/video{id}.mp4'
    video = cv2.VideoCapture(video_path)

    frames[id] = []
    for frame_num in range(video_start, video_end, frame_skip):
        print(f'Loading video {id}... ({frame_num-video_start}/{video_end-video_start})', end='\r')
        video.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
        ret, frame = video.read()
        frames[id].append(frame)

    video.release()
print('Videos loaded. ' + ' '*30)

Videos loaded.                               


In [5]:
num_frames = len(frames[2])
print(frames[2])
frames_array = np.zeros((num_frames, 720, 1280, 3), dtype=np.uint8)
for i, frame in enumerate(frames[2]):
    frames_array[i] = frame

print(frames_array.shape)

[array([[[134,  95,  95],
        [137,  98,  98],
        [134,  95,  95],
        ...,
        [ 34,  19,  20],
        [ 34,  19,  20],
        [ 34,  19,  20]],

       [[130,  91,  91],
        [134,  95,  95],
        [130,  91,  91],
        ...,
        [ 35,  20,  21],
        [ 35,  20,  21],
        [ 35,  20,  21]],

       [[131,  92,  92],
        [135,  96,  96],
        [132,  93,  93],
        ...,
        [ 86,  73,  69],
        [ 86,  73,  69],
        [ 86,  73,  69]],

       ...,

       [[114, 161, 144],
        [112, 159, 142],
        [112, 159, 142],
        ...,
        [135, 152, 156],
        [133, 150, 154],
        [130, 147, 151]],

       [[114, 161, 144],
        [113, 160, 143],
        [113, 160, 143],
        ...,
        [133, 150, 154],
        [132, 149, 153],
        [132, 149, 153]],

       [[115, 162, 145],
        [113, 160, 143],
        [112, 159, 142],
        ...,
        [132, 149, 153],
        [133, 150, 154],
        [135, 152, 156]

In [6]:
import copy
import matplotlib.pyplot as plt

from src import util
from src.body import Body

body_estimation = Body('model/body_pose_model.pth')

data = []
labels = []
for i in range(num_frames):
    print(f'Processing frame {i}/{num_frames}', end='\r')
    img = frames_array[i]
    candidate, subset = body_estimation(img)

    if (subset.shape[0] != 1):
        data.append(np.zeros((18, 2)))
        labels.append(frame_labels[i])
        continue
    
    candidate = candidate.astype(int)
    subset = subset[:1].astype(int)

    pose = np.zeros((18, 2))
    for i in range(18):
        if subset[0][i] > -1:
            pose[i] = candidate[subset[0][i].astype(int), :2][0]
    
    data.append(pose)
    labels.append(frame_labels[i])

Processing frame 5/2012

KeyboardInterrupt: 

In [None]:
np.save('data/poses.npy', data)
np.save('data/pose_labels.npy', labels)