In [2]:
import numpy as np
import pandas as pd
from dtaidistance import dtw

In [3]:
# Load the normalized keypoint table and discard 'type_of_shot', which the
# original author noted holds the same value on every row.
df = pd.read_csv('combined_normalized_data.csv').drop(columns=['type_of_shot'])

df.head()

Unnamed: 0,id,frame_count,kpt_11_x,kpt_11_y,kpt_12_x,kpt_12_y,kpt_13_x,kpt_13_y,kpt_14_x,kpt_14_y,kpt_15_x,kpt_15_y,kpt_16_x,kpt_16_y,kpt_23_x,kpt_23_y,kpt_24_x,kpt_24_y
0,101,0,0.809271,0.338811,0.862334,0.315135,0.839387,0.399822,0.915538,0.369491,0.806322,0.453889,0.900765,0.414723,0.896488,0.452383,0.932868,0.437896
1,101,1,0.81582,0.348621,0.866793,0.327599,0.84292,0.411382,0.919713,0.382588,0.810617,0.466918,0.908947,0.431616,0.89637,0.459178,0.933026,0.446713
2,101,2,0.817738,0.364275,0.870809,0.340094,0.844992,0.426463,0.916766,0.394386,0.814951,0.483774,0.91884,0.453083,0.897649,0.476661,0.936327,0.461655
3,101,3,0.820303,0.37821,0.874671,0.351728,0.844438,0.441129,0.926998,0.40514,0.812847,0.497191,0.923088,0.456161,0.897812,0.491934,0.937013,0.474584
4,101,4,0.82417,0.388362,0.880652,0.360844,0.836773,0.455118,0.932521,0.412689,0.805766,0.506311,0.930001,0.461866,0.901121,0.509175,0.943813,0.491889


In [4]:
# Keypoint coordinate columns are the ones whose name contains "kpt".
coords = [name for name in df.columns if "kpt" in name]

# A coordinate of exactly 0.0 is treated as a missing point: mark it as NaN.
df[coords] = df[coords].replace(0.0, np.nan)


def _fill_gaps(series):
    """Interpolate the NaNs of one coordinate column, filling in both directions."""
    return series.interpolate(limit_direction='both')


# Fill the gaps separately for each shot (rows sharing an 'id'), following the
# existing row order of that shot.
df[coords] = df.groupby('id')[coords].transform(_fill_gaps)

In [6]:
def _shot_summary(shot_id, frames):
    """Return one feature row for a shot: mean and std of every coordinate column."""
    summary = {}
    for col in coords:
        values = frames[col]
        summary[f'{col}_mean'] = values.mean()
        summary[f'{col}_std'] = values.std()
    summary['shot_id'] = shot_id  # remember which shot this row describes
    return summary


# One summary row per shot (rows sharing an 'id').
features = [_shot_summary(shot_id, frames) for shot_id, frames in df.groupby('id')]

features_df = pd.DataFrame(features)
features_df.head()

Unnamed: 0,kpt_11_x_mean,kpt_11_x_std,kpt_11_y_mean,kpt_11_y_std,kpt_12_x_mean,kpt_12_x_std,kpt_12_y_mean,kpt_12_y_std,kpt_13_x_mean,kpt_13_x_std,...,kpt_16_y_std,kpt_23_x_mean,kpt_23_x_std,kpt_23_y_mean,kpt_23_y_std,kpt_24_x_mean,kpt_24_x_std,kpt_24_y_mean,kpt_24_y_std,shot_id
0,0.662196,0.133383,0.50229,0.091416,0.699006,0.143665,0.497488,0.099642,0.666761,0.134974,...,0.128364,0.690736,0.138635,0.634654,0.095988,0.721195,0.137756,0.632189,0.099985,101
1,0.69435,0.092639,0.515491,0.097944,0.73793,0.088409,0.508573,0.091317,0.689688,0.096141,...,0.113829,0.720222,0.082659,0.626306,0.095378,0.749943,0.079192,0.622725,0.09087,102
2,0.805627,0.125254,0.395902,0.081165,0.856845,0.129841,0.39806,0.08927,0.79963,0.119215,...,0.136614,0.807657,0.118692,0.556826,0.082443,0.844091,0.119866,0.560718,0.087036,103
3,0.666259,0.141745,0.419443,0.084084,0.718786,0.137326,0.418324,0.077087,0.65288,0.14171,...,0.131597,0.680623,0.13289,0.569817,0.085025,0.71723,0.128065,0.567469,0.080234,104
4,0.810448,0.069087,0.396727,0.096674,0.851369,0.07649,0.398748,0.099253,0.803389,0.074608,...,0.148088,0.820762,0.059023,0.538807,0.101648,0.852389,0.065868,0.541897,0.102325,105


In [7]:
ideal_id = 'user_video'  # choose one good sequence
ideal = df[df['id'] == ideal_id]

# Fail fast: an id that matches no rows would otherwise only surface later,
# as an empty reference sequence.
if ideal.empty:
    raise ValueError(f"No rows found for ideal_id={ideal_id!r}")

In [9]:
def get_sequence(group, joints=(12, 11, 14, 13, 16, 15, 24, 23)):  # upper-body joints for smash
    """Flatten the coordinate tracks of the selected joints into one 1-D array.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one shot. Must contain a ``kpt_<i>_x`` and a ``kpt_<i>_y``
        column for every joint index ``i`` in ``joints``.
    joints : iterable of int, optional
        Joint indices, in the order their tracks should appear in the result.
        The default is a tuple so the shared default value cannot be mutated
        between calls.

    Returns
    -------
    numpy.ndarray
        The x track of the first joint, then its y track, then the x track of
        the second joint, and so on, concatenated end to end. Each track holds
        one value per row of ``group``.

    Raises
    ------
    KeyError
        If ``group`` lacks one of the required columns.
    """
    tracks = [
        group[f'kpt_{i}_{axis}'].values
        for i in joints
        for axis in ('x', 'y')
    ]
    # Stack the tracks as rows, then flatten row by row so each track stays contiguous.
    return np.array(tracks).flatten()

# Build one flattened sequence per shot, keyed by the shot's id.
sequences = {}
for shot_id, shot_frames in df.groupby('id'):
    sequences[shot_id] = get_sequence(shot_frames)

In [10]:
# Score every shot against the reference sequence with dynamic time warping.
# `dtw` is imported together with the other dependencies in the notebook's
# first cell.
ideal_seq = get_sequence(ideal)

# Map each DTW distance d onto a similarity score 1 / (1 + d): a distance of 0
# gives a score of 1, and the score shrinks towards 0 as the distance grows.
scores = {
    shot_id: 1 / (1 + dtw.distance(seq, ideal_seq))
    for shot_id, seq in sequences.items()
}

In [11]:
# Tabulate the scores and rank the shots, highest form score first.
score_df = (
    pd.DataFrame(list(scores.items()), columns=['id', 'form_score'])
    .sort_values('form_score', ascending=False)
)

# End on a bare expression (like the other preview cells) so the notebook
# renders the frame with its rich display instead of plain printed text.
score_df.head()

            id  form_score
50  user_video    1.000000
13         114    0.217490
34         135    0.210695
47         148    0.205503
33         134    0.205440
