In [None]:
import pandas as pd
import numpy as np

In [None]:
df = pd.read_csv('smash_v1.csv')

# Removing type_of_shot (they're all the same)
df = df.drop('type_of_shot', axis = 1)

df.head()

Unnamed: 0,id,frame_count,kpt_0_x,kpt_0_y,kpt_1_x,kpt_1_y,kpt_2_x,kpt_2_y,kpt_3_x,kpt_3_y,...,kpt_12_x,kpt_12_y,kpt_13_x,kpt_13_y,kpt_14_x,kpt_14_y,kpt_15_x,kpt_15_y,kpt_16_x,kpt_16_y
0,101,0,0.0,0.0,0.0,0.0,0.0,0.0,687.99646,262.951782,...,815.209229,396.171082,757.796814,508.305786,787.642578,469.871643,776.365906,615.642883,824.935242,551.207275
1,101,1,0.0,0.0,0.0,0.0,0.0,0.0,691.630188,274.037262,...,815.347778,404.14743,761.215027,518.545471,791.998474,475.096252,784.513,624.796265,835.347778,552.185242
2,101,2,0.0,0.0,0.0,0.0,0.0,0.0,694.214355,282.918457,...,818.231995,417.66626,758.630005,530.391785,792.194336,486.224915,796.893921,634.304871,846.007263,564.999878
3,101,3,0.0,0.0,0.0,0.0,0.0,0.0,696.82782,296.562622,...,818.831787,429.363342,752.453308,540.809692,793.435059,493.834106,799.39386,634.709961,851.539612,566.061462
4,101,4,0.0,0.0,0.0,0.0,0.0,0.0,700.760315,308.873901,...,824.773682,445.019165,740.424377,543.479431,793.883667,498.106964,796.599182,630.698975,851.529419,566.537598


In [None]:
# Replace zeros with NaN (missing points)
coords = [col for col in df.columns if "kpt" in col]
df[coords] = df[coords].replace(0.0, np.nan)

# Optionally interpolate missing values frame-by-frame
df[coords] = df.groupby('id')[coords].transform(lambda group: group.interpolate(limit_direction='both'))

In [None]:
def normalize_pose(group):
    mid_hip_x = (group['kpt_11_x'] + group['kpt_12_x']) / 2
    mid_hip_y = (group['kpt_11_y'] + group['kpt_12_y']) / 2
    for i in range(17):
        group[f'kpt_{i}_x'] = group[f'kpt_{i}_x'] - mid_hip_x
        group[f'kpt_{i}_y'] = group[f'kpt_{i}_y'] - mid_hip_y
    return group

df = df.groupby('id').apply(normalize_pose).reset_index(drop=True)

  df = df.groupby('id').apply(normalize_pose).reset_index(drop=True)


In [6]:
features = []

for shot_id, group in df.groupby('id'):
    f = {}
    for col in coords:
        f[f'{col}_mean'] = group[col].mean()
        f[f'{col}_std'] = group[col].std()
    f['shot_id'] = shot_id  # Keep track of which shot this is
    features.append(f)

features_df = pd.DataFrame(features)
features_df.head()

Unnamed: 0,kpt_0_x_mean,kpt_0_x_std,kpt_0_y_mean,kpt_0_y_std,kpt_1_x_mean,kpt_1_x_std,kpt_1_y_mean,kpt_1_y_std,kpt_2_x_mean,kpt_2_x_std,...,kpt_14_y_std,kpt_15_x_mean,kpt_15_x_std,kpt_15_y_mean,kpt_15_y_std,kpt_16_x_mean,kpt_16_x_std,kpt_16_y_mean,kpt_16_y_std,shot_id
0,-474.94818,288.997022,-453.928375,199.517995,-553.331737,226.775256,-522.747051,155.370257,-526.534703,249.908892,...,28.841993,-32.192929,72.439193,173.407426,27.370706,27.987834,73.003137,162.856574,37.042371,101
1,-599.51852,236.411819,-487.041008,168.758984,-663.715342,150.489135,-533.683888,119.546574,-664.113239,149.600097,...,18.942818,-22.071511,39.73038,140.948861,28.811715,34.364471,65.331936,131.444202,29.540732,102
2,-1086.173343,246.246137,-442.379932,82.718577,-1085.990207,247.057741,-442.6385,81.861961,-1115.797158,160.929421,...,23.046228,-72.003666,35.259131,160.260085,24.387269,32.843283,45.612793,185.117416,37.473144,103
3,-718.380276,244.064048,-426.628693,106.417669,-736.504253,217.199169,-433.828783,95.057592,-739.197833,216.414549,...,44.558533,-71.640125,79.266788,148.364564,33.743774,64.641566,69.139606,128.8705,61.205562,104
4,-1057.562892,269.64889,-459.82795,110.299561,-1126.156516,83.692495,-472.644521,89.122173,-1058.367624,271.344026,...,31.326905,-67.804588,50.795516,149.810586,31.948048,34.857363,60.292221,179.042213,47.537083,105


In [7]:
ideal_id = 101  # choose one good sequence
ideal = df[df['id'] == ideal_id]

In [None]:
def get_sequence(group, joints=[5,6,7,8,9,10,11,12]):  # upper-body joints for smash
    data = []
    for i in joints:
        data.append(group[f'kpt_{i}_x'].values)
        data.append(group[f'kpt_{i}_y'].values)
    return np.array(data).flatten()

sequences = {shot_id: get_sequence(g) for shot_id, g in df.groupby('id')}

In [None]:
from dtaidistance import dtw

ideal_seq = get_sequence(ideal)
scores = {}

for shot_id, seq in sequences.items():
    dist = dtw.distance(seq, ideal_seq)
    scores[shot_id] = 1 / (1 + dist) 

In [None]:
score_df = pd.DataFrame(list(scores.items()), columns=['id', 'form_score'])
score_df = score_df.sort_values('form_score', ascending=False)
print(score_df.head())

     id  form_score
0   101    1.000000
48  149    0.000684
44  145    0.000640
33  134    0.000605
1   102    0.000581
