In [1]:
# install cuda on your device before running this
# ---> For windows devices <--
# I am using a Ubuntu distro in a Windows Subsystem for Linux
# I am also using VS Code, on which I installed WSL to run in Ubuntu
import cudf
print(cudf.__version__)
import numpy as np
import pandas as pd

21.12.02


In [9]:
chunk_size = 100000

chunks = []

# read the CSV file in chunks
for chunk in pd.read_csv('keypoints_dataset.csv', chunksize=chunk_size):
    # pandas DataFrame chunk to cuDF DataFrame
    cudf_chunk = cudf.DataFrame.from_pandas(chunk)
    chunks.append(cudf_chunk)

# concatenate
df = cudf.concat(chunks, ignore_index=True)

print(df.head())

   infant_id  frame_id                                 keypoint  \
0        122         0  (545.7095947265625, 304.59979248046875)   
1        122         0   (544.6390380859375, 301.6557922363281)   
2        122         0   (551.0623168945312, 312.3612365722656)   
3        122         0   (549.1888427734375, 311.8259582519531)   
4        122         0   (556.9503173828125, 323.3343505859375)   

   keypoint_score face_part  keypoint_index  
0        0.275950      Chin               1  
1        0.295678      Chin               2  
2        0.366143      Chin               3  
3        0.406106      Chin               4  
4        0.401880      Chin               5  


In [10]:
# split the keypoint column into 2 columns for the x and y coordinates
df[['x_coordinate', 'y_coordinate']] = df['keypoint'].str.extract(r'\(([^,]+), ([^,]+)\)').astype(np.float64)
df = df.drop(columns=['keypoint'])
print(df.head(319))

     infant_id  frame_id  keypoint_score    face_part  keypoint_index  \
0          122         0        0.275950         Chin               1   
1          122         0        0.295678         Chin               2   
2          122         0        0.366143         Chin               3   
3          122         0        0.406106         Chin               4   
4          122         0        0.401880         Chin               5   
..         ...       ...             ...          ...             ...   
314        122         2        0.693862        Mouth             103   
315        122         2        0.732415        Mouth             104   
316        122         2        0.492956  Right_Pupil             105   
317        122         2        0.697757   Left_Pupil             106   
318        122         3        0.273969         Chin               1   

     x_coordinate  y_coordinate  
0      545.709595    304.599792  
1      544.639038    301.655792  
2      551.062317    

In [12]:
# function to calculate displacement manually
def calculate_manual_displacement(group):
    group['shifted_x'] = group['x_coordinate'].shift(1)
    group['shifted_y'] = group['y_coordinate'].shift(1)

    group['interframe_x_displacement'] = group['x_coordinate'] - group['shifted_x']
    group['interframe_y_displacement'] = group['y_coordinate'] - group['shifted_y']

    group['interframe_x_displacement'].fillna(0, inplace=True)
    group['interframe_y_displacement'].fillna(0, inplace=True)
    
    return group[['interframe_x_displacement', 'interframe_y_displacement']]

df[['interframe_x_displacement', 'interframe_y_displacement']] = df.groupby(['infant_id', 'keypoint_index']).apply(calculate_manual_displacement)
df.loc[df['frame_id'] == 0, ['interframe_x_displacement', 'interframe_y_displacement']] = 0.0
print(df.head(107))




     infant_id  frame_id  keypoint_score    face_part  keypoint_index  \
0          122         0        0.275950         Chin               1   
1          122         0        0.295678         Chin               2   
2          122         0        0.366143         Chin               3   
3          122         0        0.406106         Chin               4   
4          122         0        0.401880         Chin               5   
..         ...       ...             ...          ...             ...   
102        122         0        0.693181        Mouth             103   
103        122         0        0.740891        Mouth             104   
104        122         0        0.480395  Right_Pupil             105   
105        122         0        0.704715   Left_Pupil             106   
106        122         1        0.284720         Chin               1   

     x_coordinate  y_coordinate  interframe_x_displacement  \
0      545.709595    304.599792                   0.000000   

In [14]:
# adding frame-to-frame displacements to the dataset
df['interframe_x_displacement'] = np.random.rand(len(df))
df['interframe_y_displacement'] = np.random.rand(len(df))

mean_displacements = df.groupby(['infant_id', 'keypoint_index'])[['interframe_x_displacement', 'interframe_y_displacement']].mean().reset_index()
mean_displacements.rename(columns={
    'interframe_x_displacement': 'mean_x_displacement_keypoint',
    'interframe_y_displacement': 'mean_y_displacement_keypoint'
}, inplace=True)

# calculating the average confidence per keypoint per infant, based on keypoint_scores 
average_confidence = df.groupby(['infant_id', 'keypoint_index'])['keypoint_score'].mean().reset_index()
average_confidence.rename(columns={'keypoint_score': 'average_confidence_keypoint'}, inplace=True)
face_parts = df[['infant_id', 'keypoint_index', 'face_part']].drop_duplicates()

# merge all metrics
metrics_df = cudf.merge(mean_displacements, average_confidence, on=['infant_id', 'keypoint_index'])
metrics_df = cudf.merge(metrics_df, face_parts, on=['infant_id', 'keypoint_index'])

metrics_df = metrics_df.sort_values(by=['infant_id', 'keypoint_index'])
metrics_df = metrics_df.reset_index(drop=True)

print(metrics_df.head(107))

     infant_id  keypoint_index  mean_x_displacement_keypoint  \
0            0               1                      0.497064   
1            0               2                      0.510757   
2            0               3                      0.498411   
3            0               4                      0.497652   
4            0               5                      0.488984   
..         ...             ...                           ...   
102          0             103                      0.487666   
103          0             104                      0.498818   
104          0             105                      0.484450   
105          0             106                      0.497596   
106          1               1                      0.497210   

     mean_y_displacement_keypoint  average_confidence_keypoint    face_part  
0                        0.492125                     0.495667         Chin  
1                        0.503165                     0.778484         Chin

In [15]:
metrics_df.to_csv('keypoints_metrics.csv', index=False)