In [1]:
import joblib
import numpy as np
from pathlib import Path
import pandas as pd
#from sklearn.model_selection import StratifiedKFold, LeaveOneOut

The input data for training or inference are formatted as a NumPy array in five dimensions (N, C, T, V, M):

N: The number of sequences  # N = 164

C: The number of input channels  # 3

T: The maximum sequence length in frames  # variable lengths expected

V: The number of joint points  # should be 25 for us

M: The number of persons.  # should be 1 for us

In [2]:
# Mapping from 73 SMPL+H joints to 25 NTU-RGBD joints (1-base)
# SMPL+H to NTU-RGBD Order (borrowed from: smpl_to_nturgbd() in https://github.com/abhinanda-punnakkal/BABEL/blob/main/action_recognition/data_gen/dutils.py)
# Note 1: 'spine'/'spine1' = 3/6 -> 2 = 'middle of the spine' (only one spine joint recorded in NTU)
# Note 2: 12, 22, 23, 24 and 25 NTU joints are approximations. The mappings below were made by BABEL.
# more details in SMPLH_JOINT_NAMES.py in the humor_dev folder.
_MAJOR_JOINTS = [
                    0, 3,               # 1 - base of the spine, 2 - middle of the spine
                    12, 15,             # 3 - neck, 4 - head
                    16, 18, 20, 22,     # [5,  6,  7,  8]  - left hand
                    17, 19, 21, 37,     # [9, 10, 11, 12]  - right hand
                    1,  4,  7, 10,      # [13, 14, 15, 16] - left leg
                    2,  5,  8, 11,      # [17, 18, 19, 20] - right leg
                    9,                  # 21 - spine
                    63, 64,             # 22-tip of the left hand 23-left thumb
                    68, 69              # 24-tip of the right hand 25-right thumb
                ]

In [3]:
joints_dict = joblib.load("/home/ayushsingla/humor_dev/data/clinical/all_joints_smplh_vtx.pkl") 

In [10]:
np.shape(joints_dict['001'])                     # (T, V', C) = (645, 73, 3). Note: V' is 73 cause joints are 0-indexed and run from 0 to 72.
np.shape(joints_dict['001'][:,_MAJOR_JOINTS,:])  # (T, V, C)  = (645, 25, 3). Joints are now in NTU-RGBD order.

(645, 25, 3)

In [4]:
data_dir = "/home/ayushsingla/humor_dev/GaitForeMer/data/labels/"
def data_generator(data_dict, csv_file):
    df = pd.read_csv(Path(data_dir) / csv_file)
    X_1 = []
    Y = []
    for key in data_dict.keys():
        p = np.copy(data_dict[key][:,_MAJOR_JOINTS,:])
        label = int(df.loc[df['ID'] == int(key)]['Y'].values[0])
        X_1.append(p)
        Y.append(label)
    X_nd = np.array(X_1, dtype=object)
    return X_nd, np.stack(Y), data_dict.keys()

csv_file = "binary_combined.csv"
data_dict = joblib.load("/home/ayushsingla/humor_dev/data/clinical/all_joints_smplh_vtx.pkl") 
X_nd, Y, X_indices = data_generator(data_dict, csv_file)
unique, counts = np.unique(Y, return_counts=True)
print("Label Frequency Counts:\n", np.asarray((unique, counts)).T)

Label Frequency Counts:
 [[  0 141]
 [  1  23]]


In [30]:
joblib.dump((X_nd, Y), Path("/home/ayushsingla/humor_dev/data/clinical/") / f"all_joints_ntu_w_lbl.pkl")

['/home/ayushsingla/humor_dev/data/clinical/all_joints_ntu_w_lbl.pkl']

In [11]:
np.savetxt("/home/ayushsingla/humor_dev/HD-GCN/data/clinical/data_indices.txt", list(X_indices), fmt='%s')

We run /home/ayushsingla/humor_dev/HD-GCN/data/clinical/seq_transformation.ipynb after the above cell is executed. Cells below are EDA.

In [9]:
165 / np.bincount(Y)

array([1.17021277, 7.17391304])

In [11]:
Y[0]

0

In [3]:
print(f"Mean: {np.mean([x.shape[0] for x in X_nd])}; STD: {np.std([x.shape[0] for x in X_nd])}")

Mean: 469.8536585365854; STD: 127.31635477790033


In [4]:
print(f"min: {np.min([x.shape[0] for x in X_nd])}; max: {np.max([x.shape[0] for x in X_nd])}")

min: 294; max: 1148


In [5]:
skf = StratifiedKFold(n_splits=10)
skf.get_n_splits(X_nd, Y)

10

In [6]:
data_folder = Path(f"/home/ayushsingla/humor_dev/GaitForeMer/data/smpl_k_fold/{Path(csv_file).stem}")
data_folder.mkdir(exist_ok=True)
for i, (train_index, test_index) in enumerate(skf.split(X_nd, Y)):
    print(f"Fold {i}:")
    print(f"  train len={len(train_index)}")
    print(f"  test len={len(test_index)}")
    train = X_nd[train_index], Y[train_index]
    test = X_nd[test_index], Y[test_index]
    joblib.dump(train, f"{data_folder}/train{i}.pkl")
    joblib.dump(test, f"{data_folder}/test{i}.pkl")

Fold 0:
  train len=147
  test len=17
Fold 1:
  train len=147
  test len=17
Fold 2:
  train len=147
  test len=17
Fold 3:
  train len=147
  test len=17
Fold 4:
  train len=148
  test len=16
Fold 5:
  train len=148
  test len=16
Fold 6:
  train len=148
  test len=16
Fold 7:
  train len=148
  test len=16
Fold 8:
  train len=148
  test len=16
Fold 9:
  train len=148
  test len=16


##### Sanity Check flattening last two axes for seq_transformation in HD-GCN

In [2]:
X_nd, Y = joblib.load("/home/ayushsingla/humor_dev/data/clinical/all_joints_ntu_w_lbl.pkl") 

In [10]:
print(X_nd[0][0])

[[ 0.0851932  -0.41286176  4.450547  ]
 [ 0.08118555 -0.5047138   4.513398  ]
 [ 0.05033259 -0.89866716  4.4876175 ]
 [ 0.04640254 -0.947069    4.4222918 ]
 [ 0.22923806 -0.8132663   4.445941  ]
 [ 0.3293384  -0.5747096   4.434816  ]
 [ 0.29247358 -0.39668292  4.2684436 ]
 [ 0.24091506 -0.34315658  4.1957636 ]
 [-0.11833104 -0.78749985  4.4843783 ]
 [-0.21322793 -0.5529443   4.4856257 ]
 [-0.243857   -0.35799032  4.3276205 ]
 [-0.21572524 -0.30160886  4.2479115 ]
 [ 0.15981266 -0.3294887   4.4197707 ]
 [ 0.2362887  -0.19321965  4.080019  ]
 [ 0.23311287  0.19501826  4.164399  ]
 [ 0.2438674   0.23691681  4.0385528 ]
 [ 0.01993255 -0.32228684  4.427219  ]
 [-0.08823486 -0.12825552  4.1168346 ]
 [-0.06997121  0.2296868   4.2895117 ]
 [-0.08776293  0.2857036   4.169641  ]
 [ 0.06915653 -0.68338203  4.4602284 ]
 [ 0.18079185 -0.37087047  4.192755  ]
 [ 0.21754795 -0.29847914  4.1363974 ]
 [-0.15907481 -0.34073126  4.220185  ]
 [-0.2113055  -0.2511987   4.1881433 ]]


In [11]:
X_nd[0][0].reshape(-1)

array([ 0.0851932 , -0.41286176,  4.450547  ,  0.08118555, -0.5047138 ,
        4.513398  ,  0.05033259, -0.89866716,  4.4876175 ,  0.04640254,
       -0.947069  ,  4.4222918 ,  0.22923806, -0.8132663 ,  4.445941  ,
        0.3293384 , -0.5747096 ,  4.434816  ,  0.29247358, -0.39668292,
        4.2684436 ,  0.24091506, -0.34315658,  4.1957636 , -0.11833104,
       -0.78749985,  4.4843783 , -0.21322793, -0.5529443 ,  4.4856257 ,
       -0.243857  , -0.35799032,  4.3276205 , -0.21572524, -0.30160886,
        4.2479115 ,  0.15981266, -0.3294887 ,  4.4197707 ,  0.2362887 ,
       -0.19321965,  4.080019  ,  0.23311287,  0.19501826,  4.164399  ,
        0.2438674 ,  0.23691681,  4.0385528 ,  0.01993255, -0.32228684,
        4.427219  , -0.08823486, -0.12825552,  4.1168346 , -0.06997121,
        0.2296868 ,  4.2895117 , -0.08776293,  0.2857036 ,  4.169641  ,
        0.06915653, -0.68338203,  4.4602284 ,  0.18079185, -0.37087047,
        4.192755  ,  0.21754795, -0.29847914,  4.1363974 , -0.15

In [12]:
X_75 = np.array([x.reshape(x.shape[0], -1) for x in X_nd], dtype=object)

In [14]:
X_75[0][0] == X_nd[0][0].reshape(-1)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [16]:
X_75[10].shape

(516, 75)