In [2]:
import joblib
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.model_selection import StratifiedKFold, LeaveOneOut

N = 164

In [3]:
# mapping from 17 SMPL joints to NTU (1-base)
# 'spine'/'spine1' = 3/6 -> 2 = 'middle of the spine' (only one spine joint recorded in NTU)
#  NTU Order:    1, 2,  3,  4,  5,  6,  7,  9, 10, 11, 13, 14, 15, 17, 18, 19, 21
_MAJOR_JOINTS = [0, 3, 12, 15, 13, 16, 18, 14, 17, 19,  1,  4,  7,  2,  5, 11,  9]  # SMPL Order
joints_dict = joblib.load("/home/ayushsingla/humor_dev/data/clinical/all_joints.pkl") 

In [4]:
data_dir = "/home/ayushsingla/humor_dev/GaitForeMer/data/labels/"
def data_generator(data_dict, csv_file):
    df = pd.read_csv(Path(data_dir) / csv_file)
    print(df['Y'].value_counts())
    X_1 = []
    Y = []
    for key in data_dict.keys():
        p = np.copy(data_dict[key][:,_MAJOR_JOINTS,:])
        label = int(df.loc[df['ID'] == int(key)]['Y'].values[0])
        X_1.append(p)
        Y.append(label)
    return X_1, np.stack(Y)

csv_file = "binary_combined.csv"
data_dict = joblib.load("/home/ayushsingla/humor_dev/data/clinical/all_joints.pkl") 
X, Y = data_generator(data_dict, csv_file)
X_nd = np.array(X)

0    141
1     24
Name: Y, dtype: int64


  app.launch_new_instance()


In [9]:
165 / np.bincount(Y)

array([1.17021277, 7.17391304])

In [11]:
Y[0]

0

In [4]:
print(f"Mean: {np.mean([x.shape[0] for x in X_nd])}; STD: {np.std([x.shape[0] for x in X_nd])}")

Mean: 469.8536585365854; STD: 127.31635477790033


In [5]:
skf = StratifiedKFold(n_splits=10)
skf.get_n_splits(X_nd, Y)

10

In [6]:
data_folder = Path(f"/home/ayushsingla/humor_dev/GaitForeMer/data/smpl_k_fold/{Path(csv_file).stem}")
data_folder.mkdir(exist_ok=True)
for i, (train_index, test_index) in enumerate(skf.split(X_nd, Y)):
    print(f"Fold {i}:")
    print(f"  train len={len(train_index)}")
    print(f"  test len={len(test_index)}")
    train = X_nd[train_index], Y[train_index]
    test = X_nd[test_index], Y[test_index]
    joblib.dump(train, f"{data_folder}/train{i}.pkl")
    joblib.dump(test, f"{data_folder}/test{i}.pkl")

Fold 0:
  train len=147
  test len=17
Fold 1:
  train len=147
  test len=17
Fold 2:
  train len=147
  test len=17
Fold 3:
  train len=147
  test len=17
Fold 4:
  train len=148
  test len=16
Fold 5:
  train len=148
  test len=16
Fold 6:
  train len=148
  test len=16
Fold 7:
  train len=148
  test len=16
Fold 8:
  train len=148
  test len=16
Fold 9:
  train len=148
  test len=16
