In [43]:
import os
import os.path as osp
import numpy as np
import pickle
import logging
import h5py
from sklearn.model_selection import train_test_split
import joblib
from pathlib import Path
import pandas as pd

In [44]:
# Input locations, all relative to the notebook's working directory.
root_path = './'

# Text files under <root_path>/statistics.
stat_path = osp.join(root_path, 'statistics')
setup_file = osp.join(stat_path, 'setup.txt')
camera_file = osp.join(stat_path, 'camera.txt')
performer_file = osp.join(stat_path, 'performer.txt')
replication_file = osp.join(stat_path, 'replication.txt')
label_file = osp.join(stat_path, 'label.txt')
skes_name_file = osp.join(stat_path, 'skes_available_name.txt')

# Files under <root_path>/denoised_data.
denoised_path = osp.join(root_path, 'denoised_data')
raw_skes_joints_pkl = osp.join(denoised_path, 'raw_denoised_joints.pkl')
frames_file = osp.join(denoised_path, 'frames_cnt.txt')

# Presumably the directory for generated outputs (verify against the cells that save files).
save_path = './'
# makedirs(exist_ok=True) creates missing parent directories too and does not fail
# when the directory already exists, so no separate existence check is needed.
os.makedirs(save_path, exist_ok=True)

In [45]:
def seq_translation(skes_joints):
    """
    Translate each skeleton sequence so that joint-2 of its first non-empty frame is the origin.

    Arguments:
        skes_joints: indexable collection of per-sequence arrays, each of shape
                     (num_frames, 3 * num_joints) with flattened X-Y-Z joint
                     coordinates (75 columns for 25 joints).

    Returns:
        skes_joints: the same container that was passed in, where each sequence has had
                     the X-Y-Z coordinates of joint-2 (columns 3:6), taken from the first
                     frame that contains any non-zero value, subtracted from every joint
                     of every frame.

    Notes:
        - The sequence arrays are modified in place; calling this twice on the same
          data translates it twice.
        - Frames are never removed. Leading all-zero frames are translated like any
          other frame and therefore end up holding the negated origin.
        - A sequence with no frames, or with only all-zero frames, has no usable origin
          and is left untouched (indexing past the end used to raise IndexError).
    """
    for idx, ske_joints in enumerate(skes_joints):
        # Frames that contain at least one non-zero coordinate in the first 75 columns.
        real_frames = np.flatnonzero(np.any(ske_joints[:, :75] != 0, axis=1))
        if real_frames.size == 0:
            continue  # nothing to anchor the translation to

        # joint-2 (middle of the spine/Spine1 in SMPL) of the first real frame is the new origin
        origin = np.copy(ske_joints[real_frames[0], 3:6])

        # Repeat the origin once per joint so it lines up with the flattened X-Y-Z layout,
        # then subtract it from all frames at once (broadcast over the frame axis).
        num_joints = ske_joints.shape[1] // 3
        ske_joints -= np.tile(origin, num_joints)

        skes_joints[idx] = ske_joints  # Update

    return skes_joints

In [46]:
def align_frames(skes_joints):
    """
    Zero-pad all sequences to the same frame length.

    Arguments:
        skes_joints: sequence of per-skeleton arrays, each of shape
                     (num_frames, num_features); num_features is 75 for a single body
                     (25 joints x 3 coordinates). Typically already processed by
                     seq_translation.

    Returns:
        aligned_skes_joints: float32 array of shape (num_skeletons, max_num_frames, num_features)
                     - max_num_frames is the largest frame count among the inputs
                     - num_features is taken from the first sequence
                     - sequences shorter than max_num_frames are padded with zeros at the end
                     - an empty input yields an array of shape (0, 0, 75)
    """
    num_skes = len(skes_joints)
    if num_skes == 0:
        # No sequences: np.max over an empty list would raise, so return an empty batch.
        return np.zeros((0, 0, 75), dtype=np.float32)

    max_num_frames = max(x.shape[0] for x in skes_joints)
    num_features = skes_joints[0].shape[1]
    aligned_skes_joints = np.zeros((num_skes, max_num_frames, num_features), dtype=np.float32)

    for idx, ske_joints in enumerate(skes_joints):
        # Copy the real frames; the remaining rows stay zero as padding.
        aligned_skes_joints[idx, :ske_joints.shape[0]] = ske_joints

    return aligned_skes_joints

In [47]:
def one_hot_vector(labels, num_classes=2):
    """
    Convert integer class labels into one-hot rows.

    Arguments:
        labels: sequence of integer class ids, one per sample.
        num_classes: number of columns in the result (default 2, binary classification).

    Returns:
        labels_vector: float array of shape (len(labels), num_classes) with a single 1
                       per row at the column given by the label.

    Raises:
        IndexError: if a label is outside [0, num_classes). Negative labels are
                    rejected as well, since numpy would otherwise wrap them around
                    to the last columns without any error.
    """
    num_skes = len(labels)
    labels_vector = np.zeros((num_skes, num_classes))
    for idx, l in enumerate(labels):
        if not 0 <= l < num_classes:
            raise IndexError(
                "label {!r} at position {} is outside the valid range [0, {})".format(l, idx, num_classes)
            )
        labels_vector[idx, l] = 1

    return labels_vector

def split_dataset(skes_joints, label, data_indices, test_size=0.15, random_state=3407, save_dir='.'):
    """
    Make a stratified train/test split and write it to disk.

    Arguments:
        skes_joints: array whose first axis indexes the samples.
        label: integer class label per sample, in the same order as skes_joints.
        data_indices: one entry per sample, in the same order as skes_joints;
                      the entries of each split are saved next to the data.
        test_size: fraction (or count) of samples for the test split, passed to train_test_split.
        random_state: seed passed to train_test_split so the split is reproducible.
        save_dir: directory the output files are written to (created if missing);
                  the default writes to the current working directory.

    Files written to save_dir:
        train_indices.txt / test_indices.txt: data_indices entries of each split
        train_indices_dict.pkl / test_indices_dict.pkl: {sample position: data_indices entry}
        clinical.npz: arrays x_train, y_train, x_test, y_test (labels one-hot encoded)

    Raises:
        ValueError: if label does not have one entry per sample, or data_indices has
                    fewer entries than there are samples.
    """
    # Fancy indexing with a list of positions needs ndarrays, not plain lists.
    label = np.asarray(label)
    data_indices = np.atleast_1d(np.asarray(data_indices))

    # Validate up front so a mismatch cannot fail after some files were already written.
    num_skes = skes_joints.shape[0]
    if label.shape[0] != num_skes:
        raise ValueError("label has {} entries but there are {} samples".format(label.shape[0], num_skes))
    if data_indices.shape[0] < num_skes:
        raise ValueError(
            "data_indices has {} entries but there are {} samples".format(data_indices.shape[0], num_skes)
        )

    train_idxs, test_idxs = train_test_split(
        list(range(num_skes)), test_size=test_size, stratify=label, random_state=random_state
    )
    train_labels = label[train_idxs]
    test_labels = label[test_idxs]

    train_x = skes_joints[train_idxs]
    train_y = one_hot_vector(train_labels)
    test_x = skes_joints[test_idxs]
    test_y = one_hot_vector(test_labels)

    os.makedirs(save_dir, exist_ok=True)

    train_entries = data_indices[train_idxs]
    np.savetxt(osp.join(save_dir, "train_indices.txt"), train_entries, fmt='%s')
    joblib.dump(dict(zip(train_idxs, train_entries)), osp.join(save_dir, "train_indices_dict.pkl"))

    test_entries = data_indices[test_idxs]
    np.savetxt(osp.join(save_dir, "test_indices.txt"), test_entries, fmt='%s')
    joblib.dump(dict(zip(test_idxs, test_entries)), osp.join(save_dir, "test_indices_dict.pkl"))

    save_name = osp.join(save_dir, 'clinical.npz')
    np.savez(save_name, x_train=train_x, y_train=train_y, x_test=test_x, y_test=test_y)

    # Report the class balance of each split.
    unique, counts = np.unique(train_labels, return_counts=True)
    print("Train Frequency Counts:\n", np.asarray((unique, counts)).T)

    unique, counts = np.unique(test_labels, return_counts=True)
    print("Test Frequency Counts:\n", np.asarray((unique, counts)).T)

In [48]:
# Entries are read as strings. split_dataset indexes this array with the same positions
# it uses for skes_joints, so entry k is expected to belong to sequence k
# (TODO confirm against how data_indices.txt was produced).
data_indices = np.loadtxt("data_indices.txt", dtype=str)

In [49]:
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code.
# Only load files from a trusted source.
raw_skes_joints, label = joblib.load("all_joints_ntu_w_lbl.pkl")
# split_dataset indexes label with a list of positions, which requires an ndarray.
label = np.asarray(label)

# Flatten each sequence to (num_frames, features) and store the sequences in a 1-D object
# array. The array is filled element by element because np.array(..., dtype=object) would
# collapse the data into one 3-D object array of Python scalars whenever all sequences
# happen to have the same number of frames.
skes_joints = np.empty(len(raw_skes_joints), dtype=object)
for ske_idx, ske_joints in enumerate(raw_skes_joints):
    skes_joints[ske_idx] = ske_joints.reshape(ske_joints.shape[0], -1)

In [50]:
# NOTE: seq_translation subtracts the origin from the sequence arrays in place, so this
# cell is not idempotent. Re-run the loading cell before running it again.
skes_joints = seq_translation(skes_joints)

In [51]:
# Sanity check: shape of the first sequence, (num_frames, flattened joint coordinates).
skes_joints[0].shape

(645, 75)

In [52]:
# From here on skes_joints is a single zero-padded float32 array instead of a
# collection of variable-length sequences.
skes_joints = align_frames(skes_joints)  # aligned to the same frame length

In [53]:
# Sanity check: (num_sequences, max_num_frames, flattened joint coordinates).
skes_joints.shape

(164, 1148, 75)

In [54]:
# Writes train/test index files (.txt and .pkl) and clinical.npz, then prints the
# per-class sample counts of each split.
split_dataset(skes_joints, label, data_indices)

Train Frequency Counts:
 [[  0 120]
 [  1  19]]
Test Frequency Counts:
 [[ 0 21]
 [ 1  4]]
