In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import copy
from sklearn.utils import shuffle

import random
from numpy.random import seed
from tensorflow.random import set_seed
# Seed every random number generator this notebook imports, so that the
# train/val/test split and any TensorFlow randomness repeat across runs.
SEED = 0
random.seed(SEED)  # Python standard-library RNG
seed(SEED)         # NumPy global RNG (drives np.random.choice for the split)
set_seed(SEED)     # TensorFlow global seed

# Data Processing: No Class Combination Case

Read the pose data into a pandas DataFrame. The first run parses every file and takes about five minutes; later runs load the cached files and should take about 10 seconds.

In [3]:
# Where the raw pose files live and where the preprocessed caches are written.
datapath = '../data/Pose_Dataset/'
df_filename = '../data/all-data.pkl'  # pickled DataFrame holding every squat
np_filename = '../data/np-data.npy'   # train/val/test arrays, saved back to back

# Every squat is cut down to this many frames.
nFrames = 300

# Class folder name -> integer class id (ids follow the order listed here).
# 'bad_innner_thigh' has three n's on purpose: it must match the folder name.
name_to_label = {
    class_name: class_id
    for class_id, class_name in enumerate((
        'bad_innner_thigh',
        'bad_back_round',
        'bad_back_warp',
        'bad_head',
        'bad_shallow',
        'bad_toe',
        'good',
    ))
}

# Load the cached dataframe and train/val/test arrays if both cache files
# exist; otherwise parse every squat .json under `datapath`, build them, and
# write the caches.
#
# Produces: df, X_train, y_train, X_val, y_val, X_test, y_test.
#
# NOTE: caches written by an earlier version of this cell are loaded as-is.
# Delete `df_filename` and `np_filename` to force regeneration.

# Number of values the shape check requires in each frame. Together with
# nFrames it fixes the per-squat array shape that np.stack needs.
nFeatures = 171


def _class_name(directory):
    """Return the name of the folder that contains `directory`.

    The loop below looks this name up in `name_to_label`, so it is expected
    to be the class folder. Uses os.path rather than splitting on '/', so it
    does not depend on the platform's path separator.
    """
    return os.path.basename(os.path.dirname(directory))


if os.path.exists(df_filename) and os.path.exists(np_filename):
    # Unpickling can execute arbitrary code: only load a pickle that this
    # notebook (or another trusted source) wrote.
    df = pd.read_pickle(df_filename)
    # The arrays were saved back to back in one file, so they must be read
    # in exactly the order they were written.
    with open(np_filename, 'rb') as f:
        X_train = np.load(f)
        y_train = np.load(f)
        X_val = np.load(f)
        y_val = np.load(f)
        X_test = np.load(f)
        y_test = np.load(f)

    # Later cells read df['label']. A cache written without that column is
    # repaired in memory from the stored file paths.
    if 'label' not in df.columns:
        df['label'] = [_class_name(os.path.dirname(path)) for path in df['filename']]
else:
    # generate pkl file and npy file

    # Parallel lists: entry k of each list describes the same squat file.
    filenames = []
    datas = []        # per-file object array of frames (for the dataframe)
    np_datas = []     # per-file numeric array of shape (nFrames, nFeatures)
    labels = []       # integer class ids
    class_names = []  # class folder names

    # read in each squat file
    for cur_dir, _, files in os.walk(datapath):
        for file in files:
            if not file.endswith('.json'):
                continue
            filename = os.path.join(cur_dir, file)

            data = pd.read_json(filename).to_numpy()
            data = data[1, 0:nFrames]  # get data for frames. Note some files have 301 frames, truncate all to first nFrames

            # Frames of unequal length make NumPy raise ValueError (newer
            # versions) or return an object array (older versions); both
            # cases are treated as a malformed file.
            try:
                np_data = np.array([np.array(frame) for frame in data])
            except ValueError:
                np_data = None
            if np_data is None or np_data.shape != (nFrames, nFeatures):
                # Skip the file in ALL lists. Dropping it from np_datas only
                # would shift every later label against its sample.
                found = None if np_data is None else np_data.shape
                print(f"Skipping {filename}: expected shape {(nFrames, nFeatures)}, got {found}")
                continue

            class_name = _class_name(cur_dir)
            label = name_to_label[class_name]  # KeyError for an unknown class folder

            filenames.append(filename)
            datas.append(data)
            np_datas.append(np_data)
            class_names.append(class_name)
            labels.append(label)
        print(f"Done processing {cur_dir}")

    if not np_datas:
        raise FileNotFoundError(f"No usable .json squat files found under {datapath}")

    # move arrays into dataframe: one row per squat, one column per frame
    datas = np.array(datas)  # convert to array so we can slice out each frame column
    d = {'filename': filenames}
    for i in range(nFrames):
        d[str(i)] = datas[:, i]
    df = pd.DataFrame(data=d)
    # Class name of each squat. Appended last so the positions of the
    # 'filename' and frame columns are unchanged.
    df['label'] = class_names

    # save to pkl
    df.to_pickle(df_filename)

    # write data into numpy arrays and save
    np_datas = np.stack(np_datas)
    labels = np.array(labels)
    assert labels.shape[0] == np_datas.shape[0], "samples and labels are misaligned"

    # Random 80/10/10 train/val/test split over a permutation of all indices.
    n = np_datas.shape[0]
    split_indices = [int(n * 0.8), int(n * 0.9)]
    train_indices, val_indices, test_indices = np.split(np.random.choice(n, n, replace=False), split_indices)

    X_train = np_datas[train_indices]
    y_train = labels[train_indices]

    X_val = np_datas[val_indices]
    y_val = labels[val_indices]

    X_test = np_datas[test_indices]
    y_test = labels[test_indices]

    # Write order here must match the read order in the cached branch above.
    with open(np_filename, 'wb') as f:
        for array in (X_train, y_train, X_val, y_val, X_test, y_test):
            np.save(f, array)

Done processing ../data/Pose_Dataset/
Done processing ../data/Pose_Dataset/good
Done processing ../data/Pose_Dataset/good/1115_3djoints_index
Done processing ../data/Pose_Dataset/bad_toe
Done processing ../data/Pose_Dataset/bad_toe/1115_3djoints_index
Done processing ../data/Pose_Dataset/bad_shallow
Done processing ../data/Pose_Dataset/bad_shallow/1115_3djoints_index
Done processing ../data/Pose_Dataset/bad_innner_thigh
Done processing ../data/Pose_Dataset/bad_innner_thigh/1115_3djoints_index
Done processing ../data/Pose_Dataset/bad_back_round
Done processing ../data/Pose_Dataset/bad_back_round/1115_3djoints_index
Done processing ../data/Pose_Dataset/bad_back_warp
Done processing ../data/Pose_Dataset/bad_back_warp/1115_3djoints_index
Done processing ../data/Pose_Dataset/bad_head
Done processing ../data/Pose_Dataset/bad_head/1115_3djoints_index


In [None]:
# Show the first five rows of df as the cell's output.
df.head(5)

In [None]:
# Print the 'label' column of df. Raises KeyError if df has no such column.
# NOTE(review): presumably one class name per squat -- confirm against the cell that builds df.
print(df['label'])

Print out the first five rows of the dataframe. The 'filename' column contains the full file path and name of each squat, and the columns '0' through '299' contain the pose data for each of the 300 frames.

Print number of examples in each class

In [None]:
# Distinct values of df's 'label' column, each with its number of rows.
df_labels = df.label.unique()
for label in df_labels:
    print('%s: %i' %(label, (df['label'] == label).sum()))

nClasses = len(df_labels)
print('nClasses:', nClasses)

# Same count for each split, keyed by integer class id 0..nClasses-1.
for heading, split_labels in (
    ('\n Train', y_train),
    ('\n Val', y_val),
    ('\n Test', y_test),
):
    print(heading)
    for i in range(nClasses):
        print('%s: %i' %(i, (split_labels == i).sum()))

In [None]:
# One-hot encode the integer labels of each split (author's note: remember to
# only run this once). The results go to new *_onehot names, so y_train, y_val
# and y_test themselves are left untouched.
y_train_onehot, y_val_onehot, y_test_onehot = (
    tf.keras.utils.to_categorical(split_labels, num_classes=nClasses)
    for split_labels in (y_train, y_val, y_test)
)

In [None]:
BATCH_SIZE = 16


def _batched_dset(features, onehot_labels):
    """Pair features with one-hot labels and group them into batches of BATCH_SIZE."""
    pairs = tf.data.Dataset.from_tensor_slices((features, onehot_labels))
    return pairs.batch(BATCH_SIZE)


train_dset = _batched_dset(X_train, y_train_onehot)
val_dset = _batched_dset(X_val, y_val_onehot)
test_dset = _batched_dset(X_test, y_test_onehot)

In [None]:
# Write each batched dataset to its own directory under ../data/dsets.
for dset, save_path in (
    (train_dset, '../data/dsets/train_dset'),
    (val_dset, '../data/dsets/val_dset'),
    (test_dset, '../data/dsets/test_dset'),
):
    tf.data.experimental.save(dset, save_path)

In [None]:
# Display the element_spec of the training dataset. Per the author's note it
# is needed as input when loading the saved datasets in other notebooks.
train_dset.element_spec

In [None]:
# Variables to use for the confusion matrices: the class names found in df,
# then (on a new line) the class-name -> class-id mapping.
print(df_labels, name_to_label, sep=' \n ')



`display_labels` is a list containing the same class names as `df_labels`, but reordered to run from the smallest to the largest value in `name_to_label`.

Example (taken from a variant in which `bad_innner_thigh` and `bad_head` share one class): for `df_labels = ['good' 'bad_toe' 'bad_shallow' 'bad_innner_thigh_or_bad_head' 'bad_back_round' 'bad_back_warp']`

and `name_to_label = {'bad_innner_thigh': 0, 'bad_back_round': 1, 'bad_back_warp': 2, 'bad_head': 0, 'bad_shallow': 3, 'bad_toe': 4, 'good': 5}`,

the result is

`display_labels = ['bad_innner_thigh_or_bad_head', 'bad_back_round', 'bad_back_warp', 'bad_shallow', 'bad_toe', 'good']`

In [None]:
# Class names for the confusion-matrix axes, ordered by class id in
# name_to_label (0 through 6). This notebook keeps all seven classes separate,
# so there is exactly one entry per class and no merged "..._or_..." label.
display_labels = [
    'bad_inner_thigh',  # 0 (the dataset folder is spelled 'bad_innner_thigh')
    'bad_back_round',   # 1
    'bad_back_warp',    # 2
    'bad_head',         # 3
    'bad_shallow',      # 4
    'bad_toe',          # 5
    'good',             # 6
]

