## Gait Video Study 
### Creating the summary statistics file for the traditional ML algorithms on task/subject generalization frameworks 
We use the summary statistics as CoV, range and asymmetry between the right and left limbs as the features to input to the traditional models requiring fixed size 1D input for each training/testing set sample.

In [63]:
import numpy as np
import cv2
import os
import glob
import pandas as pd
import time
import shutil
import scipy
import warnings
warnings.filterwarnings("ignore")
from IPython.display import display, HTML

In [64]:
path = 'C:\\Users\\Rachneet Kaur\\Box\\Gait Video Project\\GaitVideoData\\video\\'
data_path = path + 'downsampled_strides\\'
original_labels_path = path + 'labels_original.csv'

original_labels = pd.read_csv(original_labels_path, index_col= 0)
display(original_labels.head())

Unnamed: 0,cohort,trial,scenario,video,PID,stride_number,key,frame_count,label
0,HOA,BW,SLWT,GVS_212_T_T1,212,1,GVS_212_T_T1_1,46,0
1,HOA,BW,SLWT,GVS_212_T_T1,212,2,GVS_212_T_T1_2,39,0
2,HOA,BW,SLWT,GVS_212_T_T1,212,3,GVS_212_T_T1_3,56,0
3,HOA,BW,SLWT,GVS_212_T_T1,212,4,GVS_212_T_T1_4,53,0
4,HOA,BW,SLWT,GVS_212_T_T1,212,5,GVS_212_T_T1_5,44,0


In [65]:
# ### Run only once ###
# #Correcting labels file
# print (len(original_labels))
# #Discarding the repeated videos 
# discard_videos = ['GVS_404_W_T3', 'GVS_404_W_T4', 'GVS_405_W_T3', 'GVS_405_W_T4', 'GVS_411_W_T3', 'GVS_411_W_T4']
# labels_correct = original_labels[~(original_labels['video'].isin(discard_videos))].reset_index().drop(['index'], axis = 1)
# print ('Length after discarding repeated videos: ', len(labels_correct))

# #Discard the last some strides of the longer than ~60 sec videos 
# index_123 = labels_correct[labels_correct['video']=='GVS_123_W_T1'][: int(len(labels_correct[labels_correct['video']=='GVS_123_W_T1'])/2)].index.to_list()
# index_112 = labels_correct[labels_correct['video']=='GVS_112_W_T1'][: int(len(labels_correct[labels_correct['video']=='GVS_112_W_T1'])/2)].index.to_list()
# index_102 = labels_correct[labels_correct['video']=='GVS_102_W_T1'][: int(len(labels_correct[labels_correct['video']=='GVS_102_W_T1'])/2)].index.to_list()
# labels_correct = labels_correct[~labels_correct.index.isin(index_123+index_112+index_102)].reset_index().drop(['index'], axis = 1)
# print ('Length after discarding the extra strides of longer videos: ', len(labels_correct))

# labels_correct.to_csv(path + 'labels.csv')

4493
Length after discarding repeated videos:  4247
Length after discarding the extra strides of longer videos:  4096


In [72]:
labels_path = path + 'labels.csv'
labels = pd.read_csv(labels_path, index_col= 0)
display(labels.head())

Unnamed: 0,cohort,trial,scenario,video,PID,stride_number,key,frame_count,label
0,HOA,BW,SLWT,GVS_212_T_T1,212,1,GVS_212_T_T1_1,46,0
1,HOA,BW,SLWT,GVS_212_T_T1,212,2,GVS_212_T_T1_2,39,0
2,HOA,BW,SLWT,GVS_212_T_T1,212,3,GVS_212_T_T1_3,56,0
3,HOA,BW,SLWT,GVS_212_T_T1,212,4,GVS_212_T_T1_4,53,0
4,HOA,BW,SLWT,GVS_212_T_T1,212,5,GVS_212_T_T1_5,44,0


In [73]:
#Making the summary statistics dataframe 
#It will have all columns same as labels.csv and 90 extra columns for 36 CoV, 36 range and 18 asymmetry features
#We can then use summary statistics dataframe for all frameworks with the traditional algorithms 

keys = labels['key'] #Using the key representing the video and stride as the unique identifier
markers = ['hip', 'knee', 'ankle', 'heel', 'toe 1', 'toe 2']
order = ['right hip', 'right knee', 'right ankle', 'left hip', 'left knee', 'left ankle', 'left toe 1', 'left toe 2', \
         'left heel', 'right toe 1', 'right toe 2', 'right heel']
coordinates = [o + '-'+ y for o in order for y in ['x', 'y', 'z']]
coordinates_asymmetry = [m + '-' + y for m in markers for y in ['x', 'y', 'z']] #Defining the column names for the asymmetry
right_side_markers = ['right ' + c_a for c_a in coordinates_asymmetry] #Defining the right side markers 
left_side_markers = ['left ' + c_a for c_a in coordinates_asymmetry] #Defining the left side markers 
#Columns for the summary features dataframe 
feature_cols = [c + '-'+ y for y in ['CoV', 'range']  for c in coordinates] + [c_a + '-asymmetry' for c_a in coordinates_asymmetry]
#Summary feature dataframe with index as keys and columns as 90 summary statistics (36 CoV, 36 range and 18 asymmetry)
features_dataframe = pd.DataFrame(columns = feature_cols, index = keys)

# epsilon = 0.00000001 #Adding to the denominator of asymmetry metric to avoid situations with NaN/ infinity when 
#left side range and right side range both are zeros and hence num and den both equal 0 and hence give NaN situation

In [74]:
#Using the key as the unique identifier to loop through each stride in the dataset
for key in keys: 
    print (key)
    stride = pd.read_csv(data_path+key+'.csv', index_col = 0) 
#     stride = stride - stride.mean() #Substracting the mean of the stride feature from the corresponding features
#     display(stride.head())
    stride_cov = stride.std()/stride.mean() #Computing the stride's coefficient of variation across all 36 features 
    stride_range = stride.max()-stride.min() #Stride's range across all 36 features 
    #Stride's asymmetry across left and right side for 18 features 
    asymmetry_values = [np.abs(stride_range[x] - stride_range[y]) for x, y in zip(right_side_markers, left_side_markers)] 
    #/(0.5*(stride_range[x] + stride_range[y]) + epsilon
    #All the 90 summary statistics as a list 
    summary_stats = list(stride_cov.values)+list(stride_range.values)+asymmetry_values
#     print (len(summary_stats), summary_stats)
    #Assigning the row of the features dataframe with 90 values for summary statistics 
    features_dataframe.loc[key] = summary_stats

GVS_212_T_T1_1
GVS_212_T_T1_2
GVS_212_T_T1_3
GVS_212_T_T1_4
GVS_212_T_T1_5
GVS_212_T_T1_6
GVS_212_T_T1_7
GVS_212_T_T1_8
GVS_212_T_T1_9
GVS_212_T_T1_10
GVS_212_T_T1_11
GVS_212_T_T1_12
GVS_212_T_T1_13
GVS_212_T_T1_14
GVS_212_T_T1_15
GVS_212_T_T1_16
GVS_212_T_T1_17
GVS_212_T_T1_18
GVS_212_T_T1_19
GVS_212_T_T1_20
GVS_212_T_T1_21
GVS_212_T_T1_22
GVS_212_T_T1_23
GVS_212_T_T1_24
GVS_212_T_T1_25
GVS_212_T_T1_26
GVS_212_T_T1_27
GVS_212_T_T1_28
GVS_212_T_T1_29
GVS_212_T_T1_30
GVS_212_T_T1_31
GVS_212_T_T1_32
GVS_212_T_T1_33
GVS_212_T_T1_34
GVS_212_T_T1_35
GVS_212_T_T1_36
GVS_212_T_T1_37
GVS_212_T_T1_38
GVS_212_T_T1_39
GVS_212_T_T1_40
GVS_212_T_T1_41
GVS_212_T_T1_42
GVS_212_T_T1_43
GVS_212_T_T2_1
GVS_212_T_T2_2
GVS_212_T_T2_3
GVS_212_T_T2_4
GVS_212_T_T2_5
GVS_212_T_T2_6
GVS_212_T_T2_7
GVS_212_T_T2_8
GVS_212_T_T2_9
GVS_212_T_T2_10
GVS_212_T_T2_11
GVS_212_T_T2_12
GVS_212_T_T2_13
GVS_212_T_T2_14
GVS_212_T_T2_15
GVS_212_T_T2_16
GVS_212_T_T2_17
GVS_212_T_T2_18
GVS_212_T_T2_19
GVS_212_T_T2_20
GVS_212_T_

In [75]:
#After the features dataframe is ready, concatenating the features with the other information in the labels.csv 
#This data can now be use for all frameworks in traditional algorithms 
#This will have as many rows as strides in our dataset and 99 columns 
#(90 for features and 9 for information relative to the stride)
traditional_methods_dataframe = pd.concat((labels.set_index('key'), features_dataframe), axis = 1).reset_index()
#Saving to the .csv file 
traditional_methods_dataframe.to_csv(path+'traditional_methods_dataframe.csv')

In [76]:
traditional_methods_dataframe

Unnamed: 0,key,cohort,trial,scenario,video,PID,stride_number,frame_count,label,right hip-x-CoV,...,ankle-z-asymmetry,heel-x-asymmetry,heel-y-asymmetry,heel-z-asymmetry,toe 1-x-asymmetry,toe 1-y-asymmetry,toe 1-z-asymmetry,toe 2-x-asymmetry,toe 2-y-asymmetry,toe 2-z-asymmetry
0,GVS_212_T_T1_1,HOA,BW,SLWT,GVS_212_T_T1,212,1,46,0,0.046077,...,14.426173,3.407379,10.662441,0.830365,0.502570,31.450487,8.644012,5.236678,31.182183,8.215725
1,GVS_212_T_T1_2,HOA,BW,SLWT,GVS_212_T_T1,212,2,39,0,0.021528,...,1.360847,5.155307,11.363806,4.333776,1.025647,28.266400,2.671081,6.678294,15.058825,4.903579
2,GVS_212_T_T1_3,HOA,BW,SLWT,GVS_212_T_T1,212,3,56,0,0.034394,...,1.341021,8.625363,7.159495,3.366152,1.759968,17.545787,5.921325,8.243491,9.578638,3.008162
3,GVS_212_T_T1_4,HOA,BW,SLWT,GVS_212_T_T1,212,4,53,0,0.028511,...,2.375934,6.728268,0.098235,0.999027,0.541911,7.843339,4.279617,0.748023,19.471731,5.086056
4,GVS_212_T_T1_5,HOA,BW,SLWT,GVS_212_T_T1,212,5,44,0,0.025213,...,8.525816,1.775282,0.033210,9.166863,1.354601,6.674183,8.479480,4.373622,0.315168,11.795593
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4091,GVS_312_T_T1_44,MS,BW,SLWT,GVS_312_T_T1,312,44,36,1,0.071065,...,3.033847,5.854369,9.057097,5.285431,3.140125,6.428544,4.165710,4.623038,8.228849,4.613623
4092,GVS_312_T_T1_45,MS,BW,SLWT,GVS_312_T_T1,312,45,31,1,0.052121,...,3.162464,9.096853,3.332225,5.323200,5.028817,3.781212,2.687362,3.058057,3.779535,3.459574
4093,GVS_312_T_T1_46,MS,BW,SLWT,GVS_312_T_T1,312,46,33,1,0.056735,...,0.201689,1.852847,5.694405,0.058354,5.017291,4.010361,0.404459,4.801974,3.059228,0.724966
4094,GVS_312_T_T1_47,MS,BW,SLWT,GVS_312_T_T1,312,47,35,1,0.056819,...,2.545744,6.024273,0.770976,0.744356,5.251330,7.610876,6.776790,7.128994,8.654897,6.086117
