## Gait Video Study 
### Creating the summary statistics file for the traditional ML algorithms on task/subject generalization frameworks 
We use summary statistics, namely the coefficient of variation (CoV), the range, and the asymmetry between the right and left limbs, as the input features for the traditional models, which require a fixed-size 1D input for each training/testing set sample.

In [1]:
import numpy as np
import cv2
import os
import glob
import pandas as pd
import time
import shutil
import scipy
import warnings
warnings.filterwarnings("ignore")
from IPython.display import display, HTML

In [2]:
#Root folder of the gait video data; the stride files and the labels file are located relative to it
path = 'C:\\Users\\Rachneet Kaur\\Box\\Gait Video Project\\GaitVideoData\\video\\'
#Folder holding one .csv file per stride
data_path = '{}downsampled_strides\\'.format(path)
#File with one row of information (including the unique key) per stride
labels_path = '{}labels.csv'.format(path)

#Reading the labels using the first column of the file as the index and previewing the first few rows
labels = pd.read_csv(labels_path, index_col = 0)
display(labels.head())

Unnamed: 0,cohort,trial,scenario,video,PID,stride_number,key,frame_count,label
0,HOA,BW,SLWT,GVS_212_T_T1,212,1,GVS_212_T_T1_1,46,0
1,HOA,BW,SLWT,GVS_212_T_T1,212,2,GVS_212_T_T1_2,39,0
2,HOA,BW,SLWT,GVS_212_T_T1,212,3,GVS_212_T_T1_3,56,0
3,HOA,BW,SLWT,GVS_212_T_T1,212,4,GVS_212_T_T1_4,53,0
4,HOA,BW,SLWT,GVS_212_T_T1,212,5,GVS_212_T_T1_5,44,0


In [3]:
#Setting up the (initially empty) summary statistics dataframe
#One row per stride key and 90 feature columns: 36 CoV, 36 range and 18 asymmetry features
#The same dataframe can then be used for all frameworks with the traditional algorithms

#The key identifies the video and the stride, so it serves as the unique identifier of each row
keys = labels['key']

#Body markers tracked on each side of the body
markers = ['hip', 'knee', 'ankle', 'heel', 'toe 1', 'toe 2']
#Side-specific markers, listed in the order used for the CoV and range feature columns
order = [
    'right hip', 'right knee', 'right ankle',
    'left hip', 'left knee', 'left ankle',
    'left toe 1', 'left toe 2', 'left heel',
    'right toe 1', 'right toe 2', 'right heel',
]

#Every side-specific marker paired with each of the x, y and z axes (12 markers * 3 axes = 36 names)
coordinates = ['{}-{}'.format(marker, axis) for marker in order for axis in 'xyz']
#Side-independent marker/axis names used for the asymmetry columns (6 markers * 3 axes = 18 names)
coordinates_asymmetry = ['{}-{}'.format(marker, axis) for marker in markers for axis in 'xyz']
#Matching right and left side names, in the same order as the asymmetry columns
right_side_markers = ['right ' + name for name in coordinates_asymmetry]
left_side_markers = ['left ' + name for name in coordinates_asymmetry]

#Column names of the summary features: all CoV columns first, then all range columns, then the asymmetry columns
cov_cols = [name + '-CoV' for name in coordinates]
range_cols = [name + '-range' for name in coordinates]
asymmetry_cols = [name + '-asymmetry' for name in coordinates_asymmetry]
feature_cols = cov_cols + range_cols + asymmetry_cols

#Empty dataframe indexed by the stride keys with the 90 summary statistics as columns
features_dataframe = pd.DataFrame(index = keys, columns = feature_cols)

In [None]:
#Using the key as the unique identifier to loop through each stride in the dataset
#The 90 summary statistics of every stride are collected in a list and the features dataframe is built
#once after the loop; this avoids slow row-by-row assignment into an empty object-dtype dataframe
#and gives numeric feature columns
summary_rows = []
for key in keys:
    #Selecting the 36 marker coordinates by name so that the values always line up with feature_cols,
    #irrespective of the column order (or any additional columns) in the stride's .csv file
    stride = pd.read_csv(data_path+key+'.csv', index_col = 0)[coordinates]
    #Stride's coefficient of variation (standard deviation divided by mean) across all 36 features
    stride_cov = stride.std()/stride.mean()
    #Stride's range (maximum minus minimum) across all 36 features
    stride_range = stride.max()-stride.min()
    #Stride's asymmetry for 18 features: absolute difference between the right and left side ranges
    asymmetry_values = np.abs(stride_range[right_side_markers].values - stride_range[left_side_markers].values)
    #All the 90 summary statistics in the same order as feature_cols (CoV, range, asymmetry)
    summary_rows.append(np.concatenate((stride_cov.values, stride_range.values, asymmetry_values)))

#Features dataframe with the keys as index and the 90 summary statistics as columns
features_dataframe = pd.DataFrame(summary_rows, columns = feature_cols, index = keys)

In [None]:
#Once the features dataframe is ready, it is placed side by side with the other information from labels.csv
#The combined data can now be used for all frameworks with the traditional algorithms
#It has as many rows as strides in our dataset and 99 columns
#(90 for features and 9 for information relative to the stride)

#Indexing the labels by key so that they align with the features dataframe, which is indexed by key as well
stride_info = labels.set_index('key')
combined = pd.concat([stride_info, features_dataframe], axis = 1)
#Turning the key index back into a regular column
traditional_methods_dataframe = combined.reset_index()

#Saving to the .csv file
output_file = path+'traditional_methods_dataframe.csv'
traditional_methods_dataframe.to_csv(output_file)