## GAAIMS: Predicting Multiple Sclerosis from Dynamics of Gait Variability Using an Instrumented Treadmill - A Machine Learning-Based Approach
## Dimensionless Scaling 
### Based on Hof, At L. "Scaling gait data to body size." Gait & posture 3, no. 4 (1996): 222-223.

### Package imports 

In [21]:
import numpy as np
import pandas as pd
import math
import os
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns

In [22]:
# Base directory of the exported sample data; the CSV files read and written
# further down are addressed relative to this folder.
# NOTE(review): machine-specific absolute Windows path -- adjust before running elsewhere.
path = 'C:\\Users\\Rachneet Kaur\\Dropbox\\GAIT\\sample_data\\data_export\\'

In [23]:
# Load the raw gait features; the first CSV column is used as the row index.
features_file = path+'..\\..\\gait_features.csv'
raw_df = pd.read_csv(features_file, index_col=0)
print(raw_df.shape)

# Load the subject demographics table.
demographics_file = path + '..\\demographics.csv'
all_demographics = pd.read_csv(demographics_file)

# Keep only subjects that appear in the raw gait data, and only the columns
# needed for scaling: height, body mass and shoe size.
# Make sure the units match so that the final quantities are dimensionless.
recorded_pids = raw_df['PID'].unique()
has_gait_data = all_demographics['subject ID'].isin(recorded_pids)
scaling_columns = ['subject ID', 'height (m)', 'weight (kg)', 'shoe size (mm)']
demographies = all_demographics.loc[has_gait_data, scaling_columns]

(3486, 33)


In [24]:
# Attach each subject's height, weight and shoe size to every row of the raw
# data, looking the values up by subject ID.
by_subject = demographies.set_index('subject ID')
demographic_sources = (
    ('height', 'height (m)'),
    ('weight', 'weight (kg)'),
    ('shoe_size', 'shoe size (mm)'),
)
for new_column, source_column in demographic_sources:
    raw_df[new_column] = raw_df['PID'].map(by_subject[source_column])

# Shoe size is recorded in mm; convert it to m.
raw_df['shoe_size'] = raw_df['shoe_size']/1000.0

### Normalization 

In [25]:
# Build the dimensionless (size-normalized) feature table.
# Every scale factor below is computed once from the per-row subject height,
# weight and shoe size and then applied to a whole group of columns.
g = 9.81  # Acceleration of gravity (m/s^2)
height = raw_df['height']
body_weight = raw_df['weight']*g
shoe_length = raw_df['shoe_size']

# Identifier columns are carried over unchanged.
ds_df = pd.DataFrame()
ds_df[['PID', 'TrialID', 'Label']] = raw_df[['PID', 'TrialID', 'Label']]

# Lengths -> length / height
for column in ('stride_length', 'stride_width'):
    ds_df[column] = raw_df[column]/height

# Times -> time / sqrt(height/g)
time_scale = np.sqrt(height/g)
for column in ('DS_R', 'SS_R', 'DS_L', 'SS_L',
               'stride_time', 'stance_time', 'swing_time'):
    ds_df[column] = raw_df[column]/time_scale

# Forces -> force / (weight*g)
for column in ('force_HSR', 'force_MidSSR', 'force_TOR',
               'force_HSL', 'force_TOL', 'force_MidSSL'):
    ds_df[column] = raw_df[column]/body_weight

# Cadence -> cadence / (60*sqrt(g/height))
# (the factor 60 presumably converts a per-minute cadence to per-second -- confirm units)
cadence_scale = 60*np.sqrt(g/height)
ds_df['cadence'] = raw_df['cadence']/cadence_scale

# Speeds -> speed / sqrt(g*height)
speed_scale = np.sqrt(g*height)
for column in ('tspeed_HSR', 'tspeed_HSL', 'tspeed_TOR', 'tspeed_TOL',
               'tspeed_MidSSR', 'tspeed_MidSSL', 'stride_speed'):
    ds_df[column] = raw_df[column]/speed_scale

# Walk ratio -> walk ratio / (height / (60*sqrt(g/height)))
ds_df['walk_ratio'] = raw_df['walk_ratio']/(height/cadence_scale)

# Angles remain the same.
ds_df[['LeftFPA', 'RightFPA']] = raw_df[['LeftFPA', 'RightFPA']]

# Butterfly-plot features -> feature / shoe size
# NOTE(review): the ButterflySQ_* names suggest squared quantities; if so, dividing
# by shoe_size once would not make them dimensionless -- confirm whether
# shoe_size**2 was intended.
for column in ('Butterfly_x_abs', 'Butterfly_y_abs',
               'ButterflySQ_x', 'ButterflySQ_y'):
    ds_df[column] = raw_df[column]/shoe_length

In [26]:
# Save the size-normalized DataFrame as a .csv two directories above the
# export folder (the same location the raw gait features are read from).
# `path` already ends with a path separator, so the relative part must not
# start with another one; this also matches how the input paths are built.
ds_df.to_csv(path + '..\\..\\size_normalized_gait_features.csv')

In [27]:
# Preview the first five rows of the raw features (now including height, weight and shoe_size).
raw_df.head(5)

Unnamed: 0,LeftFPA,RightFPA,Label,Butterfly_x_abs,Butterfly_y_abs,ButterflySQ_x,ButterflySQ_y,PID,TrialID,DS_R,...,swing_time,stance_time,stride_length,stride_width,stride_speed,cadence,walk_ratio,height,weight,shoe_size
0,,,0,0.075459,0.031715,0.005388,0.001013,200,1,0.408,...,0.364,1.19,0.555127,0.20092,0.357225,77.220077,0.014378,1.6,76.1,0.251
1,0.454459,1.03904,0,0.033365,0.013971,0.00098,0.000198,200,1,0.41,...,0.426,1.272,0.608529,0.204193,0.35838,70.671378,0.017221,1.6,76.1,0.251
2,-0.267919,-2.942001,0,0.01518,0.013155,0.000297,0.000176,200,1,0.466,...,0.612,1.114,0.48389,0.207683,0.280354,69.524913,0.01392,1.6,76.1,0.251
3,0.669773,3.611119,0,0.028616,0.018508,0.000705,0.000347,200,1,0.342,...,0.426,1.12,0.656292,0.217538,0.424509,77.619664,0.01691,1.6,76.1,0.251
4,-8.515633,3.855825,0,0.01187,0.028962,0.000194,0.000832,200,1,0.324,...,0.594,1.206,0.609956,0.184868,0.338864,66.666667,0.018299,1.6,76.1,0.251


In [28]:
# Preview the first five rows of the size-normalized features.
ds_df.head(5)

Unnamed: 0,PID,TrialID,Label,stride_length,stride_width,DS_R,SS_R,DS_L,SS_L,stride_time,...,tspeed_MidSSR,tspeed_MidSSL,stride_speed,walk_ratio,LeftFPA,RightFPA,Butterfly_x_abs,Butterfly_y_abs,ButterflySQ_x,ButterflySQ_y
0,200,1,0,0.346955,0.125575,1.010264,0.732936,1.203402,0.901314,3.847916,...,0.037612,0.056604,0.090167,1.335052,,,0.300633,0.126355,0.021467,0.004036
1,200,1,0,0.38033,0.127621,1.015216,1.124166,1.010264,1.054834,4.204479,...,0.069988,0.081414,0.090458,1.599091,0.454459,1.03904,0.132927,0.055663,0.003906,0.00079
2,200,1,0,0.302431,0.129802,1.153879,0.896361,0.708175,1.515395,4.273811,...,0.091783,0.092524,0.070764,1.292535,-0.267919,-2.942001,0.060477,0.052409,0.001183,0.000701
3,200,1,0,0.410182,0.135961,0.846839,1.099404,0.827029,1.054834,3.828106,...,0.087868,0.089455,0.10715,1.570221,0.669773,3.611119,0.114006,0.073737,0.002811,0.001382
4,200,1,0,0.381222,0.115542,0.802268,1.46092,0.723032,1.470825,4.457045,...,0.095856,0.104267,0.085533,1.699125,-8.515633,3.855825,0.047291,0.115388,0.000773,0.003316
