## Runscribe: Comparison of Data on Treadmill & Outdoors

### data analysis

In [1]:
# import dependencies
import pandas as pd
import numpy as np
import scipy
from scipy import stats
from scipy.stats import variation

### cleaning and descriptives for outdoor runs

In [2]:
# read in outdoor_df
outdoor_df = pd.read_csv("outdoor_df.csv")
outdoor_df.head()

Unnamed: 0,timestamp,step,stride_pace,step_rate,stride_length,contact_time,flight_ratio,power,shock,impact_gs,...,debug_2,flight_time,stride_angle,walking,footstrike_type_new,laterality,date,injury_status,subject_id,medium
0,511,2,1.7899,201.6807,1.065,310,0.0,,14.2155,13.0703,...,3.625,-13,0.0892,,8.3526,left,2018-03-30,healthy,4,outdoor
1,1106,3,1.5258,171.9198,1.065,379,0.0,,10.8569,10.3164,...,8.6094,-30,0.475,,7.5634,left,2018-03-30,healthy,4,outdoor
2,1804,4,1.0714,120.7243,1.065,643,0.0,,13.4718,12.7734,...,11.9375,-146,11.1087,,4.0725,left,2018-03-30,healthy,4,outdoor
3,2798,5,1.1464,129.1712,1.065,644,0.0,,5.6738,3.3516,...,7.8398,-180,16.6177,,5.1109,left,2018-03-30,healthy,4,outdoor
4,3727,6,1.5236,171.6738,1.065,434,0.0,,5.1106,1.5586,...,4.8633,-85,3.8075,,8.2728,left,2018-03-30,healthy,4,outdoor


In [3]:
# rows and columns
outdoor_df.shape

(10688, 33)

In [4]:
# variables
outdoor_df.columns

Index(['timestamp', 'step', 'stride_pace', 'step_rate', 'stride_length',
       'contact_time', 'flight_ratio', 'power', 'shock', 'impact_gs',
       'braking_gs', 'footstrike_type', 'pronation_excursion_fs_mp',
       'max_pronation_velocity', 'step_length', 'pronation_excursion_mp_to',
       'stance_excursion_fs_mp', 'stance_excursion_mp_to', 'split_stride_pace',
       'cycle_time', 'split_cycle_time', 'contact_ratio', 'debug_1', 'debug_2',
       'flight_time', 'stride_angle', 'walking', 'footstrike_type_new',
       'laterality', 'date', 'injury_status', 'subject_id', 'medium'],
      dtype='object')

In [5]:
# number of observations for each subject
outdoor_df.subject_id.value_counts()

27    2492
4     2472
23    2197
16    2043
12    1484
Name: subject_id, dtype: int64

In [6]:
# drop unnessary columns
outdoor_df = outdoor_df.drop(['walking', 'debug_1', 'debug_2', 'power', 'timestamp'], axis = 1)
outdoor_df.head()

Unnamed: 0,step,stride_pace,step_rate,stride_length,contact_time,flight_ratio,shock,impact_gs,braking_gs,footstrike_type,...,split_cycle_time,contact_ratio,flight_time,stride_angle,footstrike_type_new,laterality,date,injury_status,subject_id,medium
0,2,1.7899,201.6807,1.065,310,0.0,14.2155,13.0703,5.5898,12,...,595,108.7719,-13,0.0892,8.3526,left,2018-03-30,healthy,4,outdoor
1,3,1.5258,171.9198,1.065,379,0.0,10.8569,10.3164,3.3828,11,...,698,118.8088,-30,0.475,7.5634,left,2018-03-30,healthy,4,outdoor
2,4,1.0714,120.7243,1.065,643,0.0,13.4718,12.7734,4.2813,5,...,994,183.1909,-146,11.1087,4.0725,left,2018-03-30,healthy,4,outdoor
3,5,1.1464,129.1712,1.065,644,0.0,5.6738,3.3516,4.5781,7,...,929,225.9649,-180,16.6177,5.1109,left,2018-03-30,healthy,4,outdoor
4,6,1.5236,171.6738,1.065,434,0.0,5.1106,1.5586,4.8672,12,...,699,163.7736,-85,3.8075,8.2728,left,2018-03-30,healthy,4,outdoor


In [7]:
# drop observations where flight_ratio is less than 10 (walking)
outdoor_running_df = outdoor_df[outdoor_df.flight_ratio > 10]

#check that observations were dropped
outdoor_running_df["flight_ratio"].min()

10.028700000000001

In [8]:
# group data analysis by subject
outdoor_groupby_subject = outdoor_running_df.groupby('subject_id')

In [9]:
# descriptive statistics by subject
outdoor_describe = outdoor_groupby_subject.describe(percentiles=[])\
                   .reset_index()

outdoor_describe

Unnamed: 0_level_0,subject_id,braking_gs,braking_gs,braking_gs,braking_gs,braking_gs,braking_gs,contact_ratio,contact_ratio,contact_ratio,...,stride_length,stride_length,stride_length,stride_length,stride_pace,stride_pace,stride_pace,stride_pace,stride_pace,stride_pace
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,50%,max,count,mean,std,...,std,min,50%,max,count,mean,std,min,50%,max
0,4,2125.0,10.527694,2.909489,0.6367,10.3633,15.9766,2125.0,67.092987,4.689124,...,0.158584,1.0853,2.2888,3.0586,2125.0,3.241155,0.234,1.4356,3.2112,4.2952
1,12,1061.0,12.5855,2.65952,1.6875,13.2305,15.9961,1061.0,58.566143,9.21793,...,1.18976,1.0062,2.7887,5.8896,1061.0,4.048926,1.65565,1.3932,3.8767,9.0215
2,16,1877.0,11.105368,2.796769,4.2969,11.2734,15.9805,1877.0,69.0242,6.673315,...,0.103192,1.8872,2.6475,3.0509,1877.0,3.58794,0.175599,2.6769,3.603,4.2511
3,23,2153.0,9.886145,2.638059,5.3867,9.2227,15.9922,2153.0,62.230966,5.99401,...,0.211812,1.2059,2.4297,3.4033,2153.0,3.420573,0.305129,1.5343,3.378,4.6942
4,27,2157.0,9.870123,2.958839,4.8828,9.1797,15.9922,2157.0,71.070569,7.211966,...,0.194075,0.5893,2.0888,2.8886,2157.0,3.004906,0.299621,0.7964,2.9668,4.2479


In [10]:
outdoor_mean = outdoor_groupby_subject.mean().add_suffix('_mean')\
               .reset_index()
outdoor_mean

Unnamed: 0,subject_id,step_mean,stride_pace_mean,step_rate_mean,stride_length_mean,contact_time_mean,flight_ratio_mean,shock_mean,impact_gs_mean,braking_gs_mean,...,pronation_excursion_mp_to_mean,stance_excursion_fs_mp_mean,stance_excursion_mp_to_mean,split_stride_pace_mean,cycle_time_mean,split_cycle_time_mean,contact_ratio_mean,flight_time_mean,stride_angle_mean,footstrike_type_new_mean
0,4,709.323294,3.241155,168.416157,2.309117,285.88,19.731211,14.850554,9.97203,10.527694,...,0.297388,26.56431,64.332242,3.241155,712.752,712.752,67.092987,70.248,1.235843,2.429171
1,12,485.090481,4.048926,166.484367,2.921104,264.995287,26.624048,18.086746,12.778077,12.5855,...,-5.249249,15.549934,36.247697,4.048926,721.94722,721.94722,58.566143,95.72573,2.527976,5.568268
2,16,529.086308,3.58794,163.193955,2.637614,299.795951,18.459271,15.462385,10.223779,11.105368,...,4.231296,26.048063,58.154061,3.58794,735.589771,735.589771,69.0242,67.765051,1.035891,2.576302
3,23,546.237343,3.420573,166.797297,2.460639,275.443103,23.393806,14.194952,9.77839,9.886145,...,12.153834,19.206575,61.683034,3.420573,719.714352,719.714352,62.230966,84.167673,1.676274,4.526125
4,27,650.439963,3.004906,170.86663,2.109433,291.166898,17.070546,14.458229,10.106231,9.870123,...,14.902021,14.92989,62.511791,3.004906,702.78535,702.78535,71.070569,59.978674,1.062641,5.744982


In [11]:
outdoor_sd = outdoor_groupby_subject.std().add_suffix('_sd')\
             .reset_index()
outdoor_sd

Unnamed: 0,subject_id,step_sd,stride_pace_sd,step_rate_sd,stride_length_sd,contact_time_sd,flight_ratio_sd,shock_sd,impact_gs_sd,braking_gs_sd,...,pronation_excursion_mp_to_sd,stance_excursion_fs_mp_sd,stance_excursion_mp_to_sd,split_stride_pace_sd,cycle_time_sd,split_cycle_time_sd,contact_ratio_sd,flight_time_sd,stride_angle_sd,footstrike_type_new_sd
0,4,312.89161,0.234,2.98703,0.158584,13.461398,3.335133,3.314791,3.576717,2.909489,...,8.140969,7.200345,7.151112,0.234,13.11919,13.11919,4.689124,11.834075,0.395863,2.052099
1,12,292.212825,1.65565,7.534766,1.18976,35.665704,9.484774,2.4696,2.118382,2.65952,...,57.19161,33.133749,25.230977,1.65565,25.917668,25.917668,9.21793,32.425229,3.837816,9.443118
2,16,298.216763,0.175599,3.114614,0.103192,19.601152,4.697403,3.268043,3.7543,2.796769,...,3.665315,4.537285,3.378618,0.175599,14.072954,14.072954,6.673315,17.069521,0.499959,1.293126
3,23,317.088845,0.305129,3.271075,0.211812,16.349791,4.482078,3.420544,3.590057,2.638059,...,7.060475,6.706002,6.464045,0.305129,14.204712,14.204712,5.99401,16.394878,0.592439,1.91121
4,27,379.364728,0.299621,4.488585,0.194075,19.226942,5.187551,3.31321,3.422332,2.958839,...,5.068085,10.316032,12.856363,0.299621,18.411696,18.411696,7.211966,18.464239,0.741174,2.940069


In [12]:
# median descriptive statistic by subject
outdoor_median = outdoor_groupby_subject.median().add_suffix('_median')\
                 .reset_index()
outdoor_median

Unnamed: 0,subject_id,step_median,stride_pace_median,step_rate_median,stride_length_median,contact_time_median,flight_ratio_median,shock_median,impact_gs_median,braking_gs_median,...,pronation_excursion_mp_to_median,stance_excursion_fs_mp_median,stance_excursion_mp_to_median,split_stride_pace_median,cycle_time_median,split_cycle_time_median,contact_ratio_median,flight_time_median,stride_angle_median,footstrike_type_new_median
0,4,709,3.2112,168.0672,2.2888,285,20.1117,15.0736,10.0625,10.3633,...,-1.5095,28.3783,63.239,3.2112,714,714,66.5049,72,1.2382,1.9122
1,12,462,3.8767,165.7459,2.7887,270,25.3444,18.4625,13.1055,13.2305,...,-33.6959,12.2495,32.9838,3.8767,724,724,59.5506,91,1.6286,6.5089
2,16,550,3.603,163.2653,2.6475,296,18.7675,15.8401,10.5898,11.2734,...,4.6506,26.7331,58.1829,3.603,735,735,68.2353,69,1.0065,2.3811
3,23,547,3.378,166.6667,2.4297,274,23.9669,14.2427,9.6055,9.2227,...,11.5295,20.0994,60.5517,3.378,720,720,61.3333,86,1.681,4.2717
4,27,683,2.9668,170.4545,2.0888,295,15.942,14.7502,10.0938,9.1797,...,14.7303,18.4294,61.6202,2.9668,704,704,72.4638,55,0.8164,4.7476


In [13]:
# calculate COV
outdoor_cov = outdoor_groupby_subject\
              .aggregate(lambda x: scipy.stats.variation(x, axis=None))\
              .add_suffix('_cov')\
              .reset_index()

outdoor_cov

#outdoor_cov.to_csv("outdoor_cov.csv")




Unnamed: 0,subject_id,step_cov,stride_pace_cov,step_rate_cov,stride_length_cov,contact_time_cov,flight_ratio_cov,shock_cov,impact_gs_cov,braking_gs_cov,...,pronation_excursion_mp_to_cov,stance_excursion_fs_mp_cov,stance_excursion_mp_to_cov,split_stride_pace_cov,cycle_time_cov,split_cycle_time_cov,contact_ratio_cov,flight_time_cov,stride_angle_cov,footstrike_type_new_cov
0,4,0.441009,0.07218,0.017732,0.068661,0.047077,0.168989,0.223157,0.35859,0.2763,...,27.368492,0.27099,0.111133,0.07218,0.018402,0.018402,0.069873,0.168422,0.320243,0.844574
1,12,0.602104,0.408718,0.045237,0.407106,0.134527,0.35608,0.136478,0.165704,0.211217,...,-10.890062,2.129792,0.695743,0.408718,0.035883,0.035883,0.157319,0.338571,1.517422,1.695081
2,16,0.563495,0.048928,0.01908,0.039113,0.065364,0.254406,0.211298,0.367115,0.251772,...,0.866008,0.174143,0.058082,0.048928,0.019126,0.019126,0.096655,0.251826,0.482508,0.501797
3,23,0.580362,0.089183,0.019607,0.08606,0.059344,0.191548,0.240913,0.367057,0.266782,...,0.580791,0.34907,0.10477,0.089183,0.019732,0.019732,0.096296,0.194743,0.353344,0.422164
4,27,0.583108,0.099687,0.026263,0.091982,0.066019,0.303819,0.229104,0.338557,0.299708,...,0.340015,0.690805,0.205615,0.099687,0.026192,0.026192,0.101453,0.307775,0.697321,0.511644


In [14]:
outdoor_compare = pd.merge(outdoor_mean, outdoor_sd,
                           on = "subject_id", how = 'left')

outdoor_compare = pd.merge(outdoor_compare, outdoor_cov,
                           on = "subject_id", how = 'left')

outdoor_compare

Unnamed: 0,subject_id,step_mean,stride_pace_mean,step_rate_mean,stride_length_mean,contact_time_mean,flight_ratio_mean,shock_mean,impact_gs_mean,braking_gs_mean,...,pronation_excursion_mp_to_cov,stance_excursion_fs_mp_cov,stance_excursion_mp_to_cov,split_stride_pace_cov,cycle_time_cov,split_cycle_time_cov,contact_ratio_cov,flight_time_cov,stride_angle_cov,footstrike_type_new_cov
0,4,709.323294,3.241155,168.416157,2.309117,285.88,19.731211,14.850554,9.97203,10.527694,...,27.368492,0.27099,0.111133,0.07218,0.018402,0.018402,0.069873,0.168422,0.320243,0.844574
1,12,485.090481,4.048926,166.484367,2.921104,264.995287,26.624048,18.086746,12.778077,12.5855,...,-10.890062,2.129792,0.695743,0.408718,0.035883,0.035883,0.157319,0.338571,1.517422,1.695081
2,16,529.086308,3.58794,163.193955,2.637614,299.795951,18.459271,15.462385,10.223779,11.105368,...,0.866008,0.174143,0.058082,0.048928,0.019126,0.019126,0.096655,0.251826,0.482508,0.501797
3,23,546.237343,3.420573,166.797297,2.460639,275.443103,23.393806,14.194952,9.77839,9.886145,...,0.580791,0.34907,0.10477,0.089183,0.019732,0.019732,0.096296,0.194743,0.353344,0.422164
4,27,650.439963,3.004906,170.86663,2.109433,291.166898,17.070546,14.458229,10.106231,9.870123,...,0.340015,0.690805,0.205615,0.099687,0.026192,0.026192,0.101453,0.307775,0.697321,0.511644


In [15]:
# get steps for outdoor run to compare to treadmill run
outdoor_max = outdoor_groupby_subject.max()
outdoor_max['step']

subject_id
4     1254
12    1060
16    1024
23    1099
27    1276
Name: step, dtype: int64

### cleaning and descriptives for treadmill runs

In [16]:
# read in treadmill_df
treadmill_df = pd.read_csv("treadmill_df.csv")
treadmill_df.head()

Unnamed: 0,timestamp,step,stride_pace,step_rate,stride_length,contact_time,flight_ratio,power,shock,impact_gs,...,debug_2,flight_time,stride_angle,walking,footstrike_type_new,laterality,date,injury_status,subject_id,medium
0,408,1,1.7251,160.2136,1.2921,460,0.0,,9.2403,8.8711,...,2.4727,-86,3.214,,8.1634,left,2018-04-07,healthy,4,treadmill
1,1157,2,1.917,178.0415,1.2921,405,0.0,,8.5985,8.1133,...,1.3242,-68,2.0107,,8.7936,left,2018-04-07,healthy,4,treadmill
2,1831,3,1.7159,159.3625,1.2921,394,0.0,,9.3279,8.8203,...,3.832,-18,0.1409,,8.5086,left,2018-04-07,healthy,4,treadmill
3,2584,4,1.3334,123.839,1.2921,614,0.0,,10.7894,1.8984,...,0.4961,-130,7.3118,,2.2456,left,2018-04-07,healthy,4,treadmill
4,3553,5,1.376,127.7955,1.2921,599,0.0,,9.0301,5.6172,...,7.4453,-130,7.3118,,2.7498,left,2018-04-07,healthy,4,treadmill


In [17]:
# rows and columns
treadmill_df.shape

(29814, 33)

In [18]:
# variables
treadmill_df.columns

Index(['timestamp', 'step', 'stride_pace', 'step_rate', 'stride_length',
       'contact_time', 'flight_ratio', 'power', 'shock', 'impact_gs',
       'braking_gs', 'footstrike_type', 'pronation_excursion_fs_mp',
       'max_pronation_velocity', 'step_length', 'pronation_excursion_mp_to',
       'stance_excursion_fs_mp', 'stance_excursion_mp_to', 'split_stride_pace',
       'cycle_time', 'split_cycle_time', 'contact_ratio', 'debug_1', 'debug_2',
       'flight_time', 'stride_angle', 'walking', 'footstrike_type_new',
       'laterality', 'date', 'injury_status', 'subject_id', 'medium'],
      dtype='object')

In [19]:
# number of observations for each subject
treadmill_df.subject_id.value_counts()

12    7788
27    6120
4     5586
23    5413
16    4907
Name: subject_id, dtype: int64

In [20]:
# drop unnecessary columns
treadmill_df = treadmill_df.drop(['walking', 'debug_1', 'debug_2', 'power', 'timestamp'], axis = 1)
treadmill_df.head()

Unnamed: 0,step,stride_pace,step_rate,stride_length,contact_time,flight_ratio,shock,impact_gs,braking_gs,footstrike_type,...,split_cycle_time,contact_ratio,flight_time,stride_angle,footstrike_type_new,laterality,date,injury_status,subject_id,medium
0,1,1.7251,160.2136,1.2921,460,0.0,9.2403,8.8711,2.5859,12,...,749,159.1696,-86,3.214,8.1634,left,2018-04-07,healthy,4,treadmill
1,2,1.917,178.0415,1.2921,405,0.0,8.5985,8.1133,2.8477,13,...,674,150.5576,-68,2.0107,8.7936,left,2018-04-07,healthy,4,treadmill
2,3,1.7159,159.3625,1.2921,394,0.0,9.3279,8.8203,3.0352,12,...,753,109.7493,-18,0.1409,8.5086,left,2018-04-07,healthy,4,treadmill
3,4,1.3334,123.839,1.2921,614,0.0,10.7894,1.8984,10.6211,3,...,969,172.9577,-130,7.3118,2.2456,left,2018-04-07,healthy,4,treadmill
4,5,1.376,127.7955,1.2921,599,0.0,9.0301,5.6172,7.0703,4,...,939,176.1765,-130,7.3118,2.7498,left,2018-04-07,healthy,4,treadmill


In [21]:
#check that columns were dropped
treadmill_df.shape

(29814, 28)

In [22]:
# sanity check on flight_ratio
treadmill_df['flight_ratio'].min()

0.0

In [23]:
# drop observations where flight_ratio is less than 10 (walking)
treadmill_running_df = treadmill_df[treadmill_df.flight_ratio > 10]

#check that observations were dropped
treadmill_running_df["flight_ratio"].min()

10.025700000000001

In [24]:
#treadmill_running_df.dtypes

In [25]:
treadmill_running_df.head()

Unnamed: 0,step,stride_pace,step_rate,stride_length,contact_time,flight_ratio,shock,impact_gs,braking_gs,footstrike_type,...,split_cycle_time,contact_ratio,flight_time,stride_angle,footstrike_type_new,laterality,date,injury_status,subject_id,medium
56,57,2.1873,128.6174,2.0407,350,24.8927,5.2053,3.0273,4.2344,10,...,933,60.0343,116,3.701,7.4016,left,2018-04-07,healthy,4,treadmill
410,421,2.5812,175.4386,1.7655,299,12.5731,6.6374,2.2344,6.25,2,...,684,77.6623,43,0.5886,1.4472,left,2018-04-07,healthy,4,treadmill
525,541,2.5062,155.0388,1.9398,339,12.4031,11.396,9.7578,5.8867,5,...,774,77.931,48,0.6676,3.9204,left,2018-04-07,healthy,4,treadmill
527,543,2.9097,156.25,2.2347,309,19.5313,11.539,9.9727,5.8047,4,...,768,67.3203,75,1.4145,3.4433,left,2018-04-07,healthy,4,treadmill
528,544,2.9298,156.0468,2.253,319,16.9271,9.4236,7.4297,5.7969,4,...,769,70.8889,65,1.0539,3.27,left,2018-04-07,healthy,4,treadmill


In [26]:
# group data analysis by subject
treadmill_groupby_subject = treadmill_running_df.groupby('subject_id')

# get steps for treadmill run to compare to outdoor run
treadmill_groupby_subject['step'].max()


subject_id
4     2805
12    3966
16    2722
23    2706
27    3105
Name: step, dtype: int64

In [48]:
treadmill_steps = treadmill_running_df.drop(treadmill_running_df[((treadmill_running_df['subject_id'] == 4)
                                                                  & (treadmill_running_df['step'] > 1254))].index)
                                                                

In [49]:
treadmill_steps = treadmill_steps.drop(treadmill_steps[((treadmill_steps['subject_id'] == 12)
                                                                  & (treadmill_steps['step'] > 1060))].index)
                                                                

In [50]:
treadmill_steps = treadmill_steps.drop(treadmill_steps[((treadmill_steps['subject_id'] == 16)
                                                                  & (treadmill_steps['step'] > 1025))].index)
                                                

In [51]:
treadmill_steps = treadmill_steps.drop(treadmill_steps[((treadmill_steps['subject_id'] == 23)
                                                                  & (treadmill_steps['step'] > 1099))].index)
                                                                  

In [53]:
treadmill_steps = treadmill_steps.drop(treadmill_steps[((treadmill_steps['subject_id'] == 27)
                                                                  & (treadmill_steps['step'] > 1276))].index)
                                                                  

In [54]:
# group data analysis by subject
treadmill_steps_groupby_subject = treadmill_steps.groupby('subject_id')
treadmill_steps_groupby_subject['step'].max()

subject_id
4     1254
12    1060
16    1025
23    1099
27    1276
Name: step, dtype: int64

In [55]:
# descriptive statistics by subject
treadmill_groupby_subject.describe(percentiles=[])

Unnamed: 0_level_0,braking_gs,braking_gs,braking_gs,braking_gs,braking_gs,braking_gs,contact_ratio,contact_ratio,contact_ratio,contact_ratio,...,stride_length,stride_length,stride_length,stride_length,stride_pace,stride_pace,stride_pace,stride_pace,stride_pace,stride_pace
Unnamed: 0_level_1,count,mean,std,min,50%,max,count,mean,std,min,...,std,min,50%,max,count,mean,std,min,50%,max
subject_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
4,4419.0,6.499293,0.984354,4.2344,6.3594,15.1172,4419.0,69.174952,4.249126,57.3248,...,0.180958,1.7655,2.8192,3.7547,4419.0,3.723503,0.296227,2.1873,3.7382,5.1231
12,6925.0,10.866667,2.878367,0.5898,11.0195,15.9922,6925.0,50.09134,4.88724,37.234,...,0.248995,0.8955,3.0121,3.4693,6925.0,4.152045,0.498154,0.7953,4.2348,5.4753
16,4131.0,10.132741,2.430033,3.8438,9.7813,15.9531,4131.0,72.119921,6.783939,47.6578,...,0.309325,2.9347,3.7261,5.272,4131.0,5.109179,0.508904,3.7367,4.9581,7.1068
23,5409.0,7.123099,0.865533,1.9961,7.1563,15.4102,5409.0,62.755982,3.70497,46.6019,...,0.241349,1.9383,2.9524,4.1104,5409.0,4.144936,0.337158,2.6017,4.0865,5.7089
27,5768.0,9.204421,3.213571,3.2969,8.6484,15.9805,5768.0,66.580236,7.75864,48.8069,...,0.258883,0.7418,2.99865,3.3232,5768.0,4.012443,0.409855,1.0147,4.10135,4.9662


In [56]:
# dataframe of means for compare
treadmill_mean = treadmill_groupby_subject.mean().add_suffix("_mean")\
                 .reset_index()

#treadmill_mean

In [57]:
treadmill_sd = treadmill_groupby_subject.std().add_suffix("_sd")\
               .reset_index()

treadmill_sd

Unnamed: 0,subject_id,step_sd,stride_pace_sd,step_rate_sd,stride_length_sd,contact_time_sd,flight_ratio_sd,shock_sd,impact_gs_sd,braking_gs_sd,...,pronation_excursion_mp_to_sd,stance_excursion_fs_mp_sd,stance_excursion_mp_to_sd,split_stride_pace_sd,cycle_time_sd,split_cycle_time_sd,contact_ratio_sd,flight_time_sd,stride_angle_sd,footstrike_type_new_sd
0,4,644.777751,0.296227,4.379509,0.180958,15.829779,2.941312,2.225381,2.450211,0.984354,...,7.65124,4.038916,4.436419,0.296227,20.674442,20.674442,4.249126,10.642602,0.262188,1.151091
1,12,1109.146899,0.498154,7.715719,0.248995,23.177099,4.161686,2.939349,2.840182,2.878367,...,13.271246,6.651804,4.619771,0.498154,32.601278,32.601278,4.88724,12.631035,0.567803,1.895765
2,16,781.308023,0.508904,4.483081,0.309325,23.218967,4.710717,3.209668,3.379389,2.430033,...,4.613286,4.987866,6.614081,0.508904,20.326305,20.326305,6.783939,16.460558,0.303742,1.421542
3,23,781.184882,0.337158,2.620008,0.241349,8.899084,2.78882,2.056131,2.379779,0.865533,...,6.776256,5.381537,7.115671,0.337158,11.481364,11.481364,3.70497,10.725928,0.336919,1.533738
4,27,901.877475,0.409855,5.07473,0.258883,21.163841,5.533972,3.481362,4.074745,3.213571,...,5.583853,8.789567,15.398988,0.409855,21.760554,21.760554,7.75864,20.486709,0.600887,2.505027


In [58]:
# median descriptive statistic by subject
treadmill_median = treadmill_groupby_subject.median()\
                   .add_suffix("_median")\
                   .reset_index()

treadmill_median

Unnamed: 0,subject_id,step_median,stride_pace_median,step_rate_median,stride_length_median,contact_time_median,flight_ratio_median,shock_median,impact_gs_median,braking_gs_median,...,pronation_excursion_mp_to_median,stance_excursion_fs_mp_median,stance_excursion_mp_to_median,split_stride_pace_median,cycle_time_median,split_cycle_time_median,contact_ratio_median,flight_time_median,stride_angle_median,footstrike_type_new_median
0,4,1664,3.7382,159.1512,2.8192,309,18.4416,12.2223,10.2656,6.3594,...,3.3345,27.4606,59.7708,3.7382,754,754,68.8095,70,0.9604,2.1737
1,12,1928,4.2348,168.0672,3.0121,235,34.0974,16.514,12.0078,11.0195,...,3.7727,20.5262,41.8707,4.2348,714,714,49.0982,121,2.7395,4.15
2,16,1380,4.9581,160.8579,3.7261,320,14.6667,13.4803,8.4219,9.7813,...,9.802,24.0203,55.897,4.9581,746,746,74.2459,55,0.4695,3.1542
3,23,1354,4.0865,166.4355,2.9524,279,22.865,12.4162,10.0547,7.1563,...,13.617,22.0731,60.5597,4.0865,721,721,62.7273,82,1.2856,3.7092
4,27,1539,4.10135,164.1587,2.99865,285,21.6508,13.6088,8.49415,8.6484,...,12.3567,0.86135,67.12055,4.10135,731,731,64.3341,79,1.17925,9.75455


In [59]:
# calculate COV
treadmill_cov = treadmill_groupby_subject\
                .aggregate(lambda x: scipy.stats.variation(x, axis=None))\
                .add_suffix("_cov")\
                .reset_index()
treadmill_cov

#treadmill_cov.to_csv("treadmill_cov.csv")



Unnamed: 0,subject_id,step_cov,stride_pace_cov,step_rate_cov,stride_length_cov,contact_time_cov,flight_ratio_cov,shock_cov,impact_gs_cov,braking_gs_cov,...,pronation_excursion_mp_to_cov,stance_excursion_fs_mp_cov,stance_excursion_mp_to_cov,split_stride_pace_cov,cycle_time_cov,split_cycle_time_cov,contact_ratio_cov,flight_time_cov,stride_angle_cov,footstrike_type_new_cov
0,4,0.387595,0.079547,0.02747,0.064599,0.051414,0.161222,0.180395,0.235869,0.151438,...,39.945814,0.148979,0.073426,0.079547,0.02744,0.02744,0.061419,0.155175,0.273123,0.506061
1,12,0.575839,0.119969,0.045776,0.08445,0.097361,0.124812,0.181704,0.243662,0.264861,...,1.100526,0.32198,0.109598,0.119969,0.045691,0.045691,0.09756,0.106625,0.209509,0.460932
2,16,0.58184,0.099594,0.027765,0.08154,0.074587,0.288471,0.233914,0.381271,0.239791,...,0.425635,0.207947,0.119799,0.099594,0.027322,0.027322,0.094053,0.272422,0.531667,0.449125
3,23,0.577146,0.081335,0.01575,0.080695,0.032015,0.121805,0.162047,0.228925,0.1215,...,0.469596,0.255503,0.115402,0.081335,0.015909,0.015909,0.059032,0.12981,0.257438,0.383616
4,27,0.582675,0.102137,0.030779,0.088716,0.072925,0.27306,0.260597,0.458672,0.349103,...,0.466154,1.984318,0.220172,0.102137,0.029866,0.029866,0.116521,0.27744,0.524286,0.286667


In [60]:
treadmill_compare = pd.merge(treadmill_mean, treadmill_sd,
                             on = "subject_id", how = 'left')

treadmill_compare = pd.merge(treadmill_compare, treadmill_cov,
                             on = "subject_id", how = 'left')

treadmill_compare

Unnamed: 0,subject_id,step_mean,stride_pace_mean,step_rate_mean,stride_length_mean,contact_time_mean,flight_ratio_mean,shock_mean,impact_gs_mean,braking_gs_mean,...,pronation_excursion_mp_to_cov,stance_excursion_fs_mp_cov,stance_excursion_mp_to_cov,split_stride_pace_cov,cycle_time_cov,split_cycle_time_cov,contact_ratio_cov,flight_time_cov,stride_angle_cov,footstrike_type_new_cov
0,4,1663.345553,3.723503,159.408918,2.800926,307.854039,18.241775,12.33479,10.386849,6.499293,...,39.945814,0.148979,0.073426,0.079547,0.02744,0.02744,0.061419,0.155175,0.273123,0.506061
1,12,1926.001733,4.152045,168.54346,2.948203,238.035957,33.34134,16.17541,11.65542,10.866667,...,1.100526,0.32198,0.109598,0.119969,0.045691,0.045691,0.09756,0.106625,0.209509,0.460932
2,16,1342.660615,5.109179,161.444847,3.793059,311.263374,16.327978,13.719916,8.8624,10.132741,...,0.425635,0.207947,0.119799,0.099594,0.027322,0.027322,0.094053,0.272422,0.531667,0.449125
3,23,1353.40636,4.144936,166.336192,2.990602,277.936957,22.893609,12.687333,10.394497,7.123099,...,0.469596,0.255503,0.115402,0.081335,0.015909,0.015909,0.059032,0.12981,0.257438,0.383616
4,27,1547.688974,4.012443,164.861744,2.917873,290.189667,20.264773,13.358041,8.883011,9.204421,...,0.466154,1.984318,0.220172,0.102137,0.029866,0.029866,0.116521,0.27744,0.524286,0.286667


#### Merge data sets

In [61]:
full_compare = pd.merge(treadmill_compare, outdoor_compare, 
                        on = 'subject_id', how = 'left',
                        suffixes = ('_treadmill', '_outdoor'))

full_compare

Unnamed: 0,subject_id,step_mean_treadmill,stride_pace_mean_treadmill,step_rate_mean_treadmill,stride_length_mean_treadmill,contact_time_mean_treadmill,flight_ratio_mean_treadmill,shock_mean_treadmill,impact_gs_mean_treadmill,braking_gs_mean_treadmill,...,pronation_excursion_mp_to_cov_outdoor,stance_excursion_fs_mp_cov_outdoor,stance_excursion_mp_to_cov_outdoor,split_stride_pace_cov_outdoor,cycle_time_cov_outdoor,split_cycle_time_cov_outdoor,contact_ratio_cov_outdoor,flight_time_cov_outdoor,stride_angle_cov_outdoor,footstrike_type_new_cov_outdoor
0,4,1663.345553,3.723503,159.408918,2.800926,307.854039,18.241775,12.33479,10.386849,6.499293,...,27.368492,0.27099,0.111133,0.07218,0.018402,0.018402,0.069873,0.168422,0.320243,0.844574
1,12,1926.001733,4.152045,168.54346,2.948203,238.035957,33.34134,16.17541,11.65542,10.866667,...,-10.890062,2.129792,0.695743,0.408718,0.035883,0.035883,0.157319,0.338571,1.517422,1.695081
2,16,1342.660615,5.109179,161.444847,3.793059,311.263374,16.327978,13.719916,8.8624,10.132741,...,0.866008,0.174143,0.058082,0.048928,0.019126,0.019126,0.096655,0.251826,0.482508,0.501797
3,23,1353.40636,4.144936,166.336192,2.990602,277.936957,22.893609,12.687333,10.394497,7.123099,...,0.580791,0.34907,0.10477,0.089183,0.019732,0.019732,0.096296,0.194743,0.353344,0.422164
4,27,1547.688974,4.012443,164.861744,2.917873,290.189667,20.264773,13.358041,8.883011,9.204421,...,0.340015,0.690805,0.205615,0.099687,0.026192,0.026192,0.101453,0.307775,0.697321,0.511644


In [62]:
full_compare.to_csv('full_compare.csv')

In [63]:
full_compare.shape

(5, 139)

In [64]:
full_cov = pd.merge(treadmill_cov, outdoor_cov, how = 'left',
                    on = "subject_id",
                    suffixes = ('_treadmill', '_outdoor'))

full_cov.head()

Unnamed: 0,subject_id,step_cov_treadmill,stride_pace_cov_treadmill,step_rate_cov_treadmill,stride_length_cov_treadmill,contact_time_cov_treadmill,flight_ratio_cov_treadmill,shock_cov_treadmill,impact_gs_cov_treadmill,braking_gs_cov_treadmill,...,pronation_excursion_mp_to_cov_outdoor,stance_excursion_fs_mp_cov_outdoor,stance_excursion_mp_to_cov_outdoor,split_stride_pace_cov_outdoor,cycle_time_cov_outdoor,split_cycle_time_cov_outdoor,contact_ratio_cov_outdoor,flight_time_cov_outdoor,stride_angle_cov_outdoor,footstrike_type_new_cov_outdoor
0,4,0.387595,0.079547,0.02747,0.064599,0.051414,0.161222,0.180395,0.235869,0.151438,...,27.368492,0.27099,0.111133,0.07218,0.018402,0.018402,0.069873,0.168422,0.320243,0.844574
1,12,0.575839,0.119969,0.045776,0.08445,0.097361,0.124812,0.181704,0.243662,0.264861,...,-10.890062,2.129792,0.695743,0.408718,0.035883,0.035883,0.157319,0.338571,1.517422,1.695081
2,16,0.58184,0.099594,0.027765,0.08154,0.074587,0.288471,0.233914,0.381271,0.239791,...,0.866008,0.174143,0.058082,0.048928,0.019126,0.019126,0.096655,0.251826,0.482508,0.501797
3,23,0.577146,0.081335,0.01575,0.080695,0.032015,0.121805,0.162047,0.228925,0.1215,...,0.580791,0.34907,0.10477,0.089183,0.019732,0.019732,0.096296,0.194743,0.353344,0.422164
4,27,0.582675,0.102137,0.030779,0.088716,0.072925,0.27306,0.260597,0.458672,0.349103,...,0.340015,0.690805,0.205615,0.099687,0.026192,0.026192,0.101453,0.307775,0.697321,0.511644
