# Longitudinal examination of the bone mechanostat theory in children



Izabella A. Ludwa, Kevin Mongeon, Bareket Falk and Panagiota Klentrou




In [322]:
import os
import sys
import shutil
import pandas
import numpy
import ipystata
import matplotlib.pyplot as plt

# Configure ipystata with the location of a Stata executable.
# NOTE(review): the path is machine-specific (inside one user's home directory);
# adjust it before running on another machine. The meaning of force_batch is
# defined by ipystata — confirm against its documentation.
from ipystata.config import config_stata
config_stata('/home/kmongeon/research/stata14/stata-mp',force_batch=True)

# Raise pandas' display limits so wide and long frames are shown with fewer truncations.
pandas.options.display.max_columns = 100
pandas.options.display.max_rows = 500

### Import data

### Izzy's notes

* In the torque file, session=0 pertains to “adult” data. This data should NOT be included in any of the analysis. 
* Session 1 would refer to data collected in the spring and session 2 would be data collected in the fall. We would keep track of which seasonal testing session the participant attended. 
* The Izzy file has sequences 1-7; its sequences 1-6 match the torque file's new sequence 1-6.
* Sequence 7 in the Izzy file corresponds to sequence 7 in the Spring 2013 knee extension file.
* Sequence 0 in the torque file does not correspond to anything in the Izzy file (because the Izzy file has no sequence 0).

* the Izzy file sequence has a 7 because we did an additional testing session in spring 2013 to get more grip strength measures. 
* This was data I needed for my dissertation in order to increase my sample size for kids that had 3 grip strength testing occasions. 
* I did not collect grip strength measures in the spring or fall of 2010, which is why there is missing data for sequence 1 and 2. 
* We only did one extra testing session, and it was in the spring, which is why there is only a session=1 (and NO session=2) for 2013. I needed this data, which is why the Izzy file has sequences 1-7. This testing session was “extra” for the torque data Bareket was collecting, which is why there is a separate “extra” data file, “Spring_2013_knee_extension”, and why that file only has session=1 and sequence=7. Again, this lines up with sequence 7 in the Izzy file. Anything with a 0 does not line up with anything in the Izzy file because I have no data for adults.

In [376]:
# Load the three raw inputs:
#   d1 - the Izzy longitudinal file,
#   d2 - the torque file,
#   d3 - the extra Spring 2013 knee-extension file.
d1, d2, d3 = (
    pandas.read_csv(csv_path)
    for csv_path in (
        './data/IzzyLongitudinalWorkingMarch2015.csv',
        './data/torque.csv',
        './data/Spring_2013_knee_extension.csv',
    )
)

In [377]:
# Raw Izzy-file column name -> short analysis name.
# Only the columns listed here are kept, in this order.
d1_columns = {
    'ID': 'id',
    'Session': 'session',
    'Sequence': 'sequence',
    'Gender': 'gender',
    'Radius_SOS': 'rsos',
    'Tibial_SOS': 'tsos',
    'Grip_Strength_Best': 'grip',
    'NTx/Creat': 'ntxc',
    'Mat_Offset_New': 'matu',
    'tot mvh': 'mvh',
    'Age': 'age',
}
d1 = d1[list(d1_columns)].rename(columns=d1_columns)

In [378]:
# Raw torque-file column name -> short analysis name.
# Only the columns listed here are kept, in this order.
d2_columns = {
    'ID': 'id',
    'Session': 'session',
    'Sequence': 'sequence',
    'New_Sequence': 'new_sequence',
    'ptiso': 'ptiso',
}
d2 = d2[list(d2_columns)].rename(columns=d2_columns)


In [379]:
# Put the Spring 2013 columns in the same order as the torque frame's keys
# (the file itself stores them as session, sequence, id, ptiso).
d3 = d3.loc[:, ['id', 'session', 'sequence', 'ptiso']]

In [380]:
# Exclude session 0 rows: per the notes above these are adult data and must
# not enter the analysis.
is_child_session = d2['session'] != 0
d2 = d2.loc[is_child_session]

In [381]:
# Replace the torque file's original 'sequence' with 'new_sequence', the
# numbering that lines up with the Izzy file's sequence (see the notes above).
# The column is dropped via `columns=` rather than a positional axis argument:
# passing axis positionally is deprecated in pandas 1.x and rejected by pandas 2.0+.
d2 = d2.drop(columns='sequence')
d2 = d2.rename(columns={'new_sequence': 'sequence'})

In [382]:
# Stack the regular torque rows (d2) on top of the extra Spring 2013
# knee-extension rows (d3).
# DataFrame.append was removed in pandas 2.0; pandas.concat is the replacement
# and, with its defaults, likewise keeps each frame's original index labels.
dp = pandas.concat([d2, d3])

In [383]:
# Outer-join the Izzy measures (d1) with the pooled torque rows (dp) on
# participant, session and sequence, so rows present in only one source
# are kept (with NaN for the other source's columns).
dm = d1.merge(dp, how='outer', on=['id', 'session', 'sequence'])

In [384]:
# Treat the -9999 placeholder in ptiso as missing.
dm['ptiso'] = dm['ptiso'].replace(to_replace=-9999, value=numpy.nan)
# Label the session codes (1 -> 'spring', 2 -> 'fall') and rename the
# column to reflect what it now holds.
dm['session'] = dm['session'].replace({1: 'spring', 2: 'fall'})
dm = dm.rename(columns={'session': 'season'})


In [385]:
# Label the gender codes: 0 -> 'boy', 1 -> 'girl'.
dm['gender'] = dm['gender'].replace({0.0: 'boy', 1.0: 'girl'})

In [386]:
# Order rows by participant id, then by testing sequence within participant.
dm = dm.sort_values(by=['id', 'sequence'])

In [392]:
# Keep the analysis columns, in their final output order.
column_order = [
    'id', 'season', 'sequence', 'gender',
    'rsos', 'tsos', 'grip', 'ptiso',
    'ntxc', 'matu', 'mvh', 'age',
]
dm = dm.loc[:, column_order]

In [393]:
# Display the merged frame.
# NOTE(review): the captured output below includes a `ptiso.1` column, which the
# column selection in the previous cell cannot produce — the output looks stale
# (possibly from an earlier run); confirm by re-running.
dm

Unnamed: 0,id,season,sequence,gender,rsos,tsos,grip,ptiso,ptiso.1,ntxc,matu,mvh,age
0,100,spring,1,boy,3828.0,3601.0,,113.928056,113.928056,711.84816,-1.67,105.71,11.75
1,100,spring,3,boy,3898.0,3629.0,27.0,136.023758,136.023758,760.093945,-0.71,93.62,12.71
2,100,spring,5,boy,3851.0,3677.0,37.0,177.053081,177.053081,543.734635,0.41,98.14,13.83
3,100,spring,7,boy,3952.0,3740.0,40.5,205.8,205.8,454.364338,1.17,88.14,14.74
4,101,spring,1,boy,3682.0,3603.0,,133.048794,133.048794,937.996794,-1.63,89.29,11.45
5,102,spring,1,boy,3935.0,3916.0,,171.360283,171.360283,435.430999,-1.24,86.57,12.29
6,102,spring,3,boy,3899.0,3863.0,27.0,,,650.394553,-0.27,68.71,13.26
7,102,spring,5,boy,3878.0,3908.0,42.0,210.997092,210.997092,906.064083,0.77,59.14,14.3
8,102,spring,7,boy,3951.0,4015.0,42.0,258.5,258.5,,2.08,60.71,15.38
9,103,spring,1,boy,3768.0,3686.0,,146.60028,146.60028,737.074774,-1.91,120.57,11.83


In [395]:
# Write the merged data to CSV without the index column, writing missing values as 'NaN'.
dm.to_csv('bone_muscle_merged.csv', index=False, na_rep='NaN')

In [348]:
# Row counts per session code in the Izzy data; dropna=False also counts missing values.
d1.session.value_counts(dropna=False, sort=False)

1    244
2    183
Name: session, dtype: int64

In [120]:
# Row counts per sequence value in the Izzy data; dropna=False also counts missing values.
d1.sequence.value_counts(dropna=False, sort=False)

1    44
2    38
3    78
4    71
5    71
6    74
7    51
Name: sequence, dtype: int64

In [121]:
# Summary statistics for the Izzy columns.
d1.describe()

Unnamed: 0,id,session,sequence,gender,rsos,tsos,grip,ntxc,matu,mvh,age
count,427.0,427.0,427.0,427.0,417.0,417.0,381.0,386.0,427.0,308.0,427.0
mean,327.17096,1.428571,4.201405,0.470726,3815.769784,3687.139089,23.425276,540.102985,-0.861546,106.292565,11.848595
std,197.600195,0.495452,1.835154,0.499728,101.452313,109.664084,7.35446,265.05337,2.00923,41.683699,2.01088
min,100.0,1.0,1.0,0.0,3542.0,3367.0,9.0,56.308327,-5.3,13.57,7.5
25%,138.5,1.0,3.0,0.0,3746.0,3619.0,18.0,370.012518,-2.29,78.14,10.34
50%,190.0,1.0,4.0,0.0,3820.0,3689.0,22.25,513.997357,-0.98,99.2,11.85
75%,530.0,2.0,6.0,1.0,3878.0,3758.0,27.5,661.737729,0.485,133.6575,13.23
max,583.0,2.0,7.0,1.0,4140.0,4015.0,53.0,1754.324517,9.66,235.0,17.55


# torque

In [122]:
# List the column names available in the torque data.
d2.columns

Index(['ID', 'Biodex_ID', 'Sequence', 'Session', 'New_Sequence', 'Gender',
       'Visit_One', 'Visit_Two', 'DOB', 'Age (days)', 'Age (years)', 'ptiso',
       'pt60', 'pt240'],
      dtype='object')

In [123]:
# Keep the torque columns of interest and give them short lowercase names.
# NOTE(review): unlike the In [378] cell near the top, this cell also keeps
# Gender and Age (years) — it looks like an earlier version of that step;
# confirm which one is intended.
d2 = d2[['ID', 'Session', 'Sequence', 'New_Sequence', 'Gender', 'ptiso', 'Age (years)']]
d2 = d2.rename(columns={'ID': 'id', 'Session': 'session', 'Sequence': 'sequence'
                   , 'Gender': 'gender', 'New_Sequence': 'new_sequence', 'ptiso': 'ptiso', 'Age (years)': 'age'})

In [124]:
# NOTE(review): this repeats the rename from the preceding cell. If that cell
# has already run, none of these original column names remain, so this appears
# to be a no-op — confirm and consider removing.
d2 = d2.rename(columns={'ID': 'id', 'Session': 'session', 'Sequence': 'sequence'
                   , 'Gender': 'gender', 'New_Sequence': 'new_sequence', 'ptiso': 'ptiso', 'Age (years)': 'age'})

In [125]:
# Number of torque rows per participant id.
d2.id.value_counts(dropna=False, sort=False)


512    3
513    2
514    3
515    3
516    3
517    3
518    3
519    3
520    3
521    3
522    1
523    3
524    3
525    3
526    3
527    3
528    3
529    1
530    3
531    3
532    2
533    3
534    2
535    3
536    2
537    2
538    2
539    2
540    1
541    2
542    2
543    2
544    2
545    2
546    2
547    2
548    2
549    2
550    2
551    2
552    2
553    2
554    2
555    1
556    2
557    2
558    1
559    1
560    2
561    2
562    1
563    2
564    1
565    2
566    2
567    2
568    1
569    2
570    1
571    1
572    2
573    2
574    2
575    2
576    1
577    2
579    1
581    1
582    1
583    1
100    3
101    1
102    3
103    2
104    2
105    3
106    3
107    3
108    2
109    3
110    3
111    1
112    3
113    3
114    1
115    3
116    1
117    3
118    3
119    3
120    3
121    3
122    3
123    1
124    3
125    3
126    3
127    3
128    3
129    3
130    3
131    3
132    3
133    3
134    3
135    3
136    2
137    3
138    3
139    3
140    3
1

In [126]:
# NOTE(review): another repeat of the torque column rename. If the columns were
# already renamed by an earlier cell, this appears to be a no-op — confirm and
# consider removing.
d2 = d2.rename(columns={'ID': 'id', 'Session': 'session', 'Sequence': 'sequence'
                   , 'Gender': 'gender', 'New_Sequence': 'new_sequence', 'ptiso': 'ptiso'})

In [127]:
# Row counts per session code in the torque data (the output below includes session 0).
d2.session.value_counts(dropna=False, sort=False)

0     40
1    196
2    185
Name: session, dtype: int64

In [128]:
# Row counts per original torque sequence value.
d2.sequence.value_counts(dropna=False, sort=False)

1    124
2    150
3    147
Name: sequence, dtype: int64

In [129]:
# Row counts per new_sequence value.
d2.new_sequence.value_counts(dropna=False, sort=False)

0    40
1    45
2    39
3    79
4    71
5    72
6    75
Name: new_sequence, dtype: int64

In [130]:
# Summary statistics for the torque columns.
# NOTE(review): the ptiso minimum of -9999 in the output below appears to be a
# placeholder (the merge step replaces -9999 with NaN), so the ptiso mean/std
# shown here are presumably distorted by it.
d2.describe()

Unnamed: 0,id,session,sequence,new_sequence,gender,ptiso,age
count,421.0,421.0,421.0,421.0,421.0,421.0,421.0
mean,374.052257,1.344418,2.054632,3.453682,0.465558,-1599.41798,12.571116
std,246.011802,0.645607,0.801403,1.906347,0.499406,3820.111329,3.416232
min,100.0,0.0,1.0,0.0,0.0,-9999.0,7.5
25%,139.0,1.0,1.0,2.0,0.0,68.41333,10.38
50%,501.0,1.0,2.0,4.0,0.0,107.22706,11.89
75%,542.0,2.0,3.0,5.0,1.0,155.718442,13.6
max,919.0,2.0,3.0,6.0,1.0,491.182248,27.49


# Spring_2013_knee_extension

In [131]:
# List the column names in the Spring 2013 knee-extension data.
d3.columns

Index(['session', 'sequence', 'id', 'ptiso'], dtype='object')

In [132]:
# Reorder the Spring 2013 columns to id, session, sequence, ptiso
# (the columns listing above shows them as session, sequence, id, ptiso).
d3 = d3[['id', 'session', 'sequence', 'ptiso']]

In [133]:
# Number of Spring 2013 rows per participant id.
d3.id.value_counts(dropna=False, sort=False)

512    1
514    1
515    1
516    1
517    1
518    1
159    1
148    1
149    1
151    1
152    1
153    1
155    1
156    1
541    1
542    1
543    1
544    1
161    1
162    1
548    1
549    1
550    1
551    1
552    1
556    1
557    1
186    1
545    1
188    1
189    1
190    1
191    1
579    1
120    1
100    1
102    1
103    1
107    1
109    1
110    1
113    1
500    1
117    1
502    1
532    1
504    1
121    1
508    1
503    1
Name: id, dtype: int64

In [134]:
# Row counts per session code in the Spring 2013 data.
d3.session.value_counts(dropna=False, sort=False)

1    50
Name: session, dtype: int64

In [135]:
# Row counts per sequence value in the Spring 2013 data.
d3.sequence.value_counts(dropna=False, sort=False)

7    50
Name: sequence, dtype: int64

In [141]:
# Preview the first rows of the Izzy data.
d1.head()

Unnamed: 0,id,session,sequence,gender,rsos,tsos,grip,ntxc,matu,mvh,age
0,100,1,1,0,3828.0,3601.0,,711.84816,-1.67,105.71,11.75
1,100,1,3,0,3898.0,3629.0,27.0,760.093945,-0.71,93.62,12.71
2,100,1,5,0,3851.0,3677.0,37.0,543.734635,0.41,98.14,13.83
3,100,1,7,0,3952.0,3740.0,40.5,454.364338,1.17,88.14,14.74
4,101,1,1,0,3682.0,3603.0,,937.996794,-1.63,89.29,11.45


In [137]:
# Preview the first rows of the torque data.
d2.head()

Unnamed: 0,id,session,sequence,new_sequence,gender,ptiso,age
0,100,1,1,1,0,113.928056,11.75
1,100,1,2,3,0,136.023758,12.71
2,100,1,3,5,0,177.053081,13.83
3,101,1,1,1,0,133.048794,11.45
4,102,1,1,1,0,171.360283,12.29


In [138]:
# Preview the first rows of the Spring 2013 knee-extension data.
d3.head()

Unnamed: 0,id,session,sequence,ptiso
0,100,1,7,205.8
1,102,1,7,258.5
2,103,1,7,235.3
3,107,1,7,170.3
4,109,1,7,319.7


In [146]:
# Cross-tabulate session against sequence in the Izzy data, to see which
# sequence numbers occur in which session.
d1.groupby('session').sequence.value_counts()

session  sequence
1        3           78
         5           71
         7           51
         1           44
2        6           74
         4           71
         2           38
Name: sequence, dtype: int64

In [147]:
# Cross-tabulate session against new_sequence in the torque data, for
# comparison with the Izzy session/sequence pattern.
d2.groupby('session').new_sequence.value_counts()

session  new_sequence
0        0               40
1        3               79
         5               72
         1               45
2        6               75
         4               71
         2               39
Name: new_sequence, dtype: int64

In [148]:
# Cross-tabulate session against sequence in the Spring 2013 data.
d3.groupby('session').sequence.value_counts()

session  sequence
1        7           50
Name: sequence, dtype: int64