### Prediction of Skeletal Muscle Mass with DXA Scan Accuracy Using Smartphone Imaging

# Imports

In [75]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

pd.set_option('display.max_columns', 55)

# Datasets

* DXA - represents the data from the DXA scans

* SS20 - represents the data captured from smartphone 3D imaging (Size Stream Scan)

In [76]:
dfs = {
    'dxa': {
        'file' : './data/MusclePredictionAnalysis(002).xlsx',
        'sheet' : 'All DXA',
        #'cols' : range(7, 32) 
    },
    'ss20': {
        'file': './data/MusclePredictionAnalysis(002).xlsx',
        'sheet': 'All SS20',
        #'cols': range(6, 52)
    }
}

input_df = dfs['ss20']
output_df = dfs['dxa']

# in_data  = pd.read_excel(input_df['file'], sheet_name=input_df['sheet'], usecols=input_df['cols'])
# out_data = pd.read_excel(output_df['file'], sheet_name=output_df['sheet'], usecols=output_df['cols'])
in_data  = pd.read_excel(input_df['file'], sheet_name=input_df['sheet'])
out_data = pd.read_excel(output_df['file'], sheet_name=output_df['sheet'],)


# Filling Missing Values

In [47]:
in_data.isna().any(), out_data.isna().any()

(0                                False
 PPT ID                            True
 Site                             False
 Age                              False
 Gender                           False
 Race                             False
 ALM                              False
 ALM (adjusted)                   False
 Height (cm)                      False
 Weight (kg)                      False
 Abdomen Circumference             True
 Ankle Circumference Left          True
 Arm Length Left                   True
 Arm Volume Left                   True
 Bicep Circumference Left          True
 Calf Circumference Left           True
 Chest                             True
 Collar Circumference              True
 Forearm Circumference Left        True
 Head Circumference                True
 Hip Circumference                 True
 Horizontal Waist                  True
 Inseam Left                       True
 Leg Volume Left                   True
 MidThigh Circumference Left       True


In [21]:
len(in_data.columns), len(out_data.columns)

(52, 32)

In [77]:
in_data = in_data.drop(columns = [0, 'PPT ID', 'Site'])

In [49]:
in_data

Unnamed: 0,Age,Gender,Race,ALM,ALM (adjusted),Height (cm),Weight (kg),Abdomen Circumference,Ankle Circumference Left,Arm Length Left,Arm Volume Left,Bicep Circumference Left,Calf Circumference Left,Chest,Collar Circumference,Forearm Circumference Left,Head Circumference,Hip Circumference,Horizontal Waist,Inseam Left,Leg Volume Left,MidThigh Circumference Left,Narrow Waist,Outside Leg Length Left,Seat Circumference,Surface Area Arm Left,Surface Area Leg Left,Surface Area Torso,Surface Area Total,Thigh Circumference Left,Torso Volume,Upper Arm Circumference Left,Volume,Waist Circumference,Subject Height,Ankle Circumference Right,Arm Length Right,Arm Volume Right,Bicep Circumference Right,Calf Circumference Right,Forearm Circumference Right,Inseam Right,Leg Volume Right,MidThigh Circumference Right,Outside Leg Length Right,1 Surface Area Arm Right,1 Surface Area Leg Right,1 Thigh Circumference Right,Upper Arm Circumference Right
0,41,Female,Black or African American,30.284662,30.284662,168.6,118.70,132.63000,24.650000,55.55000,5457.650000,41.78000,46.04000,133.70000,41.47000,30.740000,55.61000,133.67000,133.69000,78.88000,15265.050000,68.38000,117.72000,106.03000,133.18000,1778.59000,5039.85000,7600.51000,21223.3600,80.12000,81900.05000,44.61000,123403.42000,129.76000,167.56000,25.750000,52.81000,5478.740000,44.010000,46.04000,32.270000,79.00000,,67.78000,106.53000,1640.82000,5163.59000,79.80000,46.23000
1,33,Female,Asian,21.228207,21.228207,157.2,83.10,112.16000,25.350000,58.27500,5443.915000,40.13000,42.39500,106.51000,36.11500,32.390000,59.49000,116.09500,106.72000,74.29000,12323.305000,58.92500,94.18500,97.74500,114.11000,1878.66500,4504.94500,5773.66000,18613.9200,70.33500,51135.76000,40.97500,86456.05500,108.23500,158.95500,24.055000,57.57000,5554.110000,39.290000,42.20500,32.300000,74.12500,11998.945000,57.36000,96.81500,1864.38000,4592.27000,70.07500,40.72000
2,65,Male,Black or African American,25.601226,25.601226,171.6,79.60,96.76000,22.950000,59.17500,4933.300000,34.72500,34.90500,101.67500,42.96500,30.475000,56.34500,99.05000,97.94500,77.53500,8351.140000,44.84500,96.33000,109.22000,97.80500,1828.04000,4122.76500,6684.53500,18472.2650,56.77000,53815.91000,36.58500,80830.30500,91.02000,169.52000,23.000000,57.17000,5692.450000,36.045000,33.84500,31.110000,78.02000,8037.555000,44.12500,109.14000,1776.26500,4060.66500,56.05000,38.98500
3,45,Female,Black or African American,20.277196,20.277196,165.4,86.20,116.00500,23.345000,58.97500,4591.755000,38.35000,39.53500,106.84000,34.64500,29.815000,57.91000,116.77500,110.00000,79.22000,12148.475000,55.41500,99.50000,104.56000,115.21500,1782.61000,4636.02000,6025.17000,18734.7900,70.75500,54710.55000,39.77000,87148.96000,112.36000,165.95500,22.910000,56.43000,4428.160000,36.390000,38.49000,28.490000,79.21500,11270.055000,53.74000,106.18500,1894.21500,4396.78500,68.25500,39.74500
4,22,Female,Black or African American,23.242747,23.242747,169.7,82.20,104.06500,25.555000,60.83500,4998.545000,37.26000,36.39000,97.61500,36.70500,30.270000,57.80500,113.51000,98.92000,65.15000,7213.870000,42.56000,83.44500,103.84000,111.29000,2004.61500,3846.03000,7537.78000,19011.6850,53.53500,59427.06000,38.70500,83171.75500,99.56000,169.35500,26.075000,59.73000,4501.100000,35.225000,36.20000,30.555000,65.04000,7031.200000,41.91000,103.61000,1800.63000,3822.63000,53.55000,38.84000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773,59,Male,NHOPI,37.772967,35.884319,177.6,111.65,111.08182,27.066240,59.43092,4841.240953,41.79824,43.73626,118.82120,46.25848,31.689040,55.53964,113.06810,110.50016,74.50074,11300.052547,52.05476,111.97844,104.86136,111.46282,2006.76556,4690.73304,7872.08520,21478.0628,63.36284,76003.36980,43.25620,108926.69241,103.92664,177.23358,26.850340,58.52160,5386.439770,42.123360,43.97502,34.688780,75.13320,11396.080953,52.01412,104.67594,2036.83188,4871.45356,63.47206,45.10786
774,57,Female,NHOPI,16.692289,15.857674,164.7,52.15,84.63026,18.610072,55.12816,2923.458640,27.75966,31.27248,81.85404,30.72384,22.424898,56.49976,92.54744,75.68692,73.25106,6198.748317,40.80256,74.00036,100.98278,88.99652,1353.50056,3496.46784,5337.73960,15113.8100,49.53508,35168.35531,30.70098,53695.61057,75.16622,165.12540,19.620484,55.88000,3013.915432,27.287220,32.66694,23.005034,73.46442,6390.805129,40.50792,101.43236,1298.52952,3627.63700,50.58410,30.57906
775,27,Male,NHOPI,28.939233,27.492271,170.1,73.80,92.18422,24.268430,60.71362,3941.097550,32.81172,34.73450,95.97898,37.36848,28.112720,56.89600,100.12934,89.88806,74.79284,7980.026087,43.04538,87.43696,98.99904,99.42068,1779.46160,4002.04656,6099.91436,17712.6756,53.74386,47735.62230,34.31286,71952.47868,89.08034,170.89120,24.440388,59.79668,4245.242126,32.837120,34.75228,27.932380,74.95286,8050.818359,42.66184,99.02444,1691.84344,4139.28060,54.73700,35.04946
776,36,Female,Black,20.023615,19.022434,165.2,55.35,81.00822,21.394166,53.89626,3125.511583,27.48026,33.27908,78.03388,31.82620,24.668226,59.67730,97.05086,73.96226,76.44892,7630.980857,43.26128,67.13220,103.06050,92.70746,1412.98800,3840.16588,5063.20700,15610.6140,52.25288,34404.71645,29.46908,56130.73363,75.88504,165.50386,21.701506,54.29250,3221.539989,27.238960,33.62452,24.583390,76.12126,7747.984751,43.60926,102.96906,1307.49780,3986.81984,52.85994,28.01366


In [78]:
in_data['Gender'] = in_data['Gender'].map({'Male' : 1, 'Female' : 0})

in_data['Race'] = LabelEncoder().fit_transform(in_data['Race'])

In [79]:
out_data = out_data.drop(columns = [0, 'Subject ID'])

In [52]:
out_data

Unnamed: 0,Age,Height (cm),Weight,Gender,Race,DXA Weight,Total Fat,Total Lean,Total BMC,ALM,ALM (adjusted),Fat Mass - Total,Lean Mass - Total,Bone Mass - Total,ALM.1,Fat Mass - Trunk,Lean Mass - Trunk,Bone Mass - Trunk,Fat Mass - Left Arm,Lean Mass - Left Arm,Bone Mass - Left Arm,Fat Mass - Left Leg,Lean Mass - Left Leg,Bone Mass - Left Leg,Fat Mass - Right Arm,Lean Mass - Right Arm,Bone Mass - Right Arm,Fat Mass - Right Leg,Lean Mass - Right Leg,Bone Mass - Right Leg
0,41,168.6,118.70,Female,Black or African American,118.700000,53.288699,64.470665,2.957961,30.284662,30.284662,53288.698821,64470.664658,2957.961303,30284.662116,25634.473699,31087.226207,785.134371,,3473.826209,224.117069,8453.888680,11254.045397,534.531200,,3473.826209,224.117069,8715.957063,12082.964301,549.369293
1,33,157.2,83.10,Female,Asian,83.100000,34.798224,46.680417,2.083909,21.228207,21.228207,34798.223647,46680.417459,2083.908569,21228.206879,14816.142621,22424.090409,531.476720,2390.147627,2644.280227,165.542965,7078.959479,8004.538509,378.119459,2351.366787,2791.606955,178.552697,7043.388160,7787.781188,376.706868
2,65,171.6,79.60,Male,Black or African American,79.600000,19.171470,57.316601,2.647635,25.601226,25.601226,19171.469722,57316.601391,2647.634953,25601.225644,9788.431754,28027.810522,547.389902,1299.532354,3827.025940,196.664991,2721.311060,8796.799446,490.872259,1311.344563,4052.388921,210.129367,2698.328720,8925.011336,507.251555
3,45,165.4,86.20,Female,Black or African American,86.200000,39.368654,43.657405,2.329152,20.277196,20.277196,39368.654186,43657.404961,2329.151939,20277.195720,17541.392237,20277.927473,582.168799,2514.370920,2248.754857,166.735274,8037.520557,7804.231263,421.344386,2425.442466,2550.029656,190.956096,7697.844306,7674.179944,424.017168
4,22,169.7,82.20,Female,Black or African American,82.200000,29.893069,49.455134,2.947177,23.242747,23.242747,29893.068616,49455.133886,2947.176902,23242.746612,12087.269196,22956.698261,793.435024,2023.216036,2699.059811,224.920755,6367.724886,8799.355932,532.846214,2047.636383,2866.343831,238.831284,6185.298650,8877.987038,566.389704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773,59,177.6,111.65,Male,NHOPI,112.321534,30.035484,82.286050,2.790665,37.772967,35.884319,30035.484087,82286.049820,2790.665436,37772.966932,16106.181402,39831.988593,698.231565,2236.499347,6019.050052,255.895215,4261.280894,12830.783068,547.027873,2236.499347,6019.050052,255.895215,3962.803733,12904.083761,556.744240
774,57,164.7,52.15,Female,NHOPI,52.228525,9.940098,42.288427,2.078668,16.692289,15.857674,9940.098243,42288.427135,2078.668157,16692.288670,3710.102319,22295.571054,515.860518,701.598192,2087.915833,120.074723,2042.221152,5974.343769,350.861405,635.763627,2480.677825,138.085654,2065.175741,6149.351244,349.959116
775,27,170.1,73.80,Male,NHOPI,74.023970,11.073599,62.950371,2.657213,28.939233,27.492271,11073.599308,62950.370808,2657.213037,28939.233115,4548.459609,29825.915736,625.764579,627.313861,4226.620504,211.159731,2115.516051,10181.257995,517.457357,623.120372,4477.187064,228.808317,2130.531088,10054.167552,462.093810
776,36,165.2,55.35,Female,Black,54.669702,8.313596,46.356106,2.011033,20.023615,19.022434,8313.595621,46356.106453,2011.032592,20023.614628,2625.556494,22843.835838,548.142274,527.598349,2589.716077,130.775541,1834.688815,7319.852119,322.992233,474.095063,2650.699921,134.842151,2002.599650,7463.346511,340.149032


In [80]:
out_data['Gender'] = out_data['Gender'].map({'Male' : 1, 'Female' : 0})

out_data['Race'] = LabelEncoder().fit_transform(out_data['Race'])

In [60]:
in_data.isna().sum(), out_data.isna().sum()

(Age                                0
 Gender                             1
 Race                               0
 ALM                                0
 ALM (adjusted)                     0
 Height (cm)                        0
 Weight (kg)                        0
 Abdomen Circumference             10
 Ankle Circumference Left          10
 Arm Length Left                   19
 Arm Volume Left                   18
 Bicep Circumference Left           9
 Calf Circumference Left           10
 Chest                             20
 Collar Circumference              20
 Forearm Circumference Left        15
 Head Circumference                15
 Hip Circumference                 16
 Horizontal Waist                   9
 Inseam Left                        3
 Leg Volume Left                    5
 MidThigh Circumference Left        7
 Narrow Waist                      12
 Outside Leg Length Left            9
 Seat Circumference                10
 Surface Area Arm Left             12
 Surface Are

In [81]:
# in_data.dropna(inplace=True)
# out_data.dropna(inplace=True)

in_data = in_data.fillna(in_data.mean())
out_data = out_data.fillna(out_data.mean())

In [62]:
in_data.isna().sum(), out_data.isna().sum()

(Age                              0
 Gender                           0
 Race                             0
 ALM                              0
 ALM (adjusted)                   0
 Height (cm)                      0
 Weight (kg)                      0
 Abdomen Circumference            0
 Ankle Circumference Left         0
 Arm Length Left                  0
 Arm Volume Left                  0
 Bicep Circumference Left         0
 Calf Circumference Left          0
 Chest                            0
 Collar Circumference             0
 Forearm Circumference Left       0
 Head Circumference               0
 Hip Circumference                0
 Horizontal Waist                 0
 Inseam Left                      0
 Leg Volume Left                  0
 MidThigh Circumference Left      0
 Narrow Waist                     0
 Outside Leg Length Left          0
 Seat Circumference               0
 Surface Area Arm Left            0
 Surface Area Leg Left            0
 Surface Area Torso         

In [82]:
len(in_data), len(out_data)

(778, 778)

# Train / Test Split

In [86]:
train_percent, seed = 0.8, 1

in_training_data = in_data.sample(frac=train_percent, random_state=seed)
out_training_data = out_data.sample(frac=train_percent, random_state=seed)

in_testing_data = in_data.drop(in_training_data.index)
out_testing_data = out_data.drop(out_training_data.index) 

# Linear Regression

In [87]:
model = LinearRegression().fit(in_training_data, out_training_data)
A, b = model.coef_, model.intercept_
y_pred = model.predict(in_testing_data)

print(f"R² Score = {model.score(in_training_data, out_training_data)}")
print(f'MSE = {mean_squared_error(out_testing_data, y_pred)}')
print(f'RMSE = {mean_squared_error(out_testing_data, y_pred, squared=False)}')

R² Score = 0.946313764086316
MSE = 700306.2670363616
RMSE = 393.73983000908845
