# Find subjects for local explanation analyses

Criterion: subjects should have no missing values in the demographic / health information of interest

In [1]:
import pandas as pd
from brain_age_prediction import utils

In [2]:
# location of the predictions overview table
overview_path = '/ritter/share/projects/laura_riedel_thesis/predictions/predictions_overview_100-500p.csv'
# load data overview
data_overview_full = pd.read_csv(overview_path)
# limit to IDs present in heldout test set; copy so later column assignments
# never touch the full table, and renumber the rows from 0
data_overview_heldout = (
    data_overview_full[data_overview_full['split'] == 'heldout_test']
    .copy()
    .reset_index(drop=True)
)
# NOTE(review): the detrended BAG columns used below are presumably added by
# utils.detrend_bag -- confirm against its implementation
data_overview_heldout = utils.detrend_bag(data_overview_heldout, models=['orig','new'])

# demographic / health variables of interest; this single list drives both the
# complete-case mask and the displayed columns so the two cannot drift apart
covariates = [
    'bmi',
    'digit substitution',
    'education',
    'fluid intelligence',
    'grip',
    'depressive episode',
    'all depression',
    'recurrent depressive disorder',
    'multiple sclerosis',
    'sex',
    'weekly beer',
    'genetic pc 1',
    'genetic pc 2',
    'genetic pc 3',
]
# BAG columns shown next to the covariates in every ranking table
bag_columns = ['bag_orig', 'bag_new', 'bag_orig_detrended', 'bag_new_detrended']

# exclude rows with nans: True only where every covariate is present
no_nans = data_overview_heldout[covariates].notna().all(axis=1)
# define columns of interest by name rather than by position, so an added or
# reordered column in the CSV raises a KeyError instead of silently shifting
# the selection
columns = ['eid', 'age'] + covariates + bag_columns

In [3]:
# youngest complete-case subjects: ten lowest ages in the heldout set
(
    data_overview_heldout
    .loc[no_nans, columns]
    .sort_values(by='age', ascending=True)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,multiple sclerosis,sex,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended
1491,1137757,48,25.5993,20.0,20.0,7.0,50.0,0.0,0.0,0.0,0.0,1,3.0,-12.452,2.70432,-0.222601,1.116467,-0.5658,-7.226678,-8.934986
632,1058349,49,26.8186,18.0,20.0,3.0,26.0,0.0,0.0,0.0,0.0,0,5.0,-12.4057,5.60872,-2.97802,17.530006,14.007729,9.680329,6.13475
4234,1389126,49,31.902,21.0,20.0,6.0,36.0,0.0,0.0,0.0,0.0,1,20.0,-10.515,3.9776,1.07263,9.557858,12.574345,1.70818,4.701366
1582,1144954,49,27.3821,24.0,20.0,9.0,57.0,0.0,0.0,0.0,0.0,1,2.0,-11.7008,2.77122,-3.04094,-0.662121,2.957458,-8.511798,-4.91552
272,1025552,49,28.5813,22.0,20.0,5.0,44.0,0.0,0.0,0.0,0.0,1,4.0,-13.4821,2.90954,-0.818331,8.936436,6.064831,1.086759,-1.808148
1825,1169644,49,24.224,29.0,20.0,9.0,26.0,0.0,0.0,0.0,0.0,1,4.0,-13.2688,4.64589,1.14377,1.0485,3.534443,-6.801177,-4.338536
3416,1317950,50,21.2939,22.0,10.0,5.0,31.0,0.0,0.0,0.0,0.0,0,0.0,94.6584,-136.925,82.5171,0.593174,2.757504,-6.763036,-4.619268
936,1089320,50,26.8359,18.0,20.0,5.0,62.0,0.0,0.0,0.0,0.0,1,5.0,-12.0346,3.72065,-1.63488,4.20187,3.970905,-3.15434,-3.405866
1354,1125280,50,32.8291,25.0,15.0,5.0,53.0,0.0,0.0,0.0,0.0,1,3.0,-12.2558,3.39683,-0.74818,12.009953,3.785801,4.653743,-3.59097
1730,1159744,50,26.6385,23.0,20.0,9.0,58.0,0.0,0.0,0.0,0.0,1,10.0,-9.0751,3.75076,-1.608,6.127354,4.417351,-1.228856,-2.95942


In [4]:
# oldest complete-case subjects: ten highest ages in the heldout set
(
    data_overview_heldout
    .loc[no_nans, columns]
    .sort_values(by='age', ascending=False)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,multiple sclerosis,sex,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended
3417,1318155,81,22.984,15.0,20.0,6.0,20.0,0.0,0.0,0.0,0.0,0,0.0,-11.3722,3.71402,-3.80391,-10.466156,-11.490562,-2.524873,-3.484895
4376,1402195,81,22.8262,20.0,20.0,7.0,35.0,0.0,0.0,0.0,0.0,1,0.0,-13.93,6.71783,-2.32277,-7.379013,-10.690979,0.56227,-2.685312
3983,1366740,81,19.6259,22.0,20.0,5.0,29.0,0.0,0.0,0.0,0.0,0,1.0,-11.0708,-0.551868,-2.59355,-15.122696,-14.865349,-7.181413,-6.859682
3085,1287410,80,25.8199,14.0,20.0,8.0,26.0,0.0,0.0,0.0,0.0,1,0.0,-13.9548,5.0965,-1.15876,-13.951309,-7.180679,-6.503494,0.32878
2971,1274603,80,28.3723,14.0,20.0,4.0,38.0,0.0,0.0,0.0,0.0,1,0.0,-12.1288,2.78037,-1.9207,-1.807571,0.765625,5.640244,8.275084
4136,1381380,80,24.423,13.0,20.0,5.0,20.0,0.0,0.0,0.0,0.0,0,0.0,-10.3272,2.25811,0.682063,-10.55162,-7.375259,-3.103805,0.1342
4777,1440190,80,24.8922,9.0,19.0,7.0,30.0,0.0,0.0,0.0,0.0,1,1.0,-9.9056,2.72288,-2.31051,-2.085754,-4.522217,5.362061,2.987242
3460,1321356,80,23.1978,10.0,15.0,9.0,18.0,0.0,0.0,0.0,0.0,1,0.0,-12.0543,4.53638,-2.93691,-10.321014,-14.976639,-2.873199,-7.46718
2514,1232260,80,21.2183,1.0,15.0,4.0,20.0,0.0,0.0,0.0,0.0,0,0.0,-9.96863,2.33497,2.63467,-7.770508,-7.578476,-0.322693,-0.069017
2611,1242018,80,24.5997,23.0,19.0,9.0,40.0,0.0,0.0,0.0,0.0,1,2.0,-13.0133,2.5267,-1.49307,-8.357498,-8.505112,-0.909683,-0.995652


For simplicity's sake, I will choose the first-ranking subject for both the very young and very old samples:
- young: subject 1137757
- old: subject 1318155

In [5]:
# shallow model: ten complete-case subjects with the largest positive detrended BAG
# (prediction far above true age)
(
    data_overview_heldout
    .loc[no_nans, columns]
    .sort_values(by='bag_orig_detrended', ascending=False)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,multiple sclerosis,sex,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended
3956,1364474,71,22.084,6.0,20.0,7.0,30.0,0.0,0.0,0.0,0.0,0,0.0,-11.5235,5.7535,-2.62601,9.223434,10.724228,12.230042,13.767818
3948,1364183,63,33.8659,18.0,20.0,6.0,52.0,0.0,0.0,0.0,0.0,1,3.0,-13.7643,7.59221,-3.3583,12.854942,11.279037,11.91381,10.352966
4491,1412736,68,21.8944,15.0,19.0,7.0,22.0,0.0,0.0,0.0,0.0,0,0.0,-12.8314,0.243051,-1.59497,9.954872,4.595406,11.481077,6.150373
2770,1256413,62,20.9689,25.0,15.0,5.0,26.0,0.0,0.0,0.0,0.0,0,0.0,-11.6555,4.57984,1.04838,12.504929,8.79776,11.070329,7.375481
3346,1311923,74,23.8041,18.0,15.0,5.0,26.0,0.0,0.0,0.0,0.0,0,0.0,6.7404,5.11649,-2.6787,6.107819,4.266815,10.594829,8.799028
2671,1246637,53,31.1829,17.0,19.0,3.0,58.0,0.0,0.0,0.0,0.0,1,4.0,-10.6692,4.15848,-0.772086,16.365906,12.807861,10.490099,6.919713
1535,1141252,69,25.815,18.0,10.0,3.0,11.0,0.0,0.0,0.0,0.0,0,0.0,-14.4433,5.80553,-0.288483,8.44619,1.187698,10.465863,3.238873
2807,1259547,59,28.5375,14.0,20.0,7.0,42.0,0.0,0.0,0.0,0.0,1,2.0,-11.1963,3.04718,0.639169,13.219299,14.108459,10.304297,11.197558
4835,1445661,63,25.565,27.0,20.0,7.0,38.0,0.0,0.0,0.0,0.0,1,0.0,20.4299,-9.09112,27.5884,11.107239,6.99321,10.166107,6.067139
2482,1229284,74,26.2284,15.0,15.0,4.0,38.0,0.0,0.0,0.0,0.0,1,2.0,-12.0258,6.01755,-1.14041,5.637833,-2.716675,10.124843,1.815538


In [6]:
# deep model: ten complete-case subjects with the largest positive detrended BAG
# (prediction far above true age)
(
    data_overview_heldout
    .loc[no_nans, columns]
    .sort_values(by='bag_new_detrended', ascending=False)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,multiple sclerosis,sex,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended
3956,1364474,71,22.084,6.0,20.0,7.0,30.0,0.0,0.0,0.0,0.0,0,0.0,-11.5235,5.7535,-2.62601,9.223434,10.724228,12.230042,13.767818
4757,1438262,59,18.2969,16.0,20.0,4.0,20.0,1.0,1.0,1.0,0.0,0,3.0,-13.4155,1.79436,0.182334,10.040665,15.234283,7.125663,12.323381
2807,1259547,59,28.5375,14.0,20.0,7.0,42.0,0.0,0.0,0.0,0.0,1,2.0,-11.1963,3.04718,0.639169,13.219299,14.108459,10.304297,11.197558
4035,1371802,61,26.5118,26.0,19.0,6.0,36.0,0.0,0.0,0.0,0.0,1,4.0,-7.51669,2.12231,-1.09883,5.13105,12.614227,3.202983,10.695741
4849,1446985,59,30.617,23.0,19.0,5.0,46.0,0.0,0.0,0.0,0.0,1,4.0,-12.0162,4.23413,-3.62151,8.651314,13.495262,5.736312,10.58436
3948,1364183,63,33.8659,18.0,20.0,6.0,52.0,0.0,0.0,0.0,0.0,1,3.0,-13.7643,7.59221,-3.3583,12.854942,11.279037,11.91381,10.352966
1422,1131993,61,32.0069,19.0,7.0,4.0,36.0,0.0,0.0,0.0,0.0,1,2.0,-7.32864,-8.904,-0.961118,10.925354,12.192017,8.997287,10.27353
4364,1401363,71,24.4919,16.0,15.0,9.0,40.0,0.0,0.0,0.0,0.0,1,0.0,-14.0534,3.86783,-2.11314,-1.460297,6.95636,1.546311,9.99995
3374,1314229,74,23.4865,7.0,20.0,5.0,32.0,0.0,0.0,0.0,0.0,1,2.0,-11.8902,5.52711,-3.83901,0.372177,5.41066,4.859187,9.942873
4879,1450407,76,25.3565,21.0,13.0,8.0,16.0,0.0,0.0,0.0,0.0,0,0.0,-13.463,3.77453,-0.483266,0.424561,4.370087,5.898506,9.894715


For cases of brain ages being much higher than true ages, both models have the same number one in their ranking: subject 1364474.

In [7]:
# shallow model: ten complete-case subjects with the most negative detrended BAG
# (prediction far below true age)
(
    data_overview_heldout
    .loc[no_nans, columns]
    .sort_values(by='bag_orig_detrended', ascending=True)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,multiple sclerosis,sex,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended
1992,1184162,69,26.8905,18.0,20.0,4.0,26.0,0.0,0.0,0.0,0.0,1,1.0,-10.5327,3.49319,-1.50688,-21.213188,-21.445992,-19.193515,-19.394817
4160,1383475,69,24.1357,28.0,20.0,11.0,39.0,0.0,0.0,0.0,0.0,1,8.0,-12.0525,3.90022,-0.600046,-14.754501,-7.280476,-12.734829,-5.229301
3267,1305142,62,26.4157,27.0,19.0,7.0,22.0,0.0,0.0,0.0,0.0,1,5.0,-14.3698,-0.042826,1.4534,-10.485474,-3.813774,-11.920073,-5.236053
1041,1098221,52,27.1626,17.0,19.0,4.0,30.0,0.0,0.0,0.0,0.0,0,0.0,-13.1039,3.90495,-4.62974,-5.303051,3.726322,-11.672326,-2.658034
3508,1325914,53,27.0527,14.0,15.0,7.0,50.0,0.0,0.0,0.0,0.0,1,12.0,-13.4125,3.41827,-3.06645,-5.414597,-5.551502,-11.290404,-11.43965
2902,1268821,74,26.7746,19.0,20.0,3.0,43.0,0.0,0.0,0.0,0.0,1,2.0,102.522,-150.103,81.5255,-15.238331,-8.931396,-10.751321,-4.399183
2834,1262448,69,25.6639,22.0,20.0,9.0,50.0,0.0,0.0,0.0,0.0,1,1.0,-13.3352,6.60376,-1.55673,-12.739822,-4.817749,-10.72015,-2.766574
3820,1352461,75,26.0593,20.0,10.0,6.0,28.0,0.0,0.0,0.0,0.0,1,0.0,-15.2329,5.1198,-0.732138,-15.590584,-12.477036,-10.610106,-7.448615
1807,1167559,65,24.3844,13.0,20.0,5.0,18.0,0.0,0.0,0.0,0.0,0,2.0,-8.94561,5.52793,0.215461,-10.603779,-10.705269,-10.557976,-10.638925
2386,1218873,55,29.7744,27.0,20.0,12.0,30.0,0.0,0.0,0.0,0.0,1,2.0,-11.0439,4.40399,-0.450628,-5.211079,0.442238,-10.099951,-4.453495


In [8]:
# deep model: ten complete-case subjects with the most negative detrended BAG
# (prediction far below true age)
(
    data_overview_heldout
    .loc[no_nans, columns]
    .sort_values(by='bag_new_detrended', ascending=True)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,multiple sclerosis,sex,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended
1992,1184162,69,26.8905,18.0,20.0,4.0,26.0,0.0,0.0,0.0,0.0,1,1.0,-10.5327,3.49319,-1.50688,-21.213188,-21.445992,-19.193515,-19.394817
4864,1448799,66,23.5482,15.0,20.0,8.0,12.0,0.0,0.0,0.0,0.0,0,2.0,-12.6246,4.20173,-2.95948,-8.648052,-13.540424,-8.108782,-12.977873
2787,1258107,61,27.2151,17.0,15.0,4.0,48.0,0.0,0.0,0.0,0.0,1,2.0,-10.0845,3.6766,-1.57083,-7.522968,-10.298618,-9.451035,-12.217105
3508,1325914,53,27.0527,14.0,15.0,7.0,50.0,0.0,0.0,0.0,0.0,1,12.0,-13.4125,3.41827,-3.06645,-5.414597,-5.551502,-11.290404,-11.43965
1807,1167559,65,24.3844,13.0,20.0,5.0,18.0,0.0,0.0,0.0,0.0,0,2.0,-8.94561,5.52793,0.215461,-10.603779,-10.705269,-10.557976,-10.638925
2786,1257964,59,28.1634,19.0,20.0,4.0,8.0,0.0,0.0,0.0,0.0,0,1.0,-11.4314,3.77235,-0.585954,-2.409645,-7.383686,-5.324647,-10.294588
3899,1359852,50,28.4665,26.0,19.0,7.0,32.0,0.0,0.0,0.0,0.0,0,0.0,-11.2426,1.55585,-3.18564,-1.955082,-2.548946,-9.311291,-9.925717
4056,1374443,55,22.6912,27.0,20.0,7.0,28.0,0.0,0.0,0.0,0.0,0,0.0,-11.116,6.15412,-4.09872,-1.009426,-5.005344,-5.898298,-9.901077
2448,1225260,68,28.8493,19.0,20.0,5.0,40.0,0.0,0.0,0.0,0.0,1,0.0,-10.824,4.20293,-0.249413,-10.67667,-11.013927,-9.150465,-9.45896
148,1014619,54,30.5623,32.0,20.0,7.0,27.0,0.0,0.0,0.0,0.0,0,0.0,-12.9281,3.04781,-0.78685,3.897453,-3.618961,-1.484886,-9.010902


For cases of brain ages being much lower than true ages, both models have the same number one in their ranking: subject 1184162.

In [9]:
# absolute disagreement between the two models' BAGs, for the raw and the detrended values
data_overview_heldout['bag diff'] = (
    data_overview_heldout['bag_orig']
    .sub(data_overview_heldout['bag_new'])
    .abs()
)
data_overview_heldout['bag diff detrended'] = (
    data_overview_heldout['bag_orig_detrended']
    .sub(data_overview_heldout['bag_new_detrended'])
    .abs()
)
# display the heldout table, now carrying the two new difference columns
data_overview_heldout

Unnamed: 0,eid,age,split,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,...,batch_nb_orig,predicted_age_orig,batch_nb_new,predicted_age_new,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended,bag diff,bag diff detrended
0,1000030,65,heldout_test,21.2569,,20.0,8.0,18.0,1.0,1.0,...,1.0,64.256958,1.0,71.693298,-0.743042,6.693298,-0.697239,6.759642,7.436340,7.456882
1,1000059,68,heldout_test,,22.0,20.0,7.0,,0.0,0.0,...,18.0,58.876446,18.0,58.413288,-9.123554,-9.586712,-7.597349,-8.031745,0.463158,0.434396
2,1000077,58,heldout_test,33.4834,18.0,19.0,3.0,48.0,,,...,34.0,63.836048,34.0,66.193604,5.836048,8.193604,2.427579,4.786494,2.357555,2.358915
3,1000100,57,heldout_test,23.4552,,20.0,,28.0,0.0,0.0,...,2.0,55.072990,2.0,57.406872,-1.927010,0.406872,-5.828947,-3.496446,2.333881,2.332501
4,1000202,67,heldout_test,26.6846,19.0,20.0,6.0,45.0,,,...,16.0,64.058601,16.0,67.427048,-2.941399,0.427048,-1.908661,1.485807,3.368446,3.394468
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,2035762,73,heldout_test,25.3688,12.0,7.0,5.0,24.0,0.0,0.0,...,7.0,77.852402,7.0,74.190857,4.852402,1.190857,8.845944,5.226862,3.661545,3.619082
4996,2264650,61,heldout_test,30.5389,,7.0,,41.0,0.0,0.0,...,29.0,61.950256,29.0,63.557865,0.950256,2.557865,-0.977811,0.639379,1.607609,1.617189
4997,2329354,68,heldout_test,22.5614,18.0,20.0,8.0,30.0,0.0,0.0,...,36.0,66.802460,36.0,65.315231,-1.197540,-2.684769,0.328665,-1.129802,1.487228,1.458467
4998,2457356,60,heldout_test,29.6270,18.0,20.0,8.0,22.0,1.0,1.0,...,9.0,61.610027,9.0,63.635746,1.610027,3.635746,-0.811507,1.221052,2.025719,2.032559


In [10]:
# ten complete-case subjects on which the two models disagree most (raw BAG)
(
    data_overview_heldout
    .loc[no_nans, columns+['bag diff','bag diff detrended']]
    .sort_values(by='bag diff', ascending=False)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,...,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended,bag diff,bag diff detrended
4968,1457519,59,24.049,26.0,20.0,9.0,48.0,0.0,0.0,0.0,...,6.0,-12.7438,4.27521,-2.56399,12.98111,1.826702,10.066107,-1.0842,11.154408,11.150307
1041,1098221,52,27.1626,17.0,19.0,4.0,30.0,0.0,0.0,0.0,...,0.0,-13.1039,3.90495,-4.62974,-5.303051,3.726322,-11.672326,-2.658034,9.029373,9.014292
4364,1401363,71,24.4919,16.0,15.0,9.0,40.0,0.0,0.0,0.0,...,0.0,-14.0534,3.86783,-2.11314,-1.460297,6.95636,1.546311,9.99995,8.416656,8.453639
2482,1229284,74,26.2284,15.0,15.0,4.0,38.0,0.0,0.0,0.0,...,2.0,-12.0258,6.01755,-1.14041,5.637833,-2.716675,10.124843,1.815538,8.354507,8.309304
3788,1350106,74,26.3565,16.0,19.0,4.0,34.0,0.0,0.0,0.0,...,6.0,-15.5866,3.28219,-3.86692,-8.842018,-0.55603,-4.355008,3.976183,8.285988,8.331191
1354,1125280,50,32.8291,25.0,15.0,5.0,53.0,0.0,0.0,0.0,...,3.0,-12.2558,3.39683,-0.74818,12.009953,3.785801,4.653743,-3.59097,8.224152,8.244713
3958,1364685,61,29.465,24.0,13.0,8.0,42.0,0.0,0.0,0.0,...,3.0,-10.9326,5.01633,-3.36331,-2.206276,5.748245,-4.134343,3.829759,7.954521,7.964102
4665,1429687,62,29.5098,24.0,20.0,7.0,31.0,0.0,0.0,0.0,...,6.0,-11.6891,1.53879,-0.97437,-3.541058,4.400703,-4.975657,2.978425,7.941761,7.954082
4004,1369645,77,29.5156,22.0,19.0,4.0,33.0,0.0,0.0,0.0,...,0.0,-12.6389,4.12183,-0.904572,3.673676,-4.265297,9.641088,1.755539,7.938972,7.885549
2834,1262448,69,25.6639,22.0,20.0,9.0,50.0,0.0,0.0,0.0,...,1.0,-13.3352,6.60376,-1.55673,-12.739822,-4.817749,-10.72015,-2.766574,7.922073,7.953575


In [11]:
# ten complete-case subjects on which the two models disagree most (detrended BAG)
(
    data_overview_heldout
    .loc[no_nans, columns+['bag diff','bag diff detrended']]
    .sort_values(by='bag diff detrended', ascending=False)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,...,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended,bag diff,bag diff detrended
4968,1457519,59,24.049,26.0,20.0,9.0,48.0,0.0,0.0,0.0,...,6.0,-12.7438,4.27521,-2.56399,12.98111,1.826702,10.066107,-1.0842,11.154408,11.150307
1041,1098221,52,27.1626,17.0,19.0,4.0,30.0,0.0,0.0,0.0,...,0.0,-13.1039,3.90495,-4.62974,-5.303051,3.726322,-11.672326,-2.658034,9.029373,9.014292
4364,1401363,71,24.4919,16.0,15.0,9.0,40.0,0.0,0.0,0.0,...,0.0,-14.0534,3.86783,-2.11314,-1.460297,6.95636,1.546311,9.99995,8.416656,8.453639
3788,1350106,74,26.3565,16.0,19.0,4.0,34.0,0.0,0.0,0.0,...,6.0,-15.5866,3.28219,-3.86692,-8.842018,-0.55603,-4.355008,3.976183,8.285988,8.331191
2482,1229284,74,26.2284,15.0,15.0,4.0,38.0,0.0,0.0,0.0,...,2.0,-12.0258,6.01755,-1.14041,5.637833,-2.716675,10.124843,1.815538,8.354507,8.309304
1354,1125280,50,32.8291,25.0,15.0,5.0,53.0,0.0,0.0,0.0,...,3.0,-12.2558,3.39683,-0.74818,12.009953,3.785801,4.653743,-3.59097,8.224152,8.244713
3958,1364685,61,29.465,24.0,13.0,8.0,42.0,0.0,0.0,0.0,...,3.0,-10.9326,5.01633,-3.36331,-2.206276,5.748245,-4.134343,3.829759,7.954521,7.964102
4665,1429687,62,29.5098,24.0,20.0,7.0,31.0,0.0,0.0,0.0,...,6.0,-11.6891,1.53879,-0.97437,-3.541058,4.400703,-4.975657,2.978425,7.941761,7.954082
2834,1262448,69,25.6639,22.0,20.0,9.0,50.0,0.0,0.0,0.0,...,1.0,-13.3352,6.60376,-1.55673,-12.739822,-4.817749,-10.72015,-2.766574,7.922073,7.953575
4004,1369645,77,29.5156,22.0,19.0,4.0,33.0,0.0,0.0,0.0,...,0.0,-12.6389,4.12183,-0.904572,3.673676,-4.265297,9.641088,1.755539,7.938972,7.885549


Looking at the maximum BAG differences between model predictions, both the "original" and the detrended BAGs result in almost identical top 10 rankings (the same ten subjects appear in both; only ranks 4 and 5 as well as ranks 9 and 10 are swapped). The highest-ranking subject in both cases is: subject 1457519

In [12]:
# ten complete-case subjects on which the two models agree most closely (raw BAG)
(
    data_overview_heldout
    .loc[no_nans, columns+['bag diff','bag diff detrended']]
    .sort_values(by='bag diff', ascending=True)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,...,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended,bag diff,bag diff detrended
332,1031078,58,30.9633,24.0,19.0,8.0,40.0,0.0,0.0,0.0,...,2.0,-9.16589,3.51011,-1.57675,12.719391,12.724068,9.310921,9.316958,0.004677,0.006037
3414,1317822,68,27.4691,15.0,10.0,6.0,40.0,0.0,0.0,0.0,...,1.0,-13.1456,6.21674,-1.55287,1.406654,1.411583,2.93286,2.96655,0.004929,0.03369
4235,1389218,60,23.7582,17.0,15.0,7.0,24.0,0.0,0.0,0.0,...,0.0,-12.2509,4.30612,-1.30987,3.437328,3.442513,1.015794,1.027818,0.005184,0.012025
3962,1364956,72,24.8771,14.0,20.0,6.0,36.0,0.0,0.0,0.0,...,15.0,-12.7563,3.48912,-2.58297,-2.894646,-2.889091,0.60543,0.650706,0.005554,0.045277
4900,1451946,73,30.7107,10.0,20.0,6.0,46.0,0.0,0.0,0.0,...,0.0,-12.1251,4.03849,-3.84724,-0.905197,-0.91095,3.088346,3.125056,0.005753,0.03671
4641,1426814,64,24.2209,14.0,20.0,7.0,20.0,0.0,0.0,0.0,...,2.0,-12.6299,3.64182,-2.32894,-1.163151,-1.170139,-1.610815,-1.600003,0.006989,0.010813
4096,1377640,52,30.0085,21.0,20.0,8.0,56.0,0.0,0.0,0.0,...,4.0,-10.5298,2.66272,-0.873506,6.302193,6.312489,-0.067082,-0.071867,0.010296,0.004785
3351,1312176,79,27.7254,6.0,15.0,5.0,27.0,0.0,0.0,0.0,...,10.0,-13.8375,4.01783,-3.64333,-4.747574,-4.758247,2.206774,2.255004,0.010674,0.04823
4135,1381322,79,23.6577,16.0,19.0,7.0,40.0,0.0,0.0,0.0,...,0.0,-10.7403,3.27624,-1.71147,-4.833069,-4.846535,2.121279,2.166717,0.013466,0.045438
4904,1452267,69,29.1416,17.0,20.0,7.0,47.0,0.0,0.0,0.0,...,0.0,-13.84,2.13743,-3.61988,-6.401356,-6.416119,-4.381683,-4.364944,0.014763,0.016739


In [13]:
# ten complete-case subjects on which the two models agree most closely (detrended BAG)
(
    data_overview_heldout
    .loc[no_nans, columns+['bag diff','bag diff detrended']]
    .sort_values(by='bag diff detrended', ascending=True)
    .head(10)
)

Unnamed: 0,eid,age,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,recurrent depressive disorder,...,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,bag_orig,bag_new,bag_orig_detrended,bag_new_detrended,bag diff,bag diff detrended
796,1075092,67,22.1384,17.0,10.0,4.0,20.0,0.0,0.0,0.0,...,0.0,-12.4378,3.29044,-0.210425,-5.625736,-5.654293,-4.592998,-4.595534,0.028557,0.002535
4096,1377640,52,30.0085,21.0,20.0,8.0,56.0,0.0,0.0,0.0,...,4.0,-10.5298,2.66272,-0.873506,6.302193,6.312489,-0.067082,-0.071867,0.010296,0.004785
332,1031078,58,30.9633,24.0,19.0,8.0,40.0,0.0,0.0,0.0,...,2.0,-9.16589,3.51011,-1.57675,12.719391,12.724068,9.310921,9.316958,0.004677,0.006037
4648,1427566,78,23.5177,16.0,10.0,7.0,30.0,0.0,0.0,0.0,...,3.0,-10.8598,3.29852,-3.48772,-8.367172,-8.415024,-1.906292,-1.89798,0.047852,0.008312
1053,1099607,67,24.7556,19.0,20.0,9.0,27.0,0.0,0.0,0.0,...,0.0,-14.2863,4.82959,-3.58727,3.67894,3.642349,4.711678,4.701109,0.036591,0.010569
4641,1426814,64,24.2209,14.0,20.0,7.0,20.0,0.0,0.0,0.0,...,2.0,-12.6299,3.64182,-2.32894,-1.163151,-1.170139,-1.610815,-1.600003,0.006989,0.010813
4235,1389218,60,23.7582,17.0,15.0,7.0,24.0,0.0,0.0,0.0,...,0.0,-12.2509,4.30612,-1.30987,3.437328,3.442513,1.015794,1.027818,0.005184,0.012025
3114,1290223,71,30.7174,22.0,20.0,8.0,42.0,0.0,0.0,0.0,...,2.0,-12.7798,4.53645,-3.55024,-6.295181,-6.34597,-3.288574,-3.30238,0.050789,0.013806
4904,1452267,69,29.1416,17.0,20.0,7.0,47.0,0.0,0.0,0.0,...,0.0,-13.84,2.13743,-3.61988,-6.401356,-6.416119,-4.381683,-4.364944,0.014763,0.016739
4487,1412450,61,24.2969,25.0,19.0,6.0,20.0,0.0,0.0,0.0,...,0.0,-15.2581,3.02657,-1.60446,3.772217,3.744255,1.84415,1.825768,0.027962,0.018381


The lists for minimal BAG differences across model predictions are more varied between the "original" and detrended BAGs than the lists for maximal differences viewed before. Only one subject appears in the top 3 of both lists: subject 1031078