# Get predictions + performance of models on heldout test set

In [1]:
import pandas as pd
import torch
from pytorch_lightning import Trainer
from brain_age_prediction import data, models, utils

In [2]:
# Cap the number of CPU threads torch may use for this notebook.
NUM_TORCH_THREADS = 2
torch.set_num_threads(NUM_TORCH_THREADS)

In [3]:
# Directory that receives every prediction CSV written by this notebook.
preds_save_path = '/ritter/share/projects/laura_riedel_thesis/predictions/'

# Best checkpoint of each model that is evaluated below.
orig_model_path = 'lightweight-brain-age-prediction/umd5tlvz/checkpoints/epoch=57-step=13108.ckpt'
new_model_path = 'lightweight-brain-age-prediction/nx218mm3/checkpoints/epoch=26-step=6102.ckpt'

## "Original" ICA100 model architecture with depth=1

In [4]:
# Project helper, called before anything else in this cell
# (presumably fixes the random seeds -- confirm in utils).
utils.make_reproducible()

# Set-up: data module with dev=False, a CPU trainer that neither logs nor
# writes checkpoints, and the model restored from its best checkpoint.
orig_datamodule = data.UKBBDataModule(dev=False)
orig_trainer = Trainer(
    accelerator='cpu',
    deterministic=True,
    logger=False,
    enable_checkpointing=False,
)
orig_model = models.variable1DCNN.load_from_checkpoint(orig_model_path)
orig_model.eval()

# Get this model's predictions and store them as CSV ...
orig_preds_file = preds_save_path + 'original_model_preds_100-500p.csv'
utils.predict_w_model(orig_trainer, orig_model, orig_datamodule, orig_preds_file, output=False)

# ... then evaluate on the test set; the returned metrics are the cell output.
orig_trainer.test(model=orig_model, datamodule=orig_datamodule)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Predicting: 0it [00:00, ?it/s]

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss            31.08278465270996
        test_mae             4.453372478485107
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 31.08278465270996, 'test_mae': 4.453372478485107}]

In [5]:
# Store the participant ID / age overview of the held-out test set. It is the
# same for both models, so the original model's data module serves as source.
heldout_overview_file = '../../data/schaefer/overview_heldout_test_set_100-500p.csv'
utils.save_heldout_data_info(heldout_overview_file, orig_datamodule)

## New deeper model architecture with depth=4

In [6]:
# Project helper, called before anything else in this cell
# (presumably fixes the random seeds -- confirm in utils).
utils.make_reproducible()

# Set-up: data module with dev=False, a CPU trainer that neither logs nor
# writes checkpoints, and the model restored from its best checkpoint.
new_datamodule = data.UKBBDataModule(dev=False)
new_trainer = Trainer(
    accelerator='cpu',
    deterministic=True,
    logger=False,
    enable_checkpointing=False,
)
new_model = models.variable1DCNN.load_from_checkpoint(new_model_path)
new_model.eval()

# Get this model's predictions and store them as CSV ...
new_preds_file = preds_save_path + 'new_model_preds_100-500p.csv'
utils.predict_w_model(new_trainer, new_model, new_datamodule, new_preds_file, output=False)

# ... then evaluate on the test set; the returned metrics are the cell output.
new_trainer.test(model=new_model, datamodule=new_datamodule)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Predicting: 0it [00:00, ?it/s]

  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           30.271772384643555
        test_mae             4.405776500701904
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 30.271772384643555, 'test_mae': 4.405776500701904}]

## Create + save overview

In [7]:
# Inputs for the overview table: the UKBB data directory, the directory with
# the split information, and the held-out overview CSV inside that directory.
ukbb_dir = '/ritter/share/data/UKBB/ukb_data/'
data_info_path = '../../data/schaefer/'
heldout_path = data_info_path+'overview_heldout_test_set_100-500p.csv'
# Base table built by the project helper; in the displayed result it holds
# one row per eid with age, split label and further metadata columns.
data_overview = utils.merge_metadata_with_splitinfos(ukbb_dir, data_info_path, heldout_path)

# load predictions
orig_model_preds = pd.read_csv(preds_save_path+'original_model_preds_100-500p.csv')
new_model_preds = pd.read_csv(preds_save_path+'new_model_preds_100-500p.csv')
# add predictions to overview
# Every prediction column except the join key gets its model tag *before* the
# merge, so the resulting names do not depend on pandas' collision suffixes.
# validate='many_to_one' raises if a prediction file contains an eid more than
# once, instead of silently duplicating overview rows.
data_overview = data_overview.merge(
    orig_model_preds.rename(columns=lambda col: col if col == 'eid' else col + '_orig'),
    how='left', on='eid', validate='many_to_one',
)
data_overview = data_overview.merge(
    new_model_preds.rename(columns=lambda col: col if col == 'eid' else col + '_new'),
    how='left', on='eid', validate='many_to_one',
)
# Project helper; in the displayed result it adds bag_orig / bag_new, which
# equal the respective predicted age minus age.
data_overview = utils.calculate_bag(data_overview, models=['orig','new'])
data_overview

Unnamed: 0,eid,age,split,bmi,digit substitution,education,fluid intelligence,grip,depressive episode,all depression,...,weekly beer,genetic pc 1,genetic pc 2,genetic pc 3,batch_nb_orig,predicted_age_orig,batch_nb_new,predicted_age_new,bag_orig,bag_new
0,1000014,61,train,23.4720,16.0,10.0,8.0,18.0,,,...,0.0,-9.69796,4.38910,-1.121000,,,,,,
1,1000023,66,train,30.7894,,7.0,4.0,16.0,,,...,3.0,-13.23580,3.27728,-2.250050,,,,,,
2,1000041,61,train,22.7013,23.0,15.0,3.0,38.0,0.0,0.0,...,16.0,-13.54560,4.86490,-0.990955,,,,,,
3,1000062,69,train,30.4367,19.0,15.0,7.0,30.0,0.0,0.0,...,4.0,-12.49330,5.05782,-0.544850,,,,,,
4,1000086,52,train,27.3455,18.0,20.0,8.0,22.0,0.0,0.0,...,0.0,-10.47450,2.44623,1.399650,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37851,2035762,73,heldout_test,25.3688,12.0,7.0,5.0,24.0,0.0,0.0,...,0.0,-13.48930,2.64829,-2.457690,7.0,77.852402,7.0,74.190857,4.852402,1.190857
37852,2264650,61,heldout_test,30.5389,,7.0,,41.0,0.0,0.0,...,,-13.46180,4.96731,-3.327250,29.0,61.950256,29.0,63.557865,0.950256,2.557865
37853,2329354,68,heldout_test,22.5614,18.0,20.0,8.0,30.0,0.0,0.0,...,1.0,,,,36.0,66.802460,36.0,65.315231,-1.197540,-2.684769
37854,2457356,60,heldout_test,29.6270,18.0,20.0,8.0,22.0,1.0,1.0,...,0.0,,,,9.0,61.610027,9.0,63.635746,1.610027,3.635746


In [8]:
# Write the combined overview next to the prediction files; index=False keeps
# the DataFrame's row index out of the CSV.
overview_file = preds_save_path + 'predictions_overview_100-500p.csv'
data_overview.to_csv(overview_file, index=False)