In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import datetime
import torch
import math

from modules.util.util import importstr
from modules.util.logconf import logging
log = logging.getLogger('nb')

In [4]:
def run(app, *argv):
    """Instantiate a command-line style app class and call its ``main()``.

    Args:
        app: Dotted path of the app class, e.g.
            ``'training.CovidSegmentationTrainingApp'``. The string is split
            on its last dot and the pieces are passed to ``importstr``.
        *argv: Command-line style string arguments handed to the app's
            constructor. ``'--num-workers=4'`` is always prepended.
    """
    # Drop cached CUDA allocations left over from an earlier run in this kernel.
    torch.cuda.empty_cache()

    argv = ['--num-workers=4', *argv]
    log.info("Running: {}({!r}).main()".format(app, argv))

    app_cls = importstr(*app.rsplit('.', 1))
    app_cls(argv).main()

    # Render the call exactly like the "Running" line so the two log entries
    # can be paired up (the old format produced "app.[...].main()").
    log.info("Finished: {}({!r}).main()".format(app, argv))

Hyperparams to test:

1. bounding box size
2. ~batch size~
3. ~learning rate~
4. steps per epoch
5. windowing
6. ~augmentation~
7. model depth
8. weight decay
9. pad type

In [5]:
# we use this to test if the bounding box dimension is valid for our unet
def test_num(num, depth):
    offset = int(math.ceil(num*.33))
    curr_width = width = num - offset
    for i in range(depth-1):
        curr_width = math.floor(curr_width / 2)
    for i in range(depth-1):
        curr_width = curr_width * 2
    return width == curr_width, repr([width, curr_width])

In [6]:
# Settings forwarded to the training app as --epochs / --batch-size.
experiment_epochs = 3
batch_size = 32

# Depth handed to test_num() when validating bounding-box sizes.
unet_depth = 3

# Row/column size used with --width-irc by the non-bbox experiments.
bbox_dim = 246

# Not referenced by the cells visible in this notebook section.
val_cadence = 1
recall_priority = 0

# Option menus; later cells pick entries by index for --pad-type / --ct-window.
pad_types = ['zero', 'reflect', 'replicate']
windows = [None, 'lung', 'mediastinal', 'dist']

In [7]:
# Fail fast if any candidate bounding-box size does not pass test_num()
# for the configured depth.
for num in [60,108,144,198,246,288,323]:
    works, dim = test_num(num, unet_depth)
    # `dim` is already a repr() string, so it is used as-is (repr()-ing it
    # again wrapped the message in an extra pair of quotes); the failing
    # size is included so the assertion is actionable.
    assert works, f'bbox size {num} is invalid for depth {unet_depth}: {dim}'

Cache the dataset for each bounding-box size (only the size currently needed is left uncommented below).

In [7]:
# Pre-populate the cache for the bounding-box sizes listed below.
# Sizes that are commented out are skipped; move them out of the comments
# to cache them as well.
for size in (
    # 60, 108, 144, 198,
    246,
    # 288, 323,
):
    run('prepcache.CovidPrepCacheApp', '--width-irc', '7', str(size), str(size))

2020-11-14 19:02:16,930 INFO     pid:54362 nb:006:run Running: prepcache.CovidPrepCacheApp(['--num-workers=4', '--width-irc', '7', '246', '246']).main()
2020-11-14 19:02:17,872 INFO     pid:54362 prepcache:039:main Starting CovidPrepCacheApp, Namespace(batch_size=1024, num_workers=4, width_irc=['7', '246', '246'])
100%|██████████| 2/2 [01:20<00:00, 40.47s/it]
2020-11-14 19:03:38,881 INFO     pid:54362 nb:011:run Finished: prepcache.CovidPrepCacheApp.['--num-workers=4', '--width-irc', '7', '246', '246'].main()


# Bounding Box Test

In [None]:
# One short training run per bounding-box size; only the box size varies.
# The original 'bbox60' call passed no --width-irc and so depended on the
# app's default; the size is now always explicit so the run name matches the
# box actually used. NOTE(review): confirm the app default was 7x60x60.
for size in [60, 108, 144, 198, 246, 288, 323]:
    run('training.CovidSegmentationTrainingApp', f'--epochs={experiment_epochs}', f'--batch-size={batch_size}',
        '--width-irc', '7', f'{size}', f'{size}', f'--run-name=bbox{size}')

# dummy run to finish wandb
run('training.CovidSegmentationTrainingApp', '--epochs=0', '--run-name=to delete')

# Augmentation Test

In [9]:
# (augmentation flag, wandb run name) for each experiment, in execution order.
augmentation_runs = [
    ('--augment-flip', 'flip aug'),
    ('--augment-offset', 'offset aug'),
    ('--augment-scale', 'scale aug'),
    ('--augment-rotate', 'rotate aug'),
    ('--augment-noise', 'noise aug'),
    ('--augmented', 'all aug'),
]

# Same epochs, batch size and bounding box for every run; only the flag changes.
for aug_flag, label in augmentation_runs:
    run('training.CovidSegmentationTrainingApp', f'--epochs={experiment_epochs}', f'--batch-size={batch_size}',
        aug_flag, f'--run-name={label}', '--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}')

# dummy run to finish wandb
run('training.CovidSegmentationTrainingApp', '--epochs=0', '--run-name=to delete')

In [10]:
# increased range of scale from .2 to .3 and range of offset from .1 to .3
# Re-run of the offset and scale augmentations; the run names mark these as
# the "larger" variants.
for aug_kind in ('offset', 'scale'):
    run('training.CovidSegmentationTrainingApp', f'--epochs={experiment_epochs}', f'--batch-size={batch_size}',
        f'--augment-{aug_kind}', f'--run-name=larger {aug_kind} aug',
        '--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}')

# dummy run to finish wandb
run('training.CovidSegmentationTrainingApp', '--epochs=0', '--run-name=to delete')

# Window Test

In [9]:
# Compare CT windows: windows[1:3] selects the 'lung' and 'mediastinal'
# entries; the run name is derived from the window name.
for window in windows[1:3]:
    run('training.CovidSegmentationTrainingApp', f'--epochs={experiment_epochs}', f'--batch-size={batch_size}',
        f'--ct-window={window}', '--augmented',
        '--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}', f'--run-name={window} window')

# dummy run to finish wandb
run('training.CovidSegmentationTrainingApp', '--epochs=0', '--run-name=to delete')

# Padding Test

In [10]:
# Compare padding modes with the windows[1] CT window held fixed:
# pad_types[1:3] selects the 'reflect' and 'replicate' entries.
for pad_type in pad_types[1:3]:
    run('training.CovidSegmentationTrainingApp', f'--epochs={experiment_epochs}', f'--batch-size={batch_size}',
        f'--ct-window={windows[1]}', '--augmented',
        '--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}',
        f'--pad-type={pad_type}', f'--run-name={pad_type} padding')

# dummy run to finish wandb
run('training.CovidSegmentationTrainingApp', '--epochs=0', '--run-name=to delete')

2020-11-14 21:24:58,367 INFO     pid:74032 nb:006:run Running: training.CovidSegmentationTrainingApp(['--num-workers=4', '--epochs=3', '--batch-size=32', '--ct-window=lung', '--augmented', '--width-irc', '7', '246', '246', '--pad-type=reflect', '--run-name=reflect padding']).main()
2020-11-14 21:24:58,444 INFO     pid:74032 training:195:init_model Using CUDA; 1 devices.
2020-11-14 21:25:01,114 INFO     pid:74032 modules.dsets:185:__init__ TrainingCovid2dSegmentationDataset: training mode, 177 uid's, 4379 index slices, 1080 lesions
2020-11-14 21:25:01,115 INFO     pid:74032 modules.dsets:226:__init__ TrainingCovid2dSegmentationDataset: (7, 246, 246) width_irc
2020-11-14 21:25:01,123 INFO     pid:74032 modules.dsets:185:__init__ Covid2dSegmentationDataset: validation mode, 20 uid's, 592 index slices, 157 lesions


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

[34m[1mwandb[0m: wandb version 0.10.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


2020-11-14 21:25:05,700 INFO     pid:74032 training:474:main Starting CovidSegmentationTrainingApp, Namespace(augment_flip=False, augment_noise=False, augment_offset=False, augment_rotate=False, augment_scale=False, augmented=True, batch_size=32, ct_window='lung', depth=3, epochs=3, num_workers=4, pad_type='reflect', recall_priority=0, run_name='reflect padding', steps_per_epoch=10000.0, val_cadence=1, width_irc=['7', '246', '246'])


epoch,loss/trn,loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score
1,0.6128,0.7785,0.4941,1.6221,0.2377,0.5059,0.3235
2,0.4738,0.5067,0.3461,0.6086,0.518,0.6539,0.5781
3,0.4315,0.4993,0.3081,0.657,0.5129,0.6919,0.5891


2020-11-14 21:25:56,390 INFO     pid:74032 training:316:log_metrics E1 CovidSegmentationTrainingApp
2020-11-14 21:25:56,392 INFO     pid:74032 training:345:log_metrics E1 trn      0.6128 loss, 0.2472 precision, 0.6993 recall, 0.3653 f1 score 0.3007 miss rate 2.1298 fp to label ratio
2020-11-14 21:26:11,319 INFO     pid:74032 training:316:log_metrics E1 CovidSegmentationTrainingApp
2020-11-14 21:26:11,320 INFO     pid:74032 training:354:log_metrics E1 val      0.7785 loss, 0.2377 precision, 0.5059 recall, 0.3235 f1 score 0.4941 miss rate 1.6221 fp to label ratio
2020-11-14 21:26:29,314 INFO     pid:74032 training:453:save_model Saved model params to saved-models/2020-11-14_21.24.58.10000.state
2020-11-14 21:26:29,316 INFO     pid:74032 training:458:save_model Saved model params to saved-models/2020-11-14_21.24.58.best.state
2020-11-14 21:26:29,319 INFO     pid:74032 training:463:save_model SHA1: fc6c17bf9c0162e2810e0167c4448bb951d6888c
2020-11-14 21:27:20,463 INFO     pid:74032 training

VBox(children=(Label(value=' 19.06MB of 19.09MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.998224425…

0,1
loss/trn,0.43153
metrics_trn/miss_rate,0.34549
metrics_trn/fp_to_mask_ratio,0.47347
pr_trn/precision,0.58025
pr_trn/recall,0.65451
pr_trn/f1_score,0.61515
loss/val,0.49929
metrics_val/miss_rate,0.3081
metrics_val/fp_to_mask_ratio,0.65704
pr_val/precision,0.51292


0,1
loss/trn,█▃▁
metrics_trn/miss_rate,▁██
metrics_trn/fp_to_mask_ratio,█▂▁
pr_trn/precision,▁▆█
pr_trn/recall,█▁▁
pr_trn/f1_score,▁▇█
loss/val,█▁▁
metrics_val/miss_rate,█▂▁
metrics_val/fp_to_mask_ratio,█▁▁
pr_val/precision,▁██


[34m[1mwandb[0m: wandb version 0.10.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


2020-11-14 21:31:23,739 INFO     pid:74032 training:474:main Starting CovidSegmentationTrainingApp, Namespace(augment_flip=False, augment_noise=False, augment_offset=False, augment_rotate=False, augment_scale=False, augmented=True, batch_size=32, ct_window='lung', depth=3, epochs=3, num_workers=4, pad_type='replicate', recall_priority=0, run_name='replicate padding', steps_per_epoch=10000.0, val_cadence=1, width_irc=['7', '246', '246'])


epoch,loss/trn,loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score
1,0.5948,0.5779,0.2067,1.4745,0.3498,0.7933,0.4855
2,0.4846,0.5885,0.2811,1.1694,0.3807,0.7189,0.4978
3,0.4334,0.5012,0.2864,0.7416,0.4904,0.7136,0.5813


2020-11-14 21:32:16,566 INFO     pid:74032 training:316:log_metrics E1 CovidSegmentationTrainingApp
2020-11-14 21:32:16,567 INFO     pid:74032 training:345:log_metrics E1 trn      0.5948 loss, 0.2932 precision, 0.6766 recall, 0.4091 f1 score 0.3234 miss rate 1.6312 fp to label ratio
2020-11-14 21:32:34,810 INFO     pid:74032 training:316:log_metrics E1 CovidSegmentationTrainingApp
2020-11-14 21:32:34,813 INFO     pid:74032 training:354:log_metrics E1 val      0.5779 loss, 0.3498 precision, 0.7933 recall, 0.4855 f1 score 0.2067 miss rate 1.4745 fp to label ratio
2020-11-14 21:32:51,715 INFO     pid:74032 training:453:save_model Saved model params to saved-models/2020-11-14_21.29.21.10000.state
2020-11-14 21:32:51,717 INFO     pid:74032 training:458:save_model Saved model params to saved-models/2020-11-14_21.29.21.best.state
2020-11-14 21:32:51,720 INFO     pid:74032 training:463:save_model SHA1: a8930fa94086fe101287caad80e8d1ac812806c5
2020-11-14 21:33:43,410 INFO     pid:74032 training

VBox(children=(Label(value=' 18.94MB of 18.94MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.999934534…

0,1
loss/trn,0.43344
metrics_trn/miss_rate,0.3446
metrics_trn/fp_to_mask_ratio,0.48638
pr_trn/precision,0.57402
pr_trn/recall,0.6554
pr_trn/f1_score,0.61202
loss/val,0.50115
metrics_val/miss_rate,0.28645
metrics_val/fp_to_mask_ratio,0.74159
pr_val/precision,0.49037


0,1
loss/trn,█▃▁
metrics_trn/miss_rate,▁█▇
metrics_trn/fp_to_mask_ratio,█▂▁
pr_trn/precision,▁▆█
pr_trn/recall,█▁▂
pr_trn/f1_score,▁▆█
loss/val,▇█▁
metrics_val/miss_rate,▁██
metrics_val/fp_to_mask_ratio,█▅▁
pr_val/precision,▁▃█


[34m[1mwandb[0m: wandb version 0.10.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


RuntimeError: No active exception to reraise

# Train Run