In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import torch
import math

from modules.util.util import importstr
from modules.util.logconf import logging
log = logging.getLogger('nb')

In [3]:
def run(app, *argv):
    """Instantiate the app class named by ``app`` and call its ``main()``.

    Args:
        app: Dotted ``module.ClassName`` string. It is split on the last dot
            and the two parts are handed to ``importstr`` to obtain the class.
        *argv: Command-line style arguments passed, as a list, to the app's
            constructor. ``--num-workers=4`` is always placed first.
    """
    # Ask PyTorch to release cached, unused GPU memory before starting a run.
    torch.cuda.empty_cache()

    argv = ['--num-workers=4', *argv]
    log.info("Running: {}({!r}).main()".format(app, argv))

    app_cls = importstr(*app.rsplit('.', 1))
    app_cls(argv).main()

    # Use the same "app(argv).main()" shape as the "Running" line above; the
    # previous "{}.{!r}.main()" format rendered as "App.[...].main()".
    log.info("Finished: {}({!r}).main()".format(app, argv))

In [4]:
# we use this to test if the bounding box dimension is valid for our unet
def test_num(num, depth):
    offset = int(math.ceil(num*.33))
    curr_width = width = num - offset
    for i in range(depth-1):
        curr_width = math.floor(curr_width / 2)
    for i in range(depth-1):
        curr_width = curr_width * 2
    return width == curr_width, repr([width, curr_width])

In [5]:
# Settings shared by the short experiment cells of this notebook.
experiment_epochs = 3  # passed as --epochs
batch_size = 32        # passed as --batch-size
unet_depth = 3         # depth handed to test_num when validating bbox sizes
val_cadence = 1
recall_priority = 0

# Option lists; the experiment cells pick entries from these by index.
windows = [None, 'lung', 'mediastinal', 'dist']
pad_types = ['zero', 'reflect', 'replicate']

# Used for the last two values of --width-irc in the experiment cells.
bbox_dim = 246

In [6]:
# Every candidate bounding-box size must pass test_num at the configured depth.
# The message names the offending size; `dim` is already a repr string, so it
# is interpolated as-is rather than wrapped in repr() a second time.
for num in [60, 108, 144, 198, 246, 288, 323]:
    works, dim = test_num(num, unet_depth)
    assert works, f"bbox size {num} is not valid for depth {unet_depth}: {dim}"

Run the prep-cache app for each bounding-box size.

In [7]:
# Run the prep-cache app once per enabled bounding-box size. Sizes that are
# not being run stay listed in comments so they can be switched back on.
for side in [
    # 60, 108, 144, 198,
    246,
    # 288, 323,
]:
    run('prepcache.CovidPrepCacheApp', '--width-irc', '7', str(side), str(side))

2020-11-14 19:02:16,930 INFO     pid:54362 nb:006:run Running: prepcache.CovidPrepCacheApp(['--num-workers=4', '--width-irc', '7', '246', '246']).main()
2020-11-14 19:02:17,872 INFO     pid:54362 prepcache:039:main Starting CovidPrepCacheApp, Namespace(batch_size=1024, num_workers=4, width_irc=['7', '246', '246'])
100%|██████████| 2/2 [01:20<00:00, 40.47s/it]
2020-11-14 19:03:38,881 INFO     pid:54362 nb:011:run Finished: prepcache.CovidPrepCacheApp.['--num-workers=4', '--width-irc', '7', '246', '246'].main()


# Bounding Box Test

In [None]:
training_app = 'training.CovidSegmentationTrainingApp'
shared_args = [f'--epochs={experiment_epochs}', f'--batch-size={batch_size}']

# The bbox60 run passes no --width-irc at all.
# NOTE(review): presumably the app's default width is 7x60x60 — confirm.
run(training_app, *shared_args, '--run-name=bbox60')

# Remaining sizes: square bounding boxes, always with 7 as the first value.
for side in [108, 144, 198, 246, 288, 323]:
    run(training_app, *shared_args,
        '--width-irc', '7', str(side), str(side), f'--run-name=bbox{side}')

# dummy run to finish wandb
run(training_app, '--epochs=0', '--run-name=to delete')

# Augmentation Test

In [9]:
training_app = 'training.CovidSegmentationTrainingApp'
shared_args = [f'--epochs={experiment_epochs}', f'--batch-size={batch_size}']
bbox_args = ['--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}']

# One run per individual augmentation flag, then one with --augmented.
for aug_flag, run_label in [
    ('--augment-flip', 'flip aug'),
    ('--augment-offset', 'offset aug'),
    ('--augment-scale', 'scale aug'),
    ('--augment-rotate', 'rotate aug'),
    ('--augment-noise', 'noise aug'),
    ('--augmented', 'all aug'),
]:
    run(training_app, *shared_args, aug_flag, f'--run-name={run_label}', *bbox_args)

# dummy run to finish wandb
run(training_app, '--epochs=0', '--run-name=to delete')

In [10]:
# increased range of scale from .2 to .3 and range of offset from .1 to .3
# NOTE(review): nothing in this cell sets those ranges; the arguments differ
# from the earlier offset/scale runs only in --run-name, so the change was
# presumably made in the training/dataset code itself — confirm.
training_app = 'training.CovidSegmentationTrainingApp'
shared_args = [f'--epochs={experiment_epochs}', f'--batch-size={batch_size}']
bbox_args = ['--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}']

for aug_flag, run_label in [
    ('--augment-offset', 'larger offset aug'),
    ('--augment-scale', 'larger scale aug'),
]:
    run(training_app, *shared_args, aug_flag, f'--run-name={run_label}', *bbox_args)

# dummy run to finish wandb
run(training_app, '--epochs=0', '--run-name=to delete')

# Window Test

In [9]:
training_app = 'training.CovidSegmentationTrainingApp'
shared_args = [f'--epochs={experiment_epochs}', f'--batch-size={batch_size}']
bbox_args = ['--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}']

# Compare the 'lung' and 'mediastinal' entries of `windows` (indices 1 and 2).
for window_name in windows[1:3]:
    run(training_app, *shared_args, f'--ct-window={window_name}',
        '--augmented', *bbox_args, f'--run-name={window_name} window')

# dummy run to finish wandb
run(training_app, '--epochs=0', '--run-name=to delete')

# Padding Test

In [1]:
training_app = 'training.CovidSegmentationTrainingApp'
shared_args = [f'--epochs={experiment_epochs}', f'--batch-size={batch_size}']
bbox_args = ['--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}']

# Compare the 'reflect' and 'replicate' entries of `pad_types` (indices 1 and
# 2), keeping the CT window fixed at windows[1].
for pad_name in pad_types[1:]:
    run(training_app, *shared_args, f'--ct-window={windows[1]}',
        '--augmented', *bbox_args, f'--pad-type={pad_name}',
        f'--run-name={pad_name} padding')

# dummy run to finish wandb
run(training_app, '--epochs=0', '--run-name=to delete')

# Train Run

In [5]:
# Settings for the full training run. Several names here (unet_depth,
# val_cadence, batch_size, recall_priority, bbox_dim) are also assigned by
# the experiment configuration cell; this cell rebinds them.
epochs = 50                # passed as --epochs
steps_per_epoch = 100_000  # passed as --steps-per-epoch
batch_size = 32            # passed as --batch-size
val_cadence = 5            # passed as --val-cadence
unet_depth = 3             # passed as --depth
recall_priority = 0

window = 'lung'            # passed as --ct-window
pad_type = 'replicate'     # passed as --pad-type
bbox_dim = 246             # last two values of --width-irc

In [6]:
# Full training run, with every setting taken from the training config cell.
train_args = [
    f'--epochs={epochs}',
    f'--batch-size={batch_size}',
    f'--ct-window={window}',
    '--augmented',
    f'--val-cadence={val_cadence}',
    f'--depth={unet_depth}',
    '--width-irc', '7', f'{bbox_dim}', f'{bbox_dim}',
    f'--pad-type={pad_type}',
    f'--steps-per-epoch={steps_per_epoch}',
    '--run-name=train run 1',
]
run('training.CovidSegmentationTrainingApp', *train_args)

# dummy run to finish wandb
run('training.CovidSegmentationTrainingApp', '--epochs=0', '--run-name=to delete')

2020-11-15 00:04:44,749 INFO     pid:91418 nb:006:run Running: training.CovidSegmentationTrainingApp(['--num-workers=4', '--epochs=50', '--batch-size=32', '--ct-window=lung', '--augmented', '--val-cadence=5', '--depth=3', '--width-irc', '7', '246', '246', '--pad-type=replicate', '--steps-per-epoch=100000', '--run-name=train run 1']).main()
2020-11-15 00:04:46,195 INFO     pid:91418 training:195:init_model Using CUDA; 1 devices.
2020-11-15 00:04:48,357 INFO     pid:91418 modules.dsets:188:__init__ TrainingCovid2dSegmentationDataset: training mode, 177 uid's, 4379 index slices, 1080 lesions
2020-11-15 00:04:48,358 INFO     pid:91418 modules.dsets:232:__init__ TrainingCovid2dSegmentationDataset: (7, 246, 246) width_irc
2020-11-15 00:04:48,369 INFO     pid:91418 modules.dsets:188:__init__ Covid2dSegmentationDataset: validation mode, 20 uid's, 592 index slices, 157 lesions
2020-11-15 00:04:48,942 INFO     pid:91418 wandb:606:_apply_source_login setting login settings: {}
[34m[1mwandb[0m:

2020-11-15 00:04:50,311 INFO     pid:91418 training:478:main Starting CovidSegmentationTrainingApp, Namespace(augment_flip=False, augment_noise=False, augment_offset=False, augment_rotate=False, augment_scale=False, augmented=True, batch_size=32, ct_window='lung', depth=3, epochs=50, num_workers=4, pad_type='replicate', recall_priority=0, run_name='train run 1', steps_per_epoch=100000, val_cadence=5, width_irc=['7', '246', '246'])


epoch,loss/trn,loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score
1,0.4367,0.6216,0.3483,0.6826,0.4884,0.6517,0.5584
5,0.3304,0.4428,0.2999,0.439,0.6146,0.7001,0.6546
10,0.3281,0.4245,0.3302,0.2982,0.692,0.6698,0.6807
15,0.3301,0.4549,0.1777,0.7762,0.5144,0.8223,0.6329
20,0.3292,0.4373,0.2865,0.4245,0.627,0.7135,0.6675
25,0.3191,0.4127,0.301,0.3131,0.6906,0.699,0.6948
30,0.3156,0.4398,0.2363,0.5583,0.5777,0.7637,0.6578
35,0.3024,0.422,0.2376,0.4747,0.6163,0.7624,0.6816
40,0.2861,0.41,0.3265,0.2271,0.7478,0.6735,0.7087
45,0.2508,0.4007,0.2662,0.3002,0.7097,0.7338,0.7215


2020-11-15 00:13:34,185 INFO     pid:91418 training:316:log_metrics E1 CovidSegmentationTrainingApp
2020-11-15 00:13:34,188 INFO     pid:91418 training:345:log_metrics E1 trn      0.4367 loss, 0.5352 precision, 0.6664 recall, 0.5936 f1 score 0.3336 miss rate 0.5787 fp to label ratio
2020-11-15 00:14:10,962 INFO     pid:91418 training:316:log_metrics E1 CovidSegmentationTrainingApp
2020-11-15 00:14:10,963 INFO     pid:91418 training:354:log_metrics E1 val      0.6216 loss, 0.4884 precision, 0.6517 recall, 0.5584 f1 score 0.3483 miss rate 0.6826 fp to label ratio
2020-11-15 00:19:58,149 INFO     pid:91418 training:457:save_model Saved model params to saved-models/2020-11-15_00.04.46.100000.state
2020-11-15 00:19:58,151 INFO     pid:91418 training:462:save_model Saved model params to saved-models/2020-11-15_00.04.46.best.state
2020-11-15 00:19:58,153 INFO     pid:91418 training:467:save_model SHA1: ca4564c819a3d95fb54d8908887e4ecfac2917d8
2020-11-15 00:28:55,127 INFO     pid:91418 trainin

VBox(children=(Label(value=' 69.39MB of 69.39MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.999982134…

0,1
_step,5000000.0
_runtime,34993.0
_timestamp,1605451682.0
loss/trn,0.21486
metrics_trn/miss_rate,0.23059
metrics_trn/fp_to_mask_ratio,0.17888
pr_trn/precision,0.81137
pr_trn/recall,0.76941
pr_trn/f1_score,0.78983
loss/val,0.39151


0,1
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_runtime,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
loss/trn,█▆▅▅▅▅▅▅▅▅▅▅▅▅▄▅▄▄▄▅▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▂▂▂▁▁
metrics_trn/miss_rate,█▇▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▂▂▁▁
metrics_trn/fp_to_mask_ratio,█▄▄▄▃▃▃▃▃▃▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
pr_trn/precision,▁▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇███
pr_trn/recall,▁▂▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇██
pr_trn/f1_score,▁▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇███
loss/val,█▃▂▃▂▂▂▂▂▁▁


[34m[1mwandb[0m: wandb version 0.10.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


RuntimeError: No active exception to reraise