In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import torch
import math

from modules.util.util import importstr
from modules.util.logconf import logging
log = logging.getLogger('nb')

In [3]:
def run(app, *argv):
    """Instantiate the app class named by ``app`` and call its ``main()``.

    Args:
        app: Dotted ``'module.ClassName'`` string. It is split on the last
            dot and the two parts are handed to ``importstr`` to obtain the
            class.
        *argv: Command-line style arguments forwarded to the app class
            constructor. ``'--num-workers=4'`` is always placed in front of
            them.
    """
    # Free unoccupied cached CUDA memory before starting; presumably so that
    # back-to-back runs in the same kernel do not pile up GPU memory.
    torch.cuda.empty_cache()

    # Build a fresh list with the default worker count first, then the
    # caller's arguments in their original order.
    argv = ['--num-workers=4', *argv]
    log.info("Running: {}({!r}).main()".format(app, argv))

    app_cls = importstr(*app.rsplit('.', 1))
    app_cls(argv).main()

    # Same "app(argv).main()" shape as the "Running" message above; the
    # previous "{}.{!r}.main()" form printed the argv list as if it were an
    # attribute of the app.
    log.info("Finished: {}({!r}).main()".format(app, argv))

In [4]:
# --- Experiment identity ---------------------------------------------------
project_name = 'covid19_seg'    # forwarded as --project-name

# --- Training schedule -----------------------------------------------------
epochs = 500                    # forwarded as --epochs
steps_per_epoch = 200           # forwarded as --steps-per-epoch
batch_size = 2                  # forwarded as --batch-size
val_cadence = 1                 # forwarded as --val-cadence
recall_priority = 0             # forwarded as --recall-priority

# --- Model / input options -------------------------------------------------
unet_depth = 3                  # forwarded as --depth
width_irc = (12, 192, 192)      # forwarded element by element after --width-irc
pad_types = ['zero', 'replicate']   # the training cell picks pad_types[1]
# Not referenced by the visible cells; presumably the choices for a CT-window
# option of the training app -- TODO confirm.
windows = [None, 'lung', 'mediastinal', 'dist']

In [5]:
# Label for this training run; forwarded to the training app as --run-name.
run_name = 'train run'

In [6]:
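# Disabled cache-preparation step (the output kept below is from an earlier
# execution). Uncomment to run it again:
# run('prepcache.CovidPrepCacheApp')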

2020-11-21 15:46:40,317 INFO     pid:19716 nb:006:run Running: prepcache.CovidPrepCacheApp(['--num-workers=4']).main()
2020-11-21 15:46:41,483 INFO     pid:19716 prepcache:039:main Starting CovidPrepCacheApp, Namespace(batch_size=200, num_workers=4, width_irc=[12, 192, 192])
100%|██████████| 1/1 [01:35<00:00, 95.00s/it]
2020-11-21 15:48:16,547 INFO     pid:19716 nb:011:run Finished: prepcache.CovidPrepCacheApp.['--num-workers=4'].main()


In [6]:
# Full training run: one CLI-style flag per line, in the order the app
# receives them.
run(
    'training.CovidSegmentationTrainingApp',
    # schedule
    f'--epochs={epochs}',
    f'--batch-size={batch_size}',
    f'--steps-per-epoch={steps_per_epoch}',
    f'--val-cadence={val_cadence}',
    f'--recall-priority={recall_priority}',
    # model / input
    f'--depth={unet_depth}',
    f'--pad-type={pad_types[1]}',
    '--augmented',
    '--width-irc',
    *(f'{width_irc[axis]}' for axis in range(3)),
    # experiment tracking
    f'--project-name={project_name}',
    f'--run-name={run_name}',
)

# dummy run to finish wandb
run(
    'training.CovidSegmentationTrainingApp',
    '--epochs=0',
    '--run-name=to delete',
)

2020-11-21 20:49:17,657 INFO     pid:39616 nb:006:run Running: training.CovidSegmentationTrainingApp(['--num-workers=4', '--epochs=500', '--batch-size=2', '--steps-per-epoch=200', '--val-cadence=1', '--recall-priority=0', '--depth=3', '--pad-type=replicate', '--augmented', '--width-irc', '12', '192', '192', '--project-name=covid19_seg', '--run-name=train run']).main()
2020-11-21 20:49:19,801 INFO     pid:39616 wandb:606:_apply_source_login setting login settings: {}
[34m[1mwandb[0m: Currently logged in as: [33mpeterbacalso[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


2020-11-21 20:49:20,974 INFO     pid:39616 training:210:init_model Using CUDA; 1 devices.
2020-11-21 20:49:23,414 INFO     pid:39616 modules.dsets:315:__init__ TrainingCovid2dSegmentationDataset: training mode, 179 uid's
2020-11-21 20:49:23,415 INFO     pid:39616 modules.dsets:340:__init__ TrainingCovid2dSegmentationDataset: (12, 192, 192) width_irc,  200 steps_per_epoch
2020-11-21 20:49:23,416 INFO     pid:39616 modules.dsets:315:__init__ Covid2dSegmentationDataset: validation mode, 20 uid's
2020-11-21 20:49:23,418 INFO     pid:39616 training:484:main Starting CovidSegmentationTrainingApp, Namespace(augment_flip=False, augment_noise=False, augment_offset=False, augment_rotate=False, augment_scale=False, augmented=True, batch_size=2, ct_window=None, depth=3, epochs=500, num_workers=4, pad_type='replicate', project_name='covid19_seg', recall_priority=0, run_name='train run', steps_per_epoch=200, val_cadence=1, width_irc=['12', '192', '192'])


epoch,loss/trn,loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score
1,0.8008,0.8377,0.4259,1.2366,0.3171,0.5741,0.4085
2,0.6809,0.6344,0.4054,0.9214,0.3922,0.5946,0.4727
3,0.6654,0.6532,0.4921,0.625,0.4483,0.5079,0.4762
4,0.6526,0.6879,0.5298,0.7457,0.3867,0.4702,0.4244
5,0.6641,0.6002,0.3076,1.3068,0.3463,0.6924,0.4617
6,0.6253,0.8053,0.7829,0.1183,0.6474,0.2171,0.3252
7,0.6227,0.6013,0.4365,0.599,0.4847,0.5635,0.5211
8,0.6194,0.6353,0.4117,0.8813,0.4003,0.5883,0.4764
9,0.6046,0.6571,0.3094,1.7273,0.2856,0.6906,0.4041
10,0.603,0.6166,0.1431,2.216,0.2789,0.8569,0.4208


2020-11-21 20:54:17,151 INFO     pid:39616 training:357:log_metrics E1 CovidSegmentationTrainingApp
2020-11-21 20:54:17,153 INFO     pid:39616 training:386:log_metrics E1 trn      0.8008 loss, 0.1242 precision, 0.6778 recall, 0.2099 f1 score 0.3222 miss rate 4.7794 fp to label ratio
2020-11-21 20:59:31,957 INFO     pid:39616 training:357:log_metrics E1 CovidSegmentationTrainingApp
2020-11-21 20:59:31,959 INFO     pid:39616 training:395:log_metrics E1 val      0.8377 loss, 0.3171 precision, 0.5741 recall, 0.4085 f1 score 0.4259 miss rate 1.2366 fp to label ratio
2020-11-21 20:59:34,452 INFO     pid:39616 training:470:save_model Saved model params to saved-models/2020-11-21_20.49.19.600.state
2020-11-21 20:59:34,455 INFO     pid:39616 training:475:save_model Saved model params to saved-models/2020-11-21_20.49.19.best.state
2020-11-21 20:59:34,461 INFO     pid:39616 training:480:save_model SHA1: 4a0b43be43622b21103dc7bf3dfea3bfb681d3c6
2020-11-21 21:05:21,446 INFO     pid:39616 training:3

AssertionError: Caught AssertionError in DataLoader worker process 2.
Original Traceback (most recent call last):
  File "/home/peter/programming/venvs/ml/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 185, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/peter/programming/venvs/ml/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/peter/programming/venvs/ml/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/peter/programming/projects/covid_CT_lung_lesion_segmentation/modules/dsets.py", line 351, in __getitem__
    return self.getitem_cropbox(uid)
  File "/home/peter/programming/projects/covid_CT_lung_lesion_segmentation/modules/dsets.py", line 364, in getitem_cropbox
    center_irc = get_random_center(aug_mask_t, 1)
  File "/home/peter/programming/projects/covid_CT_lung_lesion_segmentation/modules/dsets.py", line 246, in get_random_center
    assert label_value in mask_t, repr(f'{label_value} not in mask')
AssertionError: '1 not in mask'
