In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
import datetime
import torch
import math

from modules.util.util import importstr
from modules.util.logconf import logging
# Notebook-level logger; its records show up under the name 'nb' in the log output.
log = logging.getLogger('nb')

In [4]:
def run(app, *argv):
    """Instantiate the application class named by ``app`` and call its ``main()``.

    Args:
        app: Dotted ``'module.ClassName'`` string, for example
            ``'training.CovidSegmentationTrainingApp'``. It is split at the
            last dot and the resulting parts are handed to ``importstr``.
        *argv: Command-line style arguments. They are collected into a list
            and passed to the application's constructor.

    Exceptions raised while importing, constructing or running the
    application propagate to the caller; in that case no "Finished" line is
    logged.
    """
    # Ask PyTorch to release unused cached GPU memory before starting
    # (presumably to clear leftovers from an earlier run in the same kernel).
    torch.cuda.empty_cache()

    argv = list(argv)
    log.info("Running: {}({!r}).main()".format(app, argv))

    app_cls = importstr(*app.rsplit('.', 1))
    app_cls(argv).main()

    # Use the same "<app>(<argv>).main()" layout as the "Running" line. The
    # previous "{}.{!r}.main()" template rendered the argument list as if it
    # were an attribute of the app, e.g. "App.['--epochs=1'].main()".
    log.info("Finished: {}({!r}).main()".format(app, argv))

In [4]:
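# One-off run of prepcache.CovidPrepCacheApp with a (16, 128, 128) width_irc; the
# recorded output shows it took roughly 2.5 minutes. Left commented out, presumably
# so that re-running the notebook does not repeat it -- uncomment to run it again.
# width_irc=(16,128,128)
# run('prepcache.CovidPrepCacheApp', f'--num-workers={8}', f'--batch-size={20}', '--width-irc', f'{width_irc[0]}', f'{width_irc[1]}', f'{width_irc[2]}')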

2020-12-07 04:56:41,940 INFO     pid:22723 nb:005:run Running: prepcache.CovidPrepCacheApp(['--num-workers=8', '--batch-size=20', '--width-irc', '16', '128', '128']).main()
2020-12-07 04:56:44,332 INFO     pid:22723 prepcache:039:main Starting CovidPrepCacheApp, Namespace(batch_size=20, num_workers=8, width_irc=['16', '128', '128'])
100%|██████████| 10/10 [02:27<00:00, 14.72s/it]
2020-12-07 04:59:11,832 INFO     pid:22723 nb:010:run Finished: prepcache.CovidPrepCacheApp.['--num-workers=8', '--batch-size=20', '--width-irc', '16', '128', '128'].main()


## Basic Unet 3D

In [4]:
# Settings for the 3D U-Net training run launched further down in this section.
project_name = 'covid19_seg'

# Schedule
epochs = 50
steps_per_epoch = 200
batch_size = 2
val_cadence = 5                      # forwarded as --val-cadence

# Model / loss options
unet_depth = 3                       # forwarded as --depth
recall_priority = 0
pad_types = ['zero', 'replicate']    # the launch cell picks one entry by index

# Data
width_irc = (12, 192, 192)           # forwarded as three --width-irc values
windows = ['lung', 'mediastinal', 'shifted_lung']

In [5]:
# Saved model state handed to the training app through --model-path.
model_path = 'saved-models/2020-11-22_11.01.15.best.state'

In [None]:
# Label forwarded to the training app as --run-name.
run_name = 'unet3d train run p2'

In [1]:
# Launch the 3D U-Net training run using the settings defined in the cells above.
run(
    'training.CovidSegmentationTrainingApp',
    # schedule
    f'--epochs={epochs}',
    f'--batch-size={batch_size}',
    f'--steps-per-epoch={steps_per_epoch}',
    f'--val-cadence={val_cadence}',
    # model / loss options
    f'--recall-priority={recall_priority}',
    f'--depth={unet_depth}',
    f'--pad-type={pad_types[1]}',
    '--augmented',
    f'--model-path={model_path}',
    # --width-irc is followed by three separate values
    '--width-irc', *[f'{width_irc[axis]}' for axis in range(3)],
    # bookkeeping
    f'--project-name={project_name}',
    f'--run-name={run_name}',
)

# Zero-epoch follow-up run under a throwaway name, used to finish wandb.
run(
    'training.CovidSegmentationTrainingApp',
    '--epochs=0',
    '--run-name=to delete',
)

## Covid Seg Net

In [5]:
# Settings for the Covid Seg Net run. Several names here (batch_size, epochs,
# steps_per_epoch, ...) are also assigned in the 3D U-Net section, so this cell
# must be executed before the launch cell of this section.
project_name = 'covid19_seg'

# Schedule
epochs = 100
steps_per_epoch = 320
batch_size = 2
val_cadence = 5                      # forwarded as --val-cadence

# Model
depth = 3                            # forwarded as --depth

# Data
width_irc = (16, 128, 128)           # forwarded as three --width-irc values
windows = ['lung', 'mediastinal', 'shifted_lung']   # one entry is picked for --ct-window

In [6]:
# Labels forwarded to the training app as --run-name and --notes.
run_name = 'Train p3 (320spe,2gpu,bs2x3,aug)'
notes = 'Train Run Continuation'

In [7]:
# Saved model state handed to the training app through --model-path.
model_path = 'saved-models/2020-12-08_06.03.53.best.state'

In [8]:
# Launch the Covid Seg Net training run using the settings defined in the cells above.
# NOTE(review): the output saved with this cell ends in
# "ValueError: Tried to step 24002 times. The specified number of total steps is 24000".
# That wording matches what PyTorch's OneCycleLR raises when stepped past its
# total_steps -- TODO confirm how training_alpha sizes/restores its scheduler
# when continuing from --model-path.
run(
    'training_alpha.CovidSegmentationTrainingApp',
    f'--epochs={epochs}',
    f'--batch-size={batch_size}',
    '--num-workers=8',
    f'--ct-window={windows[2]}',
    f'--val-cadence={val_cadence}',
    f'--depth={depth}',
    '--augmented',
    # --width-irc is followed by three separate values
    '--width-irc', *[f'{width_irc[axis]}' for axis in range(3)],
    f'--notes={notes}',
    f'--model-path={model_path}',
    f'--steps-per-epoch={steps_per_epoch}',
    f'--project-name={project_name}',
    f'--run-name={run_name}',
)

# Zero-epoch follow-up run under a throwaway name, used to finish wandb.
run(
    'training.CovidSegmentationTrainingApp',
    '--epochs=0',
    '--run-name=to delete',
)

2020-12-08 19:15:02,333 INFO     pid:8597 nb:005:run Running: training_alpha.CovidSegmentationTrainingApp(['--epochs=100', '--batch-size=2', '--num-workers=8', '--ct-window=shifted_lung', '--val-cadence=5', '--depth=3', '--augmented', '--width-irc', '16', '128', '128', '--notes=Train Run Continuation', '--model-path=saved-models/2020-12-08_06.03.53.best.state', '--steps-per-epoch=320', '--project-name=covid19_seg', '--run-name=Train p3 (320spe,2gpu,bs2x3,aug)']).main()
2020-12-08 19:15:04,901 INFO     pid:8597 wandb:606:_apply_source_login setting login settings: {}
[34m[1mwandb[0m: Currently logged in as: [33mpeterbacalso[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.12 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


2020-12-08 19:15:09,766 INFO     pid:8597 training_alpha:232:init_model Using CUDA; 2 devices.
2020-12-08 19:15:09,851 INFO     pid:8597 modules.dsets:374:__init__ TrainingV2Covid2dSegmentationDataset: training mode, 179 uid's, 320 steps_per_epoch
2020-12-08 19:15:09,854 INFO     pid:8597 modules.dsets:447:__init__ TrainingV2Covid2dSegmentationDataset: (16, 128, 128) width_irc
2020-12-08 19:15:09,857 INFO     pid:8597 modules.dsets:374:__init__ Covid2dSegmentationDataset: validation mode, 20 uid's, 20 steps_per_epoch
2020-12-08 19:15:09,963 INFO     pid:8597 training_alpha:549:main Starting CovidSegmentationTrainingApp, Namespace(augment_flip=False, augment_noise=False, augment_offset=False, augment_rotate=False, augment_scale=False, augmented=True, batch_size=2, ct_window='shifted_lung', depth=3, epochs=100, freeze=False, model_path='saved-models/2020-12-08_06.03.53.best.state', notes='Train Run Continuation', num_workers=8, project_name='covid19_seg', run_name='Train p3 (320spe,2gpu,

epoch,loss/trn,dice_loss/trn,ce_loss/trn,loss/val,dice_loss/val,ce_loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score,surface_dist_val/mean,surface_dist_val/root_mean_squared,surface_dist_val/hausdorff
1,0.2882,0.2711,0.3281,0.3049,0.299,0.3185,0.2935,0.1491,0.8258,0.7065,0.7615,6.4287,15.5576,100.9498
5,0.3031,0.2918,0.3296,0.3017,0.2945,0.3187,0.3123,0.1449,0.8259,0.6877,0.7505,6.1811,15.1413,96.6757
10,0.287,0.2693,0.3282,0.2937,0.283,0.3188,0.2682,0.1643,0.8167,0.7318,0.7719,6.4757,15.6356,100.6566
15,0.272,0.2483,0.3274,0.2984,0.2896,0.3189,0.2965,0.1388,0.8352,0.7035,0.7637,5.8426,14.4757,97.1991
20,0.2847,0.2663,0.3276,0.2951,0.285,0.3185,0.3,0.1435,0.8298,0.7,0.7594,5.5319,14.4782,101.5472
25,0.2816,0.2618,0.3277,0.2955,0.2854,0.3188,0.2964,0.1418,0.8322,0.7036,0.7625,5.5637,14.5396,100.7926
30,0.2857,0.2681,0.327,0.294,0.2835,0.3187,0.2779,0.1701,0.8093,0.7221,0.7632,6.2266,15.9787,102.4967


2020-12-08 19:16:34,126 INFO     pid:8597 training_alpha:406:log_metrics E1 CovidSegmentationTrainingApp
2020-12-08 19:16:34,131 INFO     pid:8597 training_alpha:442:log_metrics E1 trn      0.2882 loss, 0.2711 dice loss, 0.3281 ce loss, 0.8892 precision, 0.8228 recall, 0.8547 f1 score 0.1772 miss rate 0.1025 fp to label ratio
2020-12-08 19:21:00,360 INFO     pid:8597 training_alpha:406:log_metrics E1 CovidSegmentationTrainingApp
2020-12-08 19:21:00,372 INFO     pid:8597 training_alpha:453:log_metrics E1 val      0.3049 loss, 0.2990 dice loss, 0.3185 ce_loss, 0.8258 precision, 0.7065 recall, 0.7615 f1 score 0.2935 miss rate 0.1491 fp to label ratio6.4287 mean distance15.5576 root mean squared distance100.9498 hausdorff distance
2020-12-08 19:21:03,040 INFO     pid:8597 training_alpha:534:save_model Saved model params to saved-models/2020-12-08_19.15.04.320.state
2020-12-08 19:21:03,058 INFO     pid:8597 training_alpha:540:save_model Saved model params to saved-models/2020-12-08_19.15.04

ValueError: Tried to step 24002 times. The specified number of total steps is 24000