# Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import datetime
import torch
import math
from pathlib import Path

from modules.util.util import importstr
from modules.util.logconf import logging
# Notebook-scoped logger named 'nb'; `logging` is the name imported from
# modules.util.logconf above, not a direct `import logging`.
log = logging.getLogger('nb')

from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Root folder holding the datasets, taken from the `datasets_path` environment variable.
data_dir = os.environ.get('datasets_path')
if not data_dir:
    # Fail here with an actionable message: interpolating a missing value would
    # silently produce the bogus relative path 'None/COVID-19-20_v2'.
    raise RuntimeError(
        "Environment variable 'datasets_path' is not set; "
        "define it in the shell or in a .env file before running this notebook."
    )

# Location of the COVID-19-20_v2 dataset under the datasets root.
local_dataset_path = Path(data_dir) / 'COVID-19-20_v2'

In [3]:
def run(app, *argv, num_workers=4):
    """Instantiate the application class named by ``app`` and call its ``main()``.

    Args:
        app: Dotted path of the form ``'module.ClassName'``. It is split on the
            last dot and the two parts are passed to ``importstr`` to obtain
            the class.
        *argv: Command-line style argument strings forwarded to the class
            constructor as a single list.
        num_workers: Value for the ``--num-workers`` option that is prepended
            to ``argv``. Defaults to 4, the value that used to be hard-coded.
    """
    # Release cached GPU memory before starting a new run in this kernel.
    torch.cuda.empty_cache()

    # Prepended so that arguments supplied by the caller come later in the list.
    argv = [f'--num-workers={num_workers}', *argv]
    log.info("Running: %s(%r).main()", app, argv)

    app_cls = importstr(*app.rsplit('.', 1))
    app_cls(argv).main()

    # Same shape as the "Running" message so the two log lines pair up.
    log.info("Finished: %s(%r).main()", app, argv)

# Experiment

In [4]:
# Settings for the quick 2-epoch experiment run launched further down.

# Loop control
experiment_epochs = 2
val_cadence = 1
batch_size = 2

# Options forwarded to the training app as command-line flags
unet_depth = 3
recall_priority = 0
pad_types = ['zero', 'replicate']
width_cri = (192, 192, 16)

# Not referenced by the run(...) cells visible in this notebook
ratio_int = 1

# Experiment-tracking project name and training data location
project_name = 'covid19_seg_v2'
data_path = str(local_dataset_path / 'Train')

In [5]:
# Label forwarded to the training app through the --run-name flag.
run_name = 'aug gpu'

In [None]:
# Assemble the command-line style arguments for the augmented experiment run,
# then launch it through run().
training_app = 'monai_training.CovidSegmentationTrainingApp'
experiment_args = [
    f'--batch-size={batch_size}',
    f'--epochs={experiment_epochs}',
    f'--val-cadence={val_cadence}',
    f'--data-path={data_path}',
    '--augmented',
    f'--recall-priority={recall_priority}',
    f'--depth={unet_depth}',
    f'--pad-type={pad_types[0]}',
    '--width-cri',
    *(f'{width_cri[axis]}' for axis in range(3)),
    f'--project-name={project_name}',
    f'--run-name={run_name}',
]
run(training_app, *experiment_args)

# dummy run to finish wandb
closing_args = ['--epochs=0', f'--project-name={project_name}', '--run-name=to delete']
run(training_app, *closing_args)

2020-11-26 23:51:32,766 INFO     pid:71497 nb:006:run Running: monai_training.CovidSegmentationTrainingApp(['--num-workers=4', '--batch-size=2', '--epochs=2', '--val-cadence=1', '--data-path=/mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train', '--augmented', '--recall-priority=0', '--depth=3', '--pad-type=zero', '--width-cri', '192', '192', '16', '--project-name=covid19_seg_v2', '--run-name=aug gpu']).main()
2020-11-26 23:51:34,018 INFO     pid:71497 wandb:606:_apply_source_login setting login settings: {}
[34m[1mwandb[0m: Currently logged in as: [33mpeterbacalso[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


2020-11-26 23:51:35,140 INFO     pid:71497 monai_training:199:init_model Using CUDA; 1 devices.
2020-11-26 23:51:37,021 INFO     pid:71497 modules.monai_dset:076:get_ds training: image/label (199) folder: /mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train
2020-11-26 23:51:37,022 INFO     pid:71497 modules.monai_dset:095:get_ds training: train 8 val 3, folder: /mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train
Load and cache transformed data: 100%|██████████| 8/8 [00:32<00:00,  4.12s/it]
Load and cache transformed data: 100%|██████████| 3/3 [00:10<00:00,  3.51s/it]
2020-11-26 23:52:20,555 INFO     pid:71497 monai_training:470:main Starting CovidSegmentationTrainingApp, Namespace(augment_flip=False, augment_noise=False, augment_offset=False, augment_rotate=False, augment_scale=False, augmented=True, batch_size=2, data_path='/mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train', depth=3, epochs=2, num_workers=4, pad_type='zero',

epoch,loss/trn,loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score
1,0.986,0.9755,0.0233,72.0386,0.0134,0.9767,0.0264
2,0.953,0.9697,0.0374,55.0964,0.0172,0.9626,0.0337


2020-11-26 23:52:39,636 INFO     pid:71497 monai_training:337:log_metrics E1 CovidSegmentationTrainingApp
2020-11-26 23:52:39,637 INFO     pid:71497 monai_training:366:log_metrics E1 trn      0.9860 loss, 0.0070 precision, 0.5183 recall, 0.0138 f1 score 0.4817 miss rate 73.7583 fp to label ratio
2020-11-26 23:52:53,268 INFO     pid:71497 monai_training:337:log_metrics E1 CovidSegmentationTrainingApp
2020-11-26 23:52:53,269 INFO     pid:71497 monai_training:375:log_metrics E1 val      0.9755 loss, 0.0134 precision, 0.9767 recall, 0.0264 f1 score 0.0233 miss rate 72.0386 fp to label ratio
2020-11-26 23:52:54,305 INFO     pid:71497 monai_training:456:save_model Saved model params to saved-models/2020-11-26_23.51.33.16.state
2020-11-26 23:52:54,308 INFO     pid:71497 monai_training:461:save_model Saved model params to saved-models/2020-11-26_23.51.33.best.state
2020-11-26 23:52:54,312 INFO     pid:71497 monai_training:466:save_model SHA1: 376c6900c2bf51e39516273148b0cc2aca929e46
2020-11-26

# Train

In [4]:
# Settings for the full 500-epoch training run launched further down.

# Loop control
train_epochs = 500
val_cadence = 5
batch_size = 2

# Options forwarded to the training app as command-line flags
unet_depth = 3
recall_priority = 0
pad_types = ['zero', 'replicate']
width_cri = (168, 168, 12)

# Not referenced by the run(...) cells visible in this notebook
ratio_int = 1

# Experiment-tracking project name and training data location
project_name = 'covid19_seg_v2'
data_path = str(local_dataset_path / 'Train')

In [5]:
# Label forwarded to the training app through the --run-name flag.
run_name = '3D Unet'

In [6]:
# Assemble the command-line style arguments for the full training run,
# then launch it through run().
training_app = 'monai_training.CovidSegmentationTrainingApp'
train_args = [
    f'--batch-size={batch_size}',
    f'--epochs={train_epochs}',
    f'--val-cadence={val_cadence}',
    f'--data-path={data_path}',
    f'--recall-priority={recall_priority}',
    f'--depth={unet_depth}',
    f'--pad-type={pad_types[1]}',
    '--width-cri',
    *(f'{width_cri[axis]}' for axis in range(3)),
    f'--project-name={project_name}',
    f'--run-name={run_name}',
]
run(training_app, *train_args)

# dummy run to finish wandb
closing_args = ['--epochs=0', f'--project-name={project_name}', '--run-name=to delete']
run(training_app, *closing_args)

2020-11-20 12:04:42,862 INFO     pid:4709 nb:006:run Running: monai_training.CovidSegmentationTrainingApp(['--num-workers=4', '--batch-size=2', '--epochs=500', '--val-cadence=5', '--data-path=/mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train', '--recall-priority=0', '--depth=3', '--pad-type=replicate', '--width-cri', '168', '168', '12', '--project-name=covid19_seg_v2', '--run-name=3D Unet']).main()
2020-11-20 12:04:44,301 INFO     pid:4709 wandb:606:_apply_source_login setting login settings: {}
[34m[1mwandb[0m: Currently logged in as: [33mpeterbacalso[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


2020-11-20 12:04:45,671 INFO     pid:4709 monai_training:149:init_model Using CUDA; 1 devices.
2020-11-20 12:04:48,455 INFO     pid:4709 modules.monai_dset:076:get_ds training: image/label (199) folder: /mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train
2020-11-20 12:04:48,459 INFO     pid:4709 modules.monai_dset:094:get_ds training: train 179 val 20, folder: /mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train
Load and cache transformed data: 100%|██████████| 179/179 [05:47<00:00,  1.94s/it]
Load and cache transformed data: 100%|██████████| 20/20 [00:40<00:00,  2.05s/it]
2020-11-20 12:11:16,519 INFO     pid:4709 monai_training:413:main Starting CovidSegmentationTrainingApp, Namespace(batch_size=2, data_path='/mnt/16614710-4b50-44ce-a3f3-13105cdf14ad/datasets/COVID-19-20_v2/Train', depth=3, epochs=500, num_workers=4, pad_type='replicate', project_name='covid19_seg_v2', recall_priority=0, run_name='3D Unet', val_cadence=5, width_cri=['168', '168',

epoch,loss/trn,loss/val,metrics_val/miss_rate,metrics_val/fp_to_mask_ratio,pr_val/precision,pr_val/recall,pr_val/f1_score
1,0.901,0.8053,0.0659,6.3429,0.1284,0.9341,0.2257
5,0.7164,0.692,0.0409,3.6595,0.2077,0.9591,0.3414
10,0.6892,0.6796,0.0345,3.8348,0.2011,0.9655,0.3329
15,0.676,0.4657,0.1252,1.1108,0.4406,0.8748,0.586
20,0.6741,0.5094,0.0818,1.4764,0.3835,0.9182,0.541
25,0.6557,0.4753,0.1504,1.198,0.4149,0.8496,0.5576
30,0.6863,0.5354,0.0702,2.0799,0.3089,0.9298,0.4638
35,0.6543,0.4458,0.2457,0.6328,0.5438,0.7543,0.632


2020-11-20 12:17:35,991 INFO     pid:4709 monai_training:280:log_metrics E1 CovidSegmentationTrainingApp
2020-11-20 12:17:36,002 INFO     pid:4709 monai_training:309:log_metrics E1 trn      0.9010 loss, 0.0593 precision, 0.7643 recall, 0.1101 f1 score 0.2357 miss rate 12.1155 fp to label ratio
2020-11-20 12:20:43,919 INFO     pid:4709 monai_training:280:log_metrics E1 CovidSegmentationTrainingApp
2020-11-20 12:20:43,921 INFO     pid:4709 monai_training:318:log_metrics E1 val      0.8053 loss, 0.1284 precision, 0.9341 recall, 0.2257 f1 score 0.0659 miss rate 6.3429 fp to label ratio
2020-11-20 12:20:45,107 INFO     pid:4709 monai_training:399:save_model Saved model params to saved-models/2020-11-20_12.04.43.537.state
2020-11-20 12:20:45,110 INFO     pid:4709 monai_training:404:save_model Saved model params to saved-models/2020-11-20_12.04.43.best.state
2020-11-20 12:20:45,115 INFO     pid:4709 monai_training:409:save_model SHA1: c6c0e59fb75da6ddbcd46080deb2bd993f07965a
2020-11-20 12:27:

RuntimeError: DataLoader worker (pid 15423) is killed by signal: Killed. 