In [1]:
# Environment setup: install the packages this notebook imports.
# torch/torchvision and pytorch_lightning are pinned to exact versions;
# NOTE(review): numpy, comet-ml and torchsummary are unpinned, so a fresh
# runtime may resolve different versions than the ones this was run with.
!pip install numpy
!pip install torch==1.5.1 torchvision==0.6.1
!pip install pytorch_lightning==0.8.4
!pip install comet-ml
!pip install torchsummary



In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Wed Jul 29 03:15:18 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Mount your personal Google Drive at /content/drive. The project directory
# used by the later cells lives under this mount point.
from google.colab import drive 
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
%%writefile setup.sh

# Installs the package found in the Drive-hosted `apex` directory, with
# CUDA_HOME pointing at the CUDA 10.1 toolkit.
export CUDA_HOME=/usr/local/cuda-10.1
# Stop here if the directory is missing (e.g. Drive not mounted); otherwise
# `pip install ./` would run against whatever directory the script started in.
cd /content/drive/My\ Drive/KBP_Challenge/apex || exit 1
# List the directory contents so the install log shows what is being built.
ls
pip install -v --no-cache-dir ./

Overwriting setup.sh


In [5]:
# Optional: uncomment to run the setup.sh script written by the previous cell.
# !sh setup.sh

In [6]:
# Root of the project on the mounted Drive. Later cells pass it to the
# training options and use it to build the checkpoints path.
primary_directory = '/content/drive/My Drive/KBP_Challenge'
# Switch the working directory to the project root and list it.
# NOTE(review): presumably this is what makes the `src` package imported in
# the next cell resolvable — confirm. The path is repeated literally here, so
# keep it in sync with primary_directory.
%cd /content/drive/My\ Drive/KBP_Challenge
%ls

/content/drive/My Drive/KBP_Challenge
[0m[01;34mapex[0m/         [01;34mdata[0m/            [01;34mpretrained_models[0m/  setup.sh
[01;34mcheckpoints[0m/  [01;34mlightning_logs[0m/  [01;34mprovided_code[0m/      [01;34msrc[0m/


In [7]:
import os
from getpass import getpass

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.logging import CometLogger
from torchsummary import summary

from src.models.pix2pix_model import Pix2PixModel
from src.models.wgan import WGan
from src.options.train_options import TrainOptions

In [8]:
# Training configuration, written as a command-line style argument list and
# parsed by the project's TrainOptions. Flags followed by a value are
# key/value pairs; flags on their own line (e.g. '--no_scaling') are switches.
# The parsed result is echoed under "Options" in this cell's output.
args = ['--batchSize', '8',
        '--primary_directory', primary_directory,
        '--which_model_netG', 'unet_128_3d',
        '--which_model_netD', 'n_layers_3d',
        '--n_layers_D', '3',
        '--num_D', '1',
        '--norm_D', 'batch',
        '--norm_G', 'batch',
        '--resnet_depth', '18',
        '--which_direction', 'AtoB',
        '--input_nc', '1',
        '--lambda_A', '100',
        '--lr_policy', 'plateau',
        # epoch_count is passed to the Trainer as max_epochs further down.
        '--epoch_count', '200',
        # -1 means "do not resume": the training cell only builds a checkpoint
        # path when load_epoch differs from -1.
        '--load_epoch', '-1',
        '--lr_D', '0.01',
        '--lr_G', '0.01',
        '--lr_max', '0.01',
        '--lr_step_size', '25',
        '--loss_function', 'smoothed_L1',
        '--init_type', 'xavier',
        '--no_scaling',
        '--no_normalization',
        '--no_perceptual_loss',
        '--patience', '5',
        '--n_critic', '1',
        '--n_generator', '1',
        '--cut_blur_mask', '40',
        # NOTE(review): 'cliping' is the flag name as passed to the parser;
        # do not "fix" the spelling here without checking TrainOptions.
        '--weight_cliping_limit', '0.01']

# `opt` is the parsed options object consumed by the model and Trainer cells.
opt = TrainOptions().parse(args)

------------ Options -------------
batchSize: 8
beta1: 0.5
checkpoints_dir: ./checkpoints
continue_train: False
cut_blur_mask: 40
dataset_mode: unaligned
display_freq: 200
display_id: 1
display_port: 8097
display_single_pane_ncols: 0
display_winsize: 128
epoch_count: 200
fineSize: 128
gpu_ids: -1
init_type: xavier
input_nc: 1
inst_noise_sigma: 0.0
inst_noise_sigma_iters: 200
isTrain: True
lambda_A: 100.0
lambda_B: 10.0
lambda_identity: 0.5
lambda_perceptual: 10.0
loadSize: 128
load_epoch: -1
loss_function: smoothed_L1
lr_D: 0.01
lr_G: 0.01
lr_decay_iters: 50
lr_max: 0.01
lr_policy: plateau
lr_step_size: 25
max_dataset_size: inf
model: pix2pix
nThreads: 2
n_critic: 1
n_generator: 1
n_layers_D: 3
name: experiment_name
ndf: 64
ngf: 64
niter: 100
niter_decay: 100
no_augment: False
no_dropout: False
no_flip: False
no_html: False
no_img: False
no_lsgan: False
no_normalization: True
no_perceptual_loss: True
no_scaling: True
norm: batch_3d
norm_D: batch
norm_G: batch
num_D: 1
nwf: 64
output_nc

In [9]:
# Build the pix2pix model from the parsed options.
model = Pix2PixModel(opt, stage='training')
# Print a per-layer summary of the generator after moving it to the GPU.
# (1, 128, 128, 128) is the input size handed to torchsummary — presumably
# one channel (matching --input_nc 1) and a 128^3 volume; TODO confirm.
summary(model.generator.to("cuda"), (1, 128, 128, 128))

  init.xavier_normal(m.weight.data, gain=0.02)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv3d-1       [-1, 64, 64, 64, 64]           4,096
         LeakyReLU-2       [-1, 64, 64, 64, 64]               0
            Conv3d-3      [-1, 128, 32, 32, 32]         524,288
       BatchNorm3d-4      [-1, 128, 32, 32, 32]             256
         LeakyReLU-5      [-1, 128, 32, 32, 32]               0
            Conv3d-6      [-1, 256, 16, 16, 16]       2,097,152
       BatchNorm3d-7      [-1, 256, 16, 16, 16]             512
         LeakyReLU-8      [-1, 256, 16, 16, 16]               0
            Conv3d-9         [-1, 512, 8, 8, 8]       8,388,608
      BatchNorm3d-10         [-1, 512, 8, 8, 8]           1,024
        LeakyReLU-11         [-1, 512, 8, 8, 8]               0
           Conv3d-12         [-1, 512, 4, 4, 4]      16,777,216
      BatchNorm3d-13         [-1, 512, 4, 4, 4]           1,024
        LeakyReLU-14         [-1, 512, 

In [10]:
# Checkpoints live in the `checkpoints` folder under the project root on Drive.
checkpoints_dir = '{}/checkpoints'.format(primary_directory)

# Checkpoint callback monitoring the 'loss' metric (lower is better).
# NOTE(review): in PyTorch Lightning, save_top_k=0 together with
# save_last=False appears to disable checkpoint saving entirely — confirm this
# is intended, since the training cell can resume from `epoch=N.ckpt` files in
# this same directory.
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoints_dir,
    verbose=True,
    save_last=False,
    save_top_k=0,
    monitor='loss',
    mode='min'
)

# Logs the learning rate so scheduler changes are visible in the experiment.
lr_logger = LearningRateLogger()

# The Comet API key is a credential and must not be stored in the notebook.
# Read it from the COMET_API_KEY environment variable; if that is unset or
# empty, prompt for it without echoing the input.
comet_api_key = os.environ.get("COMET_API_KEY") or getpass("Comet API key: ")

comet_logger = CometLogger(
    api_key=comet_api_key,
    project_name="kbp-challenge",
    workspace="mahruskazi"
)

CometLogger will be initialized in online mode


In [None]:
# Resume only when a specific epoch was requested; load_epoch == -1 leaves
# `checkpoint` as None, which starts training from scratch.
checkpoint = (
    '{}/epoch={}.ckpt'.format(checkpoints_dir, opt.load_epoch)
    if opt.load_epoch != -1
    else None
)

# Single-GPU trainer wired to the Comet logger and the callbacks built above.
trainer = pl.Trainer(
    # hardware / schedule
    gpus=1,
    max_epochs=opt.epoch_count,
    accumulate_grad_batches=1,
    # validation cadence
    check_val_every_n_epoch=1,
    num_sanity_val_steps=10,
    limit_val_batches=1.0,
    # logging, callbacks and checkpoint resume
    logger=comet_logger,
    checkpoint_callback=checkpoint_callback,
    callbacks=[lr_logger],
    resume_from_checkpoint=checkpoint,
    weights_summary='full',
)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]
COMET INFO: Experiment is live on comet.ml https://www.comet.ml/mahruskazi/kbp-challenge/64bf7e3e1d854314b28b87091f9b4534


   | Name                                                                    | Type                    | Params
---------------------------------------------------------------------------------------------------------------------
0  | generator                                                               | UnetGenerator           | 167 M 
1  | generator.model                                                         | UnetSkipConnectionBlock | 167 M 
2  | generator.model.model                                                   | Sequential              | 167 M 
3  | generator.model.model.0                                                 | Conv3d                  | 4 K   
4  | generator.model.model.1                                                 | UnetSkipConnectionBl

Number of validation patients: 50




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

100%|██████████| 10/10 [00:07<00:00,  1.32it/s]

{'dvh_score': 48.634659871437954, 'dose_score': 22.686427216751973}
Number of training patients: 150
Number of validation patients: 50





HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

 32%|███▏      | 16/50 [00:11<00:24,  1.39it/s]