# Tuning of hyperparameters
#### Script to train the model multiple times using different values for batch size and learning rate. The training is done using a small subset of the total training dataset and the results are validated against a different subset.

In [1]:
!git clone -l -s git://github.com/juanigp/CT-denoising.git cloned-repo
%cd cloned-repo
from google.colab import drive
drive.mount('/gdrive', force_remount = True)

import os
from IPython.core.debugger import set_trace
from models.EDCNN import EDCNN
from utils import utils
import torch
import torch.nn as nn
import torch.utils.data.sampler as sampler
from torch.autograd import Variable
from matplotlib import pyplot as plt
import random
import numpy as np
import pickle


fatal: destination path 'cloned-repo' already exists and is not an empty directory.
/content/cloned-repo
Mounted at /gdrive


In [0]:
# CSV file listing the paths of the low-resolution and ground-truth patches.
csv_file = r'/gdrive/My Drive/patches/100_FBPPhil.csv'
dataset = utils.CTVolumesDataset(csv_file)

# Shuffle all sample indices once with a fixed seed, so the split below is
# identical on every run.
num_samples = len(dataset)
total_idx = [*range(num_samples)]
random.seed(10)
random.shuffle(total_idx)

# One "chunk" is 10% of the WHOLE dataset; every subset below has this size.
testing_samples_percentage = 0.1
split_index = int(num_samples * testing_samples_percentage)

# First chunk of the shuffled indices -> held-out test set.
# Everything after it (the remaining ~90%) -> full training pool.
testing_idx, training_idx = total_idx[:split_index], total_idx[split_index:]

# This notebook only uses two chunks of the training pool:
#   - its first chunk to train each hyperparameter configuration,
#   - its second chunk to validate it.
training_subset_idx = training_idx[:split_index]
validation_subset_idx = training_idx[split_index:2 * split_index]

# Samplers that restrict a DataLoader to those indices (drawn in random order).
training_subset_sampler = sampler.SubsetRandomSampler(training_subset_idx)
validation_sampler = sampler.SubsetRandomSampler(validation_subset_idx)

In [3]:
# Hyperparameter grid: every (learning rate, batch size) pair is trained from scratch.
bs_list = [4, 8, 16, 32]
lr_list = [0.001, 0.0001, 0.00001]
bs_np, lr_np = np.array(bs_list), np.array(lr_list)
bs_mat, lr_mat = np.meshgrid(bs_np, lr_np)
# Flatten the grid: entry i of bs_np / lr_np describes the i-th configuration.
bs_np = np.reshape(bs_mat, -1)
lr_np = np.reshape(lr_mat, -1)

# One entry per configuration: (list of per-epoch mean losses, {"lr", "batch_size"}).
training_losses = []
validation_losses = []

num_epochs = 100
criterion = nn.L1Loss()
# Loop-invariant, so done once instead of once per configuration.
if torch.cuda.is_available():
  criterion.cuda()

# Pickle file that receives (training_losses, validation_losses).
filename = r'/gdrive/My Drive/patches/hyperparams_analysis_2.txt'

# Fail fast: incomplete batches are dropped, so a batch size larger than a subset
# would produce zero batches and a division by zero when averaging the loss --
# possibly only after several other configurations have already been trained.
smallest_subset_size = min(len(training_subset_idx), len(validation_subset_idx))
if max(bs_list) > smallest_subset_size:
  raise ValueError(
      "Largest batch size ({}) exceeds the smallest data subset ({} samples); "
      "no full batch can be formed.".format(max(bs_list), smallest_subset_size))


def _run_epoch(model, dataloader, criterion, optimizer=None):
  """Run one pass over `dataloader` and return the mean per-batch loss.

  When `optimizer` is given, the model is set to training mode and updated after
  every batch. Otherwise the model is set to evaluation mode and gradient
  tracking is disabled. `dataloader` must yield at least one batch.
  """
  is_training = optimizer is not None
  model.train(is_training)
  total_loss = 0
  with torch.set_grad_enabled(is_training):
    for lo_res, hi_res in dataloader:
      # Insert a singleton dimension at index 1 of the input and of the target
      # (presumably the channel axis expected by EDCNN -- TODO confirm).
      lo_res = utils.var_or_cuda(lo_res.unsqueeze(1))
      hi_res = utils.var_or_cuda(hi_res)
      outputs = model(lo_res)
      loss = criterion(outputs, hi_res.unsqueeze(1))
      total_loss += loss.item()
      if is_training:
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
  return total_loss / len(dataloader)


for i in range(len(lr_np)):
  training_loss_list = []
  validation_loss_list = []
  lr = lr_np[i]
  bs = int(bs_np[i])
  # drop_last=True skips the final incomplete batch inside the loader, so it is
  # never loaded or moved to the device just to be discarded.
  training_subset_dataloader = torch.utils.data.DataLoader(
      dataset=dataset, batch_size=bs, sampler=training_subset_sampler, drop_last=True)
  validation_dataloader = torch.utils.data.DataLoader(
      dataset=dataset, batch_size=bs, sampler=validation_sampler, drop_last=True)

  # Fresh model and optimizer for every configuration.
  model = EDCNN()
  optimizer = torch.optim.Adam(model.parameters(), lr = lr)
  if torch.cuda.is_available():
    model.cuda()

  for epoch in range(num_epochs):
    training_loss_list.append(
        _run_epoch(model, training_subset_dataloader, criterion, optimizer))
    print('Training epoch [{}/{}]' .format(epoch+1, num_epochs) )

    validation_loss_list.append(
        _run_epoch(model, validation_dataloader, criterion))
    print('Validation epoch [{}/{}]'.format(epoch+1, num_epochs) )

  training_losses.append( (training_loss_list, {"lr": lr, "batch_size": bs} ) )
  validation_losses.append( (validation_loss_list, {"lr": lr, "batch_size": bs} ) )

  # Save after every configuration so that an interrupted session keeps the
  # results gathered so far; the final file matches a single save at the end.
  with open(filename, "wb") as fp:   #Pickling
    pickle.dump((training_losses, validation_losses), fp)

Training epoch [1/100]
Validation epoch [1/100]
Training epoch [2/100]
Validation epoch [2/100]
Training epoch [3/100]
Validation epoch [3/100]
Training epoch [4/100]
Validation epoch [4/100]
Training epoch [5/100]
Validation epoch [5/100]
Training epoch [6/100]
Validation epoch [6/100]
Training epoch [7/100]
Validation epoch [7/100]
Training epoch [8/100]
Validation epoch [8/100]
Training epoch [9/100]
Validation epoch [9/100]
Training epoch [10/100]
Validation epoch [10/100]
Training epoch [11/100]
Validation epoch [11/100]
Training epoch [12/100]
Validation epoch [12/100]
Training epoch [13/100]
Validation epoch [13/100]
Training epoch [14/100]
Validation epoch [14/100]
Training epoch [15/100]
Validation epoch [15/100]
Training epoch [16/100]
Validation epoch [16/100]
Training epoch [17/100]
Validation epoch [17/100]
Training epoch [18/100]
Validation epoch [18/100]
Training epoch [19/100]
Validation epoch [19/100]
Training epoch [20/100]
Validation epoch [20/100]
Training epoch [21

'\nwith open(filename, "rb") as fp:   # Unpickling\n  b = pickle.load(fp)\n'