# Training of EDCNN

In [0]:
!git clone -l -s git://github.com/juanigp/CT-denoising.git cloned-repo
%cd cloned-repo
from google.colab import drive
drive.mount('/gdrive', force_remount = True)

import os
from IPython.core.debugger import set_trace
from models.EDCNN import EDCNN
from utils import utils
import torch
import torch.nn as nn
import torch.utils.data.sampler as sampler
from torch.autograd import Variable
from matplotlib import pyplot as plt
import random
import numpy as np

Cloning into 'cloned-repo'...
remote: Enumerating objects: 250, done.[K
remote: Counting objects: 100% (250/250), done.[K
remote: Compressing objects: 100% (201/201), done.[K
remote: Total 250 (delta 121), reused 134 (delta 44), pack-reused 0[K
Receiving objects: 100% (250/250), 40.47 MiB | 25.24 MiB/s, done.
Resolving deltas: 100% (121/121), done.
/content/cloned-repo
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive


## Hyperparameters, model, dataset and dataloader

In [0]:
#hyperparameters:
num_epochs = 100
batch_size = 32
learning_rate = 0.00001

#instantiating the model:
model = EDCNN()

#loss function
criterion = nn.L1Loss()

#if gpu available, move the model (and loss) there BEFORE building the optimizer,
#as recommended by the torch.optim documentation, so the optimizer is constructed
#from the parameters that will actually be trained
if torch.cuda.is_available():
    model.cuda()
    criterion.cuda()

#optimizer algorithm
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
    
#dataset and dataloaders
#the csv file lists the directories of the lo res and ground truth patches
csv_file = r'/gdrive/My Drive/patches/100_FBPPhil.csv'
dataset = utils.CTVolumesDataset(csv_file)

#train / test split:
#sample indices are shuffled with a fixed seed, so the split is the same every time
num_samples = len(dataset)
total_idx = list(range(num_samples))
random.seed(10)
random.shuffle(total_idx)

#fraction of the samples held out for testing (10%)
testing_samples_percentage = 0.1
split_index = int( num_samples * testing_samples_percentage )
#the head of the shuffled indices is the test set, the remaining 90% is the training set
testing_idx, training_idx = total_idx[:split_index], total_idx[split_index:]

#each subset is visited in random order through its own sampler
training_sampler = sampler.SubsetRandomSampler(training_idx)
testing_sampler = sampler.SubsetRandomSampler(testing_idx)

#one dataloader per subset, both reading from the same underlying dataset
training_dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size = batch_size,
    sampler = training_sampler,
)
testing_dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size = batch_size,
    sampler = testing_sampler,
)

## Training the model!

In [0]:
#save a checkpoint of the model!
def save_checkpoint(state, filename='checkpoint.pth.tar'):
    """Serialize `state` to `filename` with torch.save.

    Args:
        state: object to store (the notebook passes a dict holding the epoch
            and the model / optimizer state dicts).
        filename: destination path; its parent directory is created if missing.
    """
    import os  # local import keeps the helper usable on its own

    parent_dir = os.path.dirname(filename)
    #a bare file name has no parent directory to create
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(state, filename)

#directory to save the models
models_dir = r'/gdrive/My Drive/models'
#make sure it exists, otherwise appending to the metrics file fails after the first epoch
os.makedirs(models_dir, exist_ok=True)
#file to record metrics
metrics_file_name = 'training_loss.csv'
metrics_file_dir = os.path.join(models_dir, metrics_file_name)

#loading a previously trained model
resume_checkpoint = False
#path of the checkpoint to resume from; must be set when resume_checkpoint is True
checkpoint_file_dir = None
if resume_checkpoint:
  if checkpoint_file_dir is None:
    raise ValueError('resume_checkpoint is True but checkpoint_file_dir is not set')
  #map to cpu so a checkpoint written on a gpu runtime also loads without one;
  #load_state_dict then copies the values into the existing model / optimizer
  checkpoint = torch.load(checkpoint_file_dir, map_location='cpu')
  start_epoch = checkpoint['epoch']
  model.load_state_dict(checkpoint['model'])
  optimizer.load_state_dict(checkpoint['optimizer'])
else:
  start_epoch = 0

#TRAINING
def _mean_epoch_loss(dataloader):
    """Return the mean per-batch loss of `model` over `dataloader`.

    Runs in eval mode without gradient tracking. Batches whose size differs
    from `batch_size` are skipped, as in the training pass. Returns NaN when
    no full batch is available instead of dividing by zero.
    """
    total_loss = 0.0
    num_batches = 0
    model.eval()
    with torch.no_grad():
        for lo_res, hi_res in dataloader:
            #discard the incomplete batch before paying for the device transfer
            if lo_res.size(0) != batch_size:
                continue
            #add an extra dimension:
            lo_res = utils.var_or_cuda( lo_res.unsqueeze(1) )
            hi_res = utils.var_or_cuda(hi_res)
            outputs = model(lo_res)
            loss = criterion(outputs, hi_res.unsqueeze(1))
            total_loss += loss.item()
            num_batches += 1
    return total_loss / num_batches if num_batches else float('nan')


for epoch in range(start_epoch, num_epochs):
    #training epoch
    model.train()
    for lo_res, hi_res in training_dataloader:
        #only full batches are used for the weight updates
        if lo_res.size(0) != batch_size:
            continue
        #add an extra dimension:
        lo_res = utils.var_or_cuda( lo_res.unsqueeze(1) )
        hi_res = utils.var_or_cuda(hi_res)
        #forward pass
        outputs = model(lo_res)
        loss = criterion(outputs, hi_res.unsqueeze(1))
        #backward & optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    #checkpointing after each training epoch is currently disabled; to enable it:
    #checkpoint_file_dir = os.path.join(models_dir, 'EDCNN_checkpoint_epoch_' + str(epoch + 1) + '.pth.tar' )
    #save_checkpoint({
    #    'epoch': epoch + 1,
    #    'model': model.state_dict(),
    #    'optimizer' : optimizer.state_dict(),
    #}, checkpoint_file_dir)

    #EVALUATION OF MODEL AFTER EPOCH
    #both losses are measured with the weights reached at the end of the epoch

    #ON TRAINING DATASET
    training_epoch_loss = _mean_epoch_loss(training_dataloader)
    print('Training epoch [{}/{}]'.format(epoch+1, num_epochs))

    #ON TESTING DATASET
    testing_epoch_loss = _mean_epoch_loss(testing_dataloader)
    print('Testing epoch [{}/{}]'.format(epoch+1, num_epochs) )

    #append one "training_loss,testing_loss" row per epoch to the metrics file
    csv_line = str(training_epoch_loss) + ',' + str(testing_epoch_loss) + '\n'
    with open(metrics_file_dir , 'a+') as file:
        file.write(csv_line)

Training epoch [1/100]
Testing epoch [1/100]
Training epoch [2/100]
Testing epoch [2/100]
Training epoch [3/100]
Testing epoch [3/100]
Training epoch [4/100]
Testing epoch [4/100]
Training epoch [5/100]
Testing epoch [5/100]
Training epoch [6/100]
Testing epoch [6/100]
Training epoch [7/100]
Testing epoch [7/100]
Training epoch [8/100]
Testing epoch [8/100]
Training epoch [9/100]
Testing epoch [9/100]
Training epoch [10/100]
Testing epoch [10/100]
Training epoch [11/100]
Testing epoch [11/100]
Training epoch [12/100]
Testing epoch [12/100]
Training epoch [13/100]
Testing epoch [13/100]
Training epoch [14/100]
Testing epoch [14/100]
Training epoch [15/100]
Testing epoch [15/100]
Training epoch [16/100]
Testing epoch [16/100]
Training epoch [17/100]
Testing epoch [17/100]
Training epoch [18/100]
Testing epoch [18/100]
Training epoch [19/100]
Testing epoch [19/100]
Training epoch [20/100]
Testing epoch [20/100]
Training epoch [21/100]
Testing epoch [21/100]
Training epoch [22/100]
Testing