In [3]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm
from dataset import *
from model import *
from loss import *
import os
# import SimpleITK as sitk
%matplotlib widget

In [4]:
mode='gpu'

In [5]:
if mode=='gpu':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # after switch device, you need restart the kernel
#     torch.cuda.set_device(1)
    torch.set_default_tensor_type('torch.cuda.DoubleTensor')
else:
    device = torch.device('cpu')
    torch.set_default_dtype(torch.float64)

1. For classifications(segmentation=voxel-wise classification), `F.softmax(output, dim=1)` is very necessary at the end of the model, as it constraints the output into a probability, or you may have negative value that you also have no clue where it comes from.
2. The numerator in dice loss for each category is very much like the cross entropy: a softmax vector inner product with a one-hot vector - only the value at where one is matters.
2. For segmentation, use dice loss.

## Training
### initialization

In [6]:
resume = False
save_model = False
print(f'resume:{resume}, save_model:{save_model}')
output_dir = 'Models/UnetNopatch'

resume:False, save_model:False


In [9]:
epoch_loss_list = []
epoch_num = 301
start_epoch_num = 0
batch_size = 12
learning_rate = 1e-3

model = UNet64()
model.train()
if mode=='gpu':
    model.cuda()
net = torch.nn.DataParallel(model, device_ids=[0, 1])
# criterion = DiceLoss()
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate)

dataset = UnetDataset(root_dir='/home/sci/kyle.anderson/lymph_nodes/Dataset')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

if resume:
    checkpoint = torch.load(f'{output_dir}/epoch_{start_epoch_num-1}_checkpoint.pth.tar')    
    model.load_state_dict(checkpoint['model_state_dict'])
#     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    
    with open(f'{output_dir}/loss.txt', 'a') as f:
        f.write(f'From {start_epoch_num} to {epoch_num+start_epoch_num}\n')
        f.write(f'Dice; Adadelta, lr={learning_rate}; batch size: {batch_size}\n')
else:
    start_epoch_num = 0  
    
    with open(f'{output_dir}/loss.txt', 'w+') as f:
        f.write(f'From {start_epoch_num} to {epoch_num+start_epoch_num}\n')
        f.write(f'Dice; Adadelta: lr={learning_rate}; batch size: {batch_size}\n')
    
print(f'Starting from iteration {start_epoch_num} to iteration {epoch_num+start_epoch_num}')

# params 464849, # conv layers 30


FileNotFoundError: [Errno 2] No such file or directory: 'Models/UnetNopatch/loss.txt'

### process

In [6]:
for epoch in tqdm(range(start_epoch_num, start_epoch_num+epoch_num)):
    epoch_loss = 0
            
    for i, batched_sample in tqdm(enumerate(dataloader)):
        '''innerdomain backpropagate'''
#         print(i)
        input_data = batched_sample['img'].double()#.to(device)
#         print(input.shape)
        input_data.requires_grad = True
        # u_pred: [batch_size, *data_shape, feature_num] = [1, 5, ...]
        output_pred = net(input_data)
        output_true = batched_sample['mask']#.to(device)#.double()
#         print(output_pred.shape, output_true.shape)
    
        optimizer.zero_grad()
#         loss = criterion(output_pred, output_true.squeeze())
        loss = criterion(output_pred, output_true.double())
        loss.backward()
        epoch_loss += loss.item()
        optimizer.step()
        
    with open(f'{output_dir}/loss.txt', 'a') as f:
        f.write(f'{epoch_loss}\n')
    
    print(f'epoch {epoch} innerdomain loss: {epoch_loss}')#, norm: {torch.norm(f_pred,2)**2}
    epoch_loss_list.append(epoch_loss)
    if epoch%1==0:       
        if save_model:
            torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
#             'optimizer_bd_state_dict': optimizer_bd.state_dict(),
            'loss': epoch_loss,
#             'loss_bd': epoch_loss_bd
            }, f'{output_dir}/epoch_{epoch}_checkpoint.pth.tar')

  0%|          | 0/301 [00:00<?, ?it/s]
0it [00:39, ?it/s][A
  0%|          | 0/301 [00:39<?, ?it/s]


IndexError: too many indices for tensor of dimension 4

In [None]:
print(input_id.shape)
print(output_pred_id.shape, output_true_id.shape)

In [None]:
output_true_id.min()

In [None]:
plt.figure(figsize=(7,5))
plt.title('Innerdomain loss')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.plot(epoch_loss_list)
plt.savefig(f'{output_dir}/adadelta_loss_1e-1.png')