In [1]:
%matplotlib inline

import os
import time
import math
import glob
import shutil
import numpy as np
#import pandas as pd
import matplotlib.pyplot as plt
import h5py

from pathlib import Path
from torch.utils.data import Dataset, DataLoader, sampler
from torch.cuda.amp import GradScaler, autocast
from PIL import Image

import torch
from torch.nn import functional as F
from torch.autograd import Variable

##############################
from models import deeplab_resnet_hyper
from models import deeplab_xception
from datasets import datasets

from utils import manager as mgr
from utils import metrics
from utils import losses
from utils import log
from utils import img_utils
##############################

nr_classes = 4
nr_channels = 7
#exp_name = 'deeplab_df_std_f01'
exp_name = 'deeplab_std_resnet_cr32'
DEVICE = "cuda:1"
#WEIGHTS_PATH = 'weights_f01/'
WEIGHTS_PATH = 'weights/' + exp_name + '/'

backbone = 'resnet'#'xception'#'resnet'

# set device
#device = torch.device(DEVICE if torch.cuda.is_available() else "cpu")
device = "cuda:1"
device_nr = int(device[-1])
#device = "cpu"
print(device)

# batch size (256) 56, (512) 15
batch_size = 72
split={'train':0.7, 'val':0.1, 'test':0.2}
num_workers = 4
pin_memory = True

LR = 0.001
LR_DECAY = 0.995
DECAY_EVERY_N_EPOCHS = 1
N_EPOCHS = 25
start_epoch = 1

cr = 32
dsize = 256
ddepth = 3
os = 16

cuda:1


In [2]:
## Creating the dataset
path_dataset = "/home/philipp/Data/dataset_256_df_177.h5"
#path_dataset = "/home/philipp/Data/dataset_ext_256_df_177.h5"
dataset = datasets.ForestDataset(path_dataset, ground_truth='ground_truth_std')

if dataset[0][0].shape[1] == 256:
    chunk_size = 1000
else:
    chunk_size = 0

# shuffle and split
train_sampler, val_sampler, test_sampler = dataset.get_sampler(split=split, \
                shuffle_dataset=True, random_seed=399, chunk_size=chunk_size, fold=0)

# dataloader
train_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=train_sampler, num_workers=num_workers, pin_memory=pin_memory)
val_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=val_sampler, num_workers=num_workers, pin_memory=pin_memory)
test_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=test_sampler, num_workers=num_workers, pin_memory=pin_memory)
print(len(dataset))
print(len(train_sampler.indices))
print(len(val_sampler.indices))
print(len(test_sampler.indices))

42716
29908
4268
8540


In [3]:
# show examples
#img_utils.view_sample(train_dl, 10)

In [4]:
#############
#   train
#############
def train(model, trn_loader, optimizer, criterion, epoch):
    
    model.train()
    step = 0
    metric = {'loss': 0.0, \
              'acc_all':0.0, \
              'acc_mean':0.0, \
              'jacc':0.0, \
              'dice':0.0}

    for idx, data in enumerate(trn_loader):
        x = Variable(data[0].to(device))
        y = Variable(data[1].to(device))
        step += 1
        
        # forward pass
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        
        # backward pass
        loss.backward()
        # update weights
        optimizer.step()

        # calculate predictions
        pred = mgr.get_predictions(output)
        
        # calculate metrics for the batch
        overall_acc, avg_per_class_acc, avg_jacc, avg_dice = metrics.eval_metrics(y, pred, num_classes=nr_classes, device=device)
        # update running metrics
        for i, m in zip(metric, [loss, overall_acc, avg_per_class_acc, avg_jacc, avg_dice]):
            metric[i] += m*trn_loader.batch_size
        
        # print metrics to console
        if step % 100 == 0:
            print('Step: {}  Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
                  .format(step, loss, overall_acc, avg_per_class_acc, avg_jacc, avg_dice))
    
    # calculate metrics for the epoch      
    for i in metric:
        metric[i] /= len(train_sampler.indices)
                    
    return metric

#############
#   test
#############
def test(model, test_loader, criterion, epoch=1):
    
    model.eval()
    step = 0
    metric = {'loss': 0.0, \
              'acc_all':0.0, \
              'acc_mean':0.0, \
              'jacc':0.0, \
              'dice':0.0}
    
    for data, target in test_loader:
        x = Variable(data.to(device))
        y = Variable(target.to(device))
        step += 1
        
        # forward pass
        with torch.no_grad():
            output = model(x)
            loss = criterion(output, y)
            #test_loss += criterion(output, y).data.item()
            
        # calculate predictions    
        pred = mgr.get_predictions(output)
        
        # calculate metrics for the batch
        overall_acc, avg_per_class_acc, avg_jacc, avg_dice = metrics.eval_metrics(y, pred, num_classes=nr_classes, device=device)
        # update running metrics
        for i, m in zip(metric, [loss, overall_acc, avg_per_class_acc, avg_jacc, avg_dice]):
            metric[i] += m*test_loader.batch_size
            
        # print metrics to console
        if step % 100 == 0:
            print('Step: {}  Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
                  .format(step, loss, overall_acc, avg_per_class_acc, avg_jacc, avg_dice))
        
    # calculate metrics for the epoch
    for i in metric:
        metric[i] /= len(val_sampler.indices)
                    
    return metric

In [5]:
# Network definition
if backbone == 'xception':
    model = deeplab_xception.DeepLabv3_plus(nInputChannels=nr_channels, \
                                            n_classes=nr_classes, os=os, pretrained=False)
elif backbone == 'resnet':
    model = deeplab_resnet_hyper.DeepLabv3_plus(nInputChannels=nr_channels, \
                                                n_classes=nr_classes, os=os, \
                                                pretrained=False, \
                                                cr=cr, \
                                                dsize=dsize, \
                                                ddepth=ddepth)
else:
    raise NotImplementedError

try:
    start_epoch = mgr.load_weights(model, WEIGHTS_PATH + 'latest.pth')
    #load_weights(model, WEIGHTS_PATH+'latest_5d.pt')
    print("weights loaded")
except:
    #model.apply(weights_init)
    #os.makedirs(WEIGHTS_PATH, exist_ok=True) 
    print("no weights found")

torch.cuda.set_device(device=device_nr)
model.cuda()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.SGD(net.parameters(), lr=p['lr'], momentum=p['momentum'], weight_decay=p['wd'])
criterion = losses.Dice_Loss()
#criterion = torch.nn.CrossEntropyLoss()

#    
#if resume_epoch == 0:
#    print("Training deeplabv3+ from scratch...")
#else:
#    print("Initializing weights from: {}...".format(
#        os.path.join(save_dir, 'models', modelName + '_epoch-' + str(resume_epoch - 1) + '.pth')))
#    net.load_state_dict(
#        torch.load(os.path.join(save_dir, 'models', modelName + '_epoch-' + str(resume_epoch - 1) + '.pth'),
#                   map_location=lambda storage, loc: storage)) # Load all tensors onto the CPU

Constructing DeepLabv3+ model...
Number of classes: 4
Output stride: 16
Number of Input Channels: 7
loading weights 'weights/deeplab_std_resnet_cr32/latest.pth'
loaded weights (lastEpoch 14, loss 0.1510310173034668, error 0.8822174072265625)
weights loaded


In [6]:
## Initialising logging
logging = log.Log(exp_name)
start_epoch = 1
logging.start_log(LR, N_EPOCHS)
logging.start_csv()

for epoch in range(start_epoch, N_EPOCHS+1):
    since = time.time()
    
    print('-' * 10)
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)

    ### Train ###
    metric = train(model, train_dl, optimizer, criterion, epoch)
    # print metrics to console
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)
    print('Train Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'train')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'train')
    
    ### Test ###
    metric = test(model, val_dl, criterion, epoch) 
    print('-' * 10)
    print('Val Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'val')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'val')
    
    ### Checkpoint ###    
    mgr.save_weights(model, epoch, metric['loss'], metric['acc_all'], WEIGHTS_PATH)

    ### Adjust Lr ###
    mgr.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS)

----------
Epoch 1/25
----------




Step: 100  Loss: 0.2050  Acc all: 0.8365  Acc mean: 0.7690  IoU: 0.6666  Dice: 0.7950
Step: 200  Loss: 0.1601  Acc all: 0.8729  Acc mean: 0.8232  IoU: 0.7313  Dice: 0.8400
Step: 300  Loss: 0.1484  Acc all: 0.8541  Acc mean: 0.8635  IoU: 0.7561  Dice: 0.8516
Step: 400  Loss: 0.2061  Acc all: 0.8124  Acc mean: 0.7939  IoU: 0.6835  Dice: 0.7941
Epoch 1/25
----------
Train Loss: 0.1611  Acc all: 0.8660  Acc mean: 0.8297  IoU: 0.7393  Dice: 0.8404
----------
Train Time 7m 6s
----------
Val Loss: 0.1623  Acc all: 0.8675  Acc mean: 0.8491  IoU: 0.7494  Dice: 0.8499
----------
Total Time 7m 35s

----------
Epoch 2/25
----------
Step: 100  Loss: 0.1424  Acc all: 0.8805  Acc mean: 0.8408  IoU: 0.7626  Dice: 0.8577
Step: 200  Loss: 0.1893  Acc all: 0.8479  Acc mean: 0.8242  IoU: 0.6997  Dice: 0.8107
Step: 300  Loss: 0.1193  Acc all: 0.8850  Acc mean: 0.8587  IoU: 0.7962  Dice: 0.8808
Step: 400  Loss: 0.2188  Acc all: 0.8158  Acc mean: 0.7681  IoU: 0.6530  Dice: 0.7812
Epoch 2/25
----------
Train 

Step: 200  Loss: 0.1065  Acc all: 0.9109  Acc mean: 0.9036  IoU: 0.8154  Dice: 0.8935
Step: 300  Loss: 0.1590  Acc all: 0.8852  Acc mean: 0.8493  IoU: 0.7391  Dice: 0.8411
Step: 400  Loss: 0.1076  Acc all: 0.9109  Acc mean: 0.8675  IoU: 0.8155  Dice: 0.8924
Epoch 14/25
----------
Train Loss: 0.1456  Acc all: 0.8792  Acc mean: 0.8444  IoU: 0.7603  Dice: 0.8559
----------
Train Time 6m 60s
----------
Val Loss: 0.1497  Acc all: 0.8847  Acc mean: 0.8474  IoU: 0.7671  Dice: 0.8625
----------
Total Time 7m 28s

----------
Epoch 15/25
----------
Step: 100  Loss: 0.1496  Acc all: 0.8664  Acc mean: 0.8234  IoU: 0.7531  Dice: 0.8505
Step: 200  Loss: 0.0909  Acc all: 0.9177  Acc mean: 0.9141  IoU: 0.8394  Dice: 0.9091
Step: 300  Loss: 0.1360  Acc all: 0.8886  Acc mean: 0.8430  IoU: 0.7731  Dice: 0.8640
Step: 400  Loss: 0.1562  Acc all: 0.8672  Acc mean: 0.8694  IoU: 0.7475  Dice: 0.8438
Epoch 15/25
----------
Train Loss: 0.1435  Acc all: 0.8806  Acc mean: 0.8453  IoU: 0.7629  Dice: 0.8580
-------

In [7]:
# cr 64

In [9]:
torch.cuda.empty_cache()

nr_classes = 4
nr_channels = 7
exp_name = 'deeplab_std_resnet_cr64'
WEIGHTS_PATH = 'weights/' + exp_name + '/'
N_EPOCHS = 25
start_epoch = 1
backbone = 'resnet'

cr = 64
dsize = 256
ddepth = 3
os = 16

#######################################################

## Creating the dataset
path_dataset = "/home/philipp/Data/dataset_256_df_177.h5"
#path_dataset = "/media/philipp/DATA/dataset/dataset_256_df_0.h5"
dataset = datasets.ForestDataset(path_dataset, ground_truth='ground_truth_std')

if dataset[0][0].shape[1] == 256:
    chunk_size = 1000
else:
    chunk_size = 0

# shuffle and split
train_sampler, val_sampler, test_sampler = dataset.get_sampler(split=split, \
                shuffle_dataset=True, random_seed=399, chunk_size=chunk_size)

# dataloader
train_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=train_sampler, num_workers=num_workers, pin_memory=pin_memory)
val_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=val_sampler, num_workers=num_workers, pin_memory=pin_memory)
test_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=test_sampler, num_workers=num_workers, pin_memory=pin_memory)
print(len(dataset))
print(len(train_sampler.indices))
print(len(val_sampler.indices))
print(len(test_sampler.indices))

#######################################################

# Network definition
if backbone == 'xception':
    model = deeplab_xception.DeepLabv3_plus(nInputChannels=nr_channels, \
                                            n_classes=nr_classes, os=os, pretrained=False)
elif backbone == 'resnet':
    model = deeplab_resnet_hyper.DeepLabv3_plus(nInputChannels=nr_channels, \
                                                n_classes=nr_classes, os=os, \
                                                pretrained=False, \
                                                cr=cr, \
                                                dsize=dsize, \
                                                ddepth=ddepth)
else:
    raise NotImplementedError

try:
    start_epoch = mgr.load_weights(model, WEIGHTS_PATH + 'latest.pth')
    #load_weights(model, WEIGHTS_PATH+'latest_5d.pt')
    print("weights loaded")
except:
    #model.apply(weights_init)
    #os.makedirs(WEIGHTS_PATH, exist_ok=True) 
    print("no weights found")

torch.cuda.set_device(device=device_nr)
model.cuda()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.SGD(net.parameters(), lr=p['lr'], momentum=p['momentum'], weight_decay=p['wd'])
criterion = losses.Dice_Loss()
#criterion = torch.nn.CrossEntropyLoss()

#######################################################

## Initialising logging
logging = log.Log(exp_name)
start_epoch = 1
logging.start_log(LR, N_EPOCHS)
logging.start_csv()

for epoch in range(1, N_EPOCHS+1):
    since = time.time()
    
    print('-' * 10)
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)

    ### Train ###
    metric = train(model, train_dl, optimizer, criterion, epoch)
    # print metrics to console
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)
    print('Train Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'train')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'train')
    
    ### Test ###
    metric = test(model, val_dl, criterion, epoch) 
    print('-' * 10)
    print('Val Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'val')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'val')
    
    ### Checkpoint ###    
    mgr.save_weights(model, epoch, metric['loss'], metric['acc_all'], WEIGHTS_PATH)

    ### Adjust Lr ###
    mgr.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS)

42716
29908
4268
8540
Constructing DeepLabv3+ model...
Number of classes: 4
Output stride: 16
Number of Input Channels: 7
loading weights 'weights/deeplab_std_resnet_cr64/latest.pth'
loaded weights (lastEpoch 20, loss 0.14638642966747284, error 0.8816577196121216)
weights loaded
----------
Epoch 1/25
----------
Step: 100  Loss: 0.1526  Acc all: 0.8759  Acc mean: 0.8452  IoU: 0.7539  Dice: 0.8474
Step: 200  Loss: 0.1715  Acc all: 0.8779  Acc mean: 0.8500  IoU: 0.7258  Dice: 0.8285
Step: 300  Loss: 0.1241  Acc all: 0.8988  Acc mean: 0.8747  IoU: 0.7911  Dice: 0.8759
Step: 400  Loss: 0.1599  Acc all: 0.8637  Acc mean: 0.8386  IoU: 0.7304  Dice: 0.8401
Epoch 1/25
----------
Train Loss: 0.1536  Acc all: 0.8724  Acc mean: 0.8365  IoU: 0.7493  Dice: 0.8479
----------
Train Time 6m 38s
----------
Val Loss: 0.1569  Acc all: 0.8806  Acc mean: 0.8456  IoU: 0.7571  Dice: 0.8554
----------
Total Time 7m 5s

----------
Epoch 2/25
----------
Step: 100  Loss: 0.1815  Acc all: 0.8499  Acc mean: 0.8020 

Epoch 13/25
----------
Train Loss: 0.1433  Acc all: 0.8819  Acc mean: 0.8468  IoU: 0.7635  Dice: 0.8583
----------
Train Time 6m 46s
----------
Val Loss: 0.1505  Acc all: 0.8823  Acc mean: 0.8531  IoU: 0.7665  Dice: 0.8617
----------
Total Time 7m 12s

----------
Epoch 14/25
----------
Step: 100  Loss: 0.1459  Acc all: 0.8648  Acc mean: 0.8382  IoU: 0.7593  Dice: 0.8541
Step: 200  Loss: 0.1907  Acc all: 0.8544  Acc mean: 0.7727  IoU: 0.6881  Dice: 0.8093
Step: 300  Loss: 0.1286  Acc all: 0.8900  Acc mean: 0.9088  IoU: 0.7861  Dice: 0.8715
Step: 400  Loss: 0.1629  Acc all: 0.8702  Acc mean: 0.8425  IoU: 0.7381  Dice: 0.8372
Epoch 14/25
----------
Train Loss: 0.1401  Acc all: 0.8828  Acc mean: 0.8502  IoU: 0.7679  Dice: 0.8615
----------
Train Time 6m 45s
----------
Val Loss: 0.1487  Acc all: 0.8841  Acc mean: 0.8530  IoU: 0.7680  Dice: 0.8635
----------
Total Time 7m 12s

----------
Epoch 15/25
----------
Step: 100  Loss: 0.1847  Acc all: 0.8356  Acc mean: 0.7722  IoU: 0.6981  Dice: 0.8

In [10]:
# dsize 512 x 3

In [11]:
torch.cuda.empty_cache()

nr_classes = 4
nr_channels = 7
exp_name = 'deeplab_std_resnet_dc_512_3'
WEIGHTS_PATH = 'weights/' + exp_name + '/'
N_EPOCHS = 25
start_epoch = 1
backbone = 'resnet'

cr = 48
dsize = 512
ddepth = 3
os = 16

#######################################################

## Creating the dataset
path_dataset = "/home/philipp/Data/dataset_256_df_177.h5"
#path_dataset = "/media/philipp/DATA/dataset/dataset_256_df_0.h5"
dataset = datasets.ForestDataset(path_dataset, ground_truth='ground_truth_std')

if dataset[0][0].shape[1] == 256:
    chunk_size = 1000
else:
    chunk_size = 0

# shuffle and split
train_sampler, val_sampler, test_sampler = dataset.get_sampler(split=split, \
                shuffle_dataset=True, random_seed=399, chunk_size=chunk_size)

# dataloader
train_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=train_sampler, num_workers=num_workers, pin_memory=pin_memory)
val_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=val_sampler, num_workers=num_workers, pin_memory=pin_memory)
test_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=test_sampler, num_workers=num_workers, pin_memory=pin_memory)
print(len(dataset))
print(len(train_sampler.indices))
print(len(val_sampler.indices))
print(len(test_sampler.indices))

#######################################################

# Network definition
if backbone == 'xception':
    model = deeplab_xception.DeepLabv3_plus(nInputChannels=nr_channels, \
                                            n_classes=nr_classes, os=os, pretrained=False)
elif backbone == 'resnet':
    model = deeplab_resnet_hyper.DeepLabv3_plus(nInputChannels=nr_channels, \
                                                n_classes=nr_classes, os=os, \
                                                pretrained=False, \
                                                cr=cr, \
                                                dsize=dsize, \
                                                ddepth=ddepth)
else:
    raise NotImplementedError

try:
    start_epoch = mgr.load_weights(model, WEIGHTS_PATH + 'latest.pth')
    #load_weights(model, WEIGHTS_PATH+'latest_5d.pt')
    print("weights loaded")
except:
    #model.apply(weights_init)
    #os.makedirs(WEIGHTS_PATH, exist_ok=True) 
    print("no weights found")

torch.cuda.set_device(device=device_nr)
model.cuda()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.SGD(net.parameters(), lr=p['lr'], momentum=p['momentum'], weight_decay=p['wd'])
criterion = losses.Dice_Loss()
#criterion = torch.nn.CrossEntropyLoss()

#######################################################

## Initialising logging
logging = log.Log(exp_name)
start_epoch = 1
logging.start_log(LR, N_EPOCHS)
logging.start_csv()

for epoch in range(1, N_EPOCHS+1):
    since = time.time()
    
    print('-' * 10)
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)

    ### Train ###
    metric = train(model, train_dl, optimizer, criterion, epoch)
    # print metrics to console
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)
    print('Train Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'train')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'train')
    
    ### Test ###
    metric = test(model, val_dl, criterion, epoch) 
    print('-' * 10)
    print('Val Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'val')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'val')
    
    ### Checkpoint ###    
    mgr.save_weights(model, epoch, metric['loss'], metric['acc_all'], WEIGHTS_PATH)

    ### Adjust Lr ###
    mgr.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS)

42716
29908
4268
8540
Constructing DeepLabv3+ model...
Number of classes: 4
Output stride: 16
Number of Input Channels: 7
loading weights 'weights/deeplab_std_resnet_dc_512_3/latest.pth'
loaded weights (lastEpoch 16, loss 0.14879976212978363, error 0.8823333978652954)
weights loaded
----------
Epoch 1/25
----------
Step: 100  Loss: 0.1530  Acc all: 0.8610  Acc mean: 0.8227  IoU: 0.7507  Dice: 0.8471
Step: 200  Loss: 0.1833  Acc all: 0.8611  Acc mean: 0.8399  IoU: 0.7018  Dice: 0.8167
Step: 300  Loss: 0.1441  Acc all: 0.8861  Acc mean: 0.8782  IoU: 0.7600  Dice: 0.8560
Step: 400  Loss: 0.1592  Acc all: 0.8662  Acc mean: 0.8424  IoU: 0.7431  Dice: 0.8409
Epoch 1/25
----------
Train Loss: 0.1588  Acc all: 0.8696  Acc mean: 0.8299  IoU: 0.7425  Dice: 0.8427
----------
Train Time 8m 30s
----------
Val Loss: 0.1579  Acc all: 0.8770  Acc mean: 0.8503  IoU: 0.7553  Dice: 0.8543
----------
Total Time 9m 2s

----------
Epoch 2/25
----------
Step: 100  Loss: 0.1639  Acc all: 0.8741  Acc mean: 0.8

Epoch 13/25
----------
Train Loss: 0.1456  Acc all: 0.8802  Acc mean: 0.8447  IoU: 0.7603  Dice: 0.8559
----------
Train Time 8m 14s
----------
Val Loss: 0.1521  Acc all: 0.8851  Acc mean: 0.8477  IoU: 0.7644  Dice: 0.8601
----------
Total Time 8m 46s

----------
Epoch 14/25
----------
Step: 100  Loss: 0.1072  Acc all: 0.9171  Acc mean: 0.8651  IoU: 0.8131  Dice: 0.8928
Step: 200  Loss: 0.1460  Acc all: 0.8584  Acc mean: 0.8186  IoU: 0.7499  Dice: 0.8540
Step: 300  Loss: 0.1698  Acc all: 0.8529  Acc mean: 0.8083  IoU: 0.7280  Dice: 0.8303
Step: 400  Loss: 0.1237  Acc all: 0.9030  Acc mean: 0.8606  IoU: 0.7918  Dice: 0.8763
Epoch 14/25
----------
Train Loss: 0.1450  Acc all: 0.8806  Acc mean: 0.8441  IoU: 0.7613  Dice: 0.8565
----------
Train Time 8m 11s
----------
Val Loss: 0.1526  Acc all: 0.8849  Acc mean: 0.8452  IoU: 0.7635  Dice: 0.8596
----------
Total Time 8m 43s

----------
Epoch 15/25
----------
Step: 100  Loss: 0.1610  Acc all: 0.8536  Acc mean: 0.8045  IoU: 0.7288  Dice: 0.8

In [12]:
# dsize 1024 x 3

In [17]:
torch.cuda.empty_cache()

In [None]:
torch.cuda.empty_cache()

batch_size = 56
nr_classes = 4
nr_channels = 7
exp_name = 'deeplab_std_resnet_dc_1024_3'
WEIGHTS_PATH = 'weights/' + exp_name + '/'
N_EPOCHS = 25
start_epoch = 1
backbone = 'resnet'

cr = 48
dsize = 1024
ddepth = 3
os = 16

#######################################################

## Creating the dataset
path_dataset = "/home/philipp/Data/dataset_256_df_177.h5"
#path_dataset = "/media/philipp/DATA/dataset/dataset_256_df_0.h5"
dataset = datasets.ForestDataset(path_dataset, ground_truth='ground_truth_std')

if dataset[0][0].shape[1] == 256:
    chunk_size = 1000
else:
    chunk_size = 0

# shuffle and split
train_sampler, val_sampler, test_sampler = dataset.get_sampler(split=split, \
                shuffle_dataset=True, random_seed=399, chunk_size=chunk_size)

# dataloader
train_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=train_sampler, num_workers=num_workers, pin_memory=pin_memory)
val_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=val_sampler, num_workers=num_workers, pin_memory=pin_memory)
test_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=test_sampler, num_workers=num_workers, pin_memory=pin_memory)
print(len(dataset))
print(len(train_sampler.indices))
print(len(val_sampler.indices))
print(len(test_sampler.indices))

#######################################################

# Network definition
if backbone == 'xception':
    model = deeplab_xception.DeepLabv3_plus(nInputChannels=nr_channels, \
                                            n_classes=nr_classes, os=os, pretrained=False)
elif backbone == 'resnet':
    model = deeplab_resnet_hyper.DeepLabv3_plus(nInputChannels=nr_channels, \
                                                n_classes=nr_classes, os=os, \
                                                pretrained=False, \
                                                cr=cr, \
                                                dsize=dsize, \
                                                ddepth=ddepth)
else:
    raise NotImplementedError

try:
    start_epoch = mgr.load_weights(model, WEIGHTS_PATH + 'latest.pth')
    #load_weights(model, WEIGHTS_PATH+'latest_5d.pt')
    print("weights loaded")
except:
    #model.apply(weights_init)
    #os.makedirs(WEIGHTS_PATH, exist_ok=True) 
    print("no weights found")

torch.cuda.set_device(device=device_nr)
model.cuda()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.SGD(net.parameters(), lr=p['lr'], momentum=p['momentum'], weight_decay=p['wd'])
criterion = losses.Dice_Loss()
#criterion = torch.nn.CrossEntropyLoss()

#######################################################

## Initialising logging
logging = log.Log(exp_name)
start_epoch = 1
logging.start_log(LR, N_EPOCHS)
logging.start_csv()

for epoch in range(1, N_EPOCHS+1):
    since = time.time()
    
    print('-' * 10)
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)

    ### Train ###
    metric = train(model, train_dl, optimizer, criterion, epoch)
    # print metrics to console
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)
    print('Train Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'train')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'train')
    
    ### Test ###
    metric = test(model, val_dl, criterion, epoch) 
    print('-' * 10)
    print('Val Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'val')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'val')
    
    ### Checkpoint ###    
    mgr.save_weights(model, epoch, metric['loss'], metric['acc_all'], WEIGHTS_PATH)

    ### Adjust Lr ###
    mgr.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS)

42716
29908
4268
8540
Constructing DeepLabv3+ model...
Number of classes: 4
Output stride: 16
Number of Input Channels: 7
loading weights 'weights/deeplab_std_resnet_dc_1024_3/latest.pth'
loaded weights (lastEpoch 17, loss 0.1500854194164276, error 0.8804186582565308)
weights loaded
----------
Epoch 1/25
----------




In [None]:
# dsize 512 x 4

In [None]:
torch.cuda.empty_cache()

batch_size = 72
nr_classes = 4
nr_channels = 7
exp_name = 'deeplab_std_resnet_dc_512_4'
WEIGHTS_PATH = 'weights/' + exp_name + '/'
N_EPOCHS = 25
start_epoch = 1
backbone = 'resnet'

cr = 48
dsize = 512
ddepth = 4
os = 16

#######################################################

## Creating the dataset
path_dataset = "/home/philipp/Data/dataset_256_df_177.h5"
#path_dataset = "/media/philipp/DATA/dataset/dataset_256_df_0.h5"
dataset = datasets.ForestDataset(path_dataset, ground_truth='ground_truth_std')

if dataset[0][0].shape[1] == 256:
    chunk_size = 1000
else:
    chunk_size = 0

# shuffle and split
train_sampler, val_sampler, test_sampler = dataset.get_sampler(split=split, \
                shuffle_dataset=True, random_seed=399, chunk_size=chunk_size)

# dataloader
train_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=train_sampler, num_workers=num_workers, pin_memory=pin_memory)
val_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=val_sampler, num_workers=num_workers, pin_memory=pin_memory)
test_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=test_sampler, num_workers=num_workers, pin_memory=pin_memory)
print(len(dataset))
print(len(train_sampler.indices))
print(len(val_sampler.indices))
print(len(test_sampler.indices))

#######################################################

# Network definition
if backbone == 'xception':
    model = deeplab_xception.DeepLabv3_plus(nInputChannels=nr_channels, \
                                            n_classes=nr_classes, os=os, pretrained=False)
elif backbone == 'resnet':
    model = deeplab_resnet_hyper.DeepLabv3_plus(nInputChannels=nr_channels, \
                                                n_classes=nr_classes, os=os, \
                                                pretrained=False, \
                                                cr=cr, \
                                                dsize=dsize, \
                                                ddepth=ddepth)
else:
    raise NotImplementedError

try:
    start_epoch = mgr.load_weights(model, WEIGHTS_PATH + 'latest.pth')
    #load_weights(model, WEIGHTS_PATH+'latest_5d.pt')
    print("weights loaded")
except:
    #model.apply(weights_init)
    #os.makedirs(WEIGHTS_PATH, exist_ok=True) 
    print("no weights found")

torch.cuda.set_device(device=device_nr)
model.cuda()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.SGD(net.parameters(), lr=p['lr'], momentum=p['momentum'], weight_decay=p['wd'])
criterion = losses.Dice_Loss()
#criterion = torch.nn.CrossEntropyLoss()

#######################################################

## Initialising logging
logging = log.Log(exp_name)
start_epoch = 1
logging.start_log(LR, N_EPOCHS)
logging.start_csv()

for epoch in range(1, N_EPOCHS+1):
    since = time.time()
    
    print('-' * 10)
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)

    ### Train ###
    metric = train(model, train_dl, optimizer, criterion, epoch)
    # print metrics to console
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)
    print('Train Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'train')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'train')
    
    ### Test ###
    metric = test(model, val_dl, criterion, epoch) 
    print('-' * 10)
    print('Val Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'val')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'val')
    
    ### Checkpoint ###    
    mgr.save_weights(model, epoch, metric['loss'], metric['acc_all'], WEIGHTS_PATH)

    ### Adjust Lr ###
    mgr.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS)

In [None]:
# dsize 512 x 5

In [None]:
torch.cuda.empty_cache()

nr_classes = 4
nr_channels = 7
exp_name = 'deeplab_std_resnet_dc_512_5'
WEIGHTS_PATH = 'weights/' + exp_name + '/'
N_EPOCHS = 25
start_epoch = 1
backbone = 'resnet'

cr = 48
dsize = 512
ddepth = 5
os = 16

#######################################################

## Creating the dataset
path_dataset = "/home/philipp/Data/dataset_256_df_177.h5"
#path_dataset = "/media/philipp/DATA/dataset/dataset_256_df_0.h5"
dataset = datasets.ForestDataset(path_dataset, ground_truth='ground_truth_std')

if dataset[0][0].shape[1] == 256:
    chunk_size = 1000
else:
    chunk_size = 0

# shuffle and split
train_sampler, val_sampler, test_sampler = dataset.get_sampler(split=split, \
                shuffle_dataset=True, random_seed=399, chunk_size=chunk_size)

# dataloader
train_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=train_sampler, num_workers=num_workers, pin_memory=pin_memory)
val_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=val_sampler, num_workers=num_workers, pin_memory=pin_memory)
test_dl = torch.utils.data.DataLoader(dataset, batch_size=batch_size, \
                                sampler=test_sampler, num_workers=num_workers, pin_memory=pin_memory)
print(len(dataset))
print(len(train_sampler.indices))
print(len(val_sampler.indices))
print(len(test_sampler.indices))

#######################################################

# Network definition
if backbone == 'xception':
    model = deeplab_xception.DeepLabv3_plus(nInputChannels=nr_channels, \
                                            n_classes=nr_classes, os=os, pretrained=False)
elif backbone == 'resnet':
    model = deeplab_resnet_hyper.DeepLabv3_plus(nInputChannels=nr_channels, \
                                                n_classes=nr_classes, os=os, \
                                                pretrained=False, \
                                                cr=cr, \
                                                dsize=dsize, \
                                                ddepth=ddepth)
else:
    raise NotImplementedError

try:
    start_epoch = mgr.load_weights(model, WEIGHTS_PATH + 'latest.pth')
    #load_weights(model, WEIGHTS_PATH+'latest_5d.pt')
    print("weights loaded")
except:
    #model.apply(weights_init)
    #os.makedirs(WEIGHTS_PATH, exist_ok=True) 
    print("no weights found")

torch.cuda.set_device(device=device_nr)
model.cuda()
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.SGD(net.parameters(), lr=p['lr'], momentum=p['momentum'], weight_decay=p['wd'])
criterion = losses.Dice_Loss()
#criterion = torch.nn.CrossEntropyLoss()

#######################################################

## Initialising logging
logging = log.Log(exp_name)
start_epoch = 1
logging.start_log(LR, N_EPOCHS)
logging.start_csv()

for epoch in range(1, N_EPOCHS+1):
    since = time.time()
    
    print('-' * 10)
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)

    ### Train ###
    metric = train(model, train_dl, optimizer, criterion, epoch)
    # print metrics to console
    print('Epoch {}/{}'.format(epoch, N_EPOCHS))
    print('-' * 10)
    print('Train Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Train Time {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'train')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'train')
    
    ### Test ###
    metric = test(model, val_dl, criterion, epoch) 
    print('-' * 10)
    print('Val Loss: {:.4f}  Acc all: {:.4f}  Acc mean: {:.4f}  IoU: {:.4f}  Dice: {:.4f}'\
          .format(metric['loss'], metric['acc_all'], metric['acc_mean'], metric['jacc'], metric['dice']))
    print('-' * 10)
    time_elapsed = time.time() - since  
    print('Total Time {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    logging.add_log(epoch, metric, time_elapsed // 60, 'val')
    logging.add_csv(epoch, metric, time_elapsed // 60, 'val')
    
    ### Checkpoint ###    
    mgr.save_weights(model, epoch, metric['loss'], metric['acc_all'], WEIGHTS_PATH)

    ### Adjust Lr ###
    mgr.adjust_learning_rate(LR, LR_DECAY, optimizer, epoch, DECAY_EVERY_N_EPOCHS)