In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import pickle
import os
import logging
import time
from IPython.core.debugger import set_trace

import torch
import torch.nn as nn
import torch.nn.functional as F

from dataset import prepare_trainset
from utils import save_checkpoint, load_checkpoint, set_logger
from gpu_utils import set_n_get_device

from model import UNetResNet34, predict_proba


%matplotlib inline

In [2]:
######### Config the training process #########
#device = set_n_get_device("0, 1, 2, 3", data_device_id="cuda:0")#0, 1, 2, 3, IMPORTANT: data_device_id is set to free gpu for storing the model, e.g."cuda:1"
MODEL = 'UNetResNet34'#'RESNET34', 'RESNET18', 'INCEPTION_V3', 'BNINCEPTION', 'SEResnet50'
#AUX_LOGITS = True#False, only for 'INCEPTION_V3'
print('====MODEL ACHITECTURE: %s===='%MODEL)

device = set_n_get_device("0, 3", data_device_id="cuda:0")#0, 1, 2, 3, IMPORTANT: data_device_id is set to free gpu for storing the model, e.g."cuda:1"
multi_gpu = [0, 1]#use 2 gpus

SEED = 1234#5678#4567#3456#2345#1234
debug = True# if True, load 100 samples
IMG_SIZE = 256
BATCH_SIZE = 16
NUM_WORKERS = 24
warm_start, last_checkpoint_path = False, 'checkpoint/%s_%s_v1_seed%s/best.pth.tar'%(MODEL, IMG_SIZE, SEED)
checkpoint_path = 'checkpoint/%s_%s_v1_seed%s'%(MODEL, IMG_SIZE, SEED)
LOG_PATH = 'logging/%s_%s_v1_seed%s.log'%(MODEL, IMG_SIZE, SEED)#
torch.cuda.manual_seed_all(SEED)

NUM_EPOCHS = 100
early_stopping_round = 10#500#50
LearningRate = 0.02#phase1: 0.02, phase2: 0.002

====MODEL ACHITECTURE: UNetResNet34====


## the dataset

In [6]:
train_dl, val_dl = prepare_trainset(BATCH_SIZE, NUM_WORKERS, SEED, IMG_SIZE, debug)

Count of trainset (for training):  900
Count of validset (for training):  100


In [7]:
for i, (image, target) in enumerate(train_dl):
    images = list(_image.to(device=device) for _image in image)
    targets = [{k: v.to(device=device) for k, v in t.items()} for t in target]
    if i == 0:
        break

In [8]:
len(images), len(targets)

(8, 8)

In [22]:
targets[0]

{'boxes': tensor([[ 59.,  17., 100.,  45.]], device='cuda:0'),
 'labels': tensor([1], device='cuda:0'),
 'masks': tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]], device='cuda:0'),
 'image_id': tensor([60], device='cuda:0'),
 'area': tensor([1148.], device='cuda:0', dtype=torch.float64),
 'iscrowd': tensor([0], device='cuda:0')}

In [28]:
#masks.reshape(16, -1).cpu().numpy().max(axis=1)

## the model

In [10]:
from model import UNetResNet34
from model2 import get_model_instance_segmentation

In [11]:
#net = UNetResNet34(debug=True).cuda(device=device)
net = get_model_instance_segmentation(num_classes=2).cuda(device=device)
net = nn.DataParallel(net, device_ids=multi_gpu)

#torch.cuda.set_device(0)
#torch.distributed.init_process_group(backend='nccl', world_size=4, init_method='...')
#net = DistributedDataParallel(net, device_ids=[0], output_device=0)
#torch.distributed.init_process_group(backend="nccl")

#net = nn.parallel.DistributedDataParallel(net, device_ids=multi_gpu)

#checkpoint_path = 'checkpoint/UNetResNet34_256_v1_seed1234/best.pth.tar'
#net, _ = load_checkpoint(checkpoint_path, net)

In [12]:
loss_dict = net(images, targets)
losses = sum(loss for loss in loss_dict.values())
losses.item()



In [None]:
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())

loss_value = losses_reduced.item()

In [32]:
_loss = net.criterion(logit, masks)
_loss

tensor(1.6313, device='cuda:0', grad_fn=<AddBackward0>)

In [33]:
_metric = net.metric(logit, masks)
_metric

tensor(0.0036, device='cuda:0')

In [28]:
# i = 1

# fig = plt.figure(figsize=(8, 5))
# if masks[i].mean()==0:
#     plt.title('Empty mask')
# else:
#     plt.title('See marker')

# ax = fig.add_subplot(1, 2, 1)
# plt.imshow(image.cpu().numpy()[i][0], cmap=plt.cm.bone)
# plt.imshow(masks.cpu().numpy()[i][0], alpha=0.3, cmap="Reds")

# ax = fig.add_subplot(1, 2, 2)
# plt.imshow(image.cpu().numpy()[i][0], cmap=plt.cm.bone)
# plt.imshow((logit>0).float().cpu().detach().numpy()[i][0], alpha=0.3, cmap="Reds")

## the training

In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import pickle
import os
import logging
import time
from IPython.core.debugger import set_trace

import torch
import torch.nn as nn
import torch.nn.functional as F

from dataset import prepare_trainset
from utils import save_checkpoint, load_checkpoint, set_logger
from gpu_utils import set_n_get_device
from vision_utils import reduce_dict

from model import UNetResNet34, predict_proba
from model2 import get_model_instance_segmentation

%matplotlib inline

In [2]:
######### Define the training process #########
def run_check_net(train_dl, val_dl, multi_gpu=[0, 1]):
    set_logger(LOG_PATH)
    logging.info('\n\n')
    #---
    #if MODEL == 'UNetResNet34':
    #    net = UNetResNet34(debug=False).cuda(device=device)
    net = get_model_instance_segmentation(num_classes=2).cuda(device=device)
    #elif MODEL == 'RESNET18':
    #    net = AtlasResNet18(debug=False).cuda(device=device)

#     for param in net.named_parameters():
#         if param[0][:8] in ['decoder5']:#'decoder5', 'decoder4', 'decoder3', 'decoder2'
#             param[1].requires_grad = False

    # dummy sgd to see if it can converge ...
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                      lr=LearningRate, momentum=0.9, weight_decay=0.0001)
    #optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=0.045)#LearningRate
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', 
                                                           factor=0.5, patience=4,#4 resnet34 
                                                           verbose=False, threshold=0.0001, 
                                                           threshold_mode='rel', cooldown=0, 
                                                           min_lr=0, eps=1e-08)
    #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma=0.9, last_epoch=-1)
    
    if warm_start:
        logging.info('warm_start: '+last_checkpoint_path)
        net, _ = load_checkpoint(last_checkpoint_path, net)
    
    # using multi GPU
    if multi_gpu is not None:
        net = nn.DataParallel(net, device_ids=multi_gpu)

    diff = 0
    best_val_metric = -0.1
    optimizer.zero_grad()
    
    #seed = get_seed()
    #seed = SEED
    #logging.info('aug seed: '+str(seed))
    #ia.imgaug.seed(seed)
    #np.random.seed(seed)
    
    for i_epoch in range(NUM_EPOCHS):
        t0 = time.time()
        # iterate through trainset
        if multi_gpu is not None:
            net.module.set_mode('train')
        else:
            net.train()#net.set_mode('train')
        train_loss_list, train_metric_list = [], []
        #for seed in [1]:#[1, SEED]:#augment raw data with a duplicate one (augmented)
        #seed = get_seed()
        #np.random.seed(seed)
        #ia.imgaug.seed(i//10)
        for i, (image, target) in enumerate(train_dl):
            images = list(_image.to(device=device) for _image in image)
            targets = [{k: v.to(device=device) for k, v in t.items()} for t in target]
            loss_dict = net(images, targets)
            _train_loss = sum(loss for loss in loss_dict.values())
            
            loss_dict_reduced = reduce_dict(loss_dict)
            _train_losses = sum(loss for loss in loss_dict_reduced.values())
            
            train_loss_list.append(_train_losses.item())
            #train_metric_list.append(_train_metric.item())#.detach()

            _train_loss.backward()#_train_loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        train_loss = np.mean(train_loss_list)
        train_metric = 1-train_loss
        #train_metric = np.mean(train_metric_list)

        # compute valid loss & metrics (concatenate valid set in cpu, then compute loss, metrics on full valid set)
        #net.module.set_mode('valid')
        net.train()
        with torch.no_grad():
            val_loss_list, val_metric_list = [], []
            for i, (image, target) in enumerate(val_dl):
                images = list(_image.to(device=device) for _image in image)
                targets = [{k: v.to(device=device) for k, v in t.items()} for t in target]
                loss_dict = net(images, targets)
                _val_loss = sum(loss for loss in loss_dict.values())
                
                loss_dict_reduced = reduce_dict(loss_dict)
                _val_losses = sum(loss for loss in loss_dict_reduced.values())
                
                val_loss_list.append(_val_losses.item())
                #val_metric_list.append(_val_metric.item())#.detach()

            val_loss = np.mean(val_loss_list)
            #val_metric = np.mean(val_metric_list)
            val_metric = 1-val_loss

        # Adjust learning_rate
        scheduler.step(val_metric)
        #
        if val_metric > best_val_metric:
            best_val_metric = val_metric
            is_best = True
            diff = 0
        else:
            is_best = False
            diff += 1
            if diff > early_stopping_round:
                logging.info('Early Stopping: val_metric does not increase %d rounds'%early_stopping_round)
                #print('Early Stopping: val_iou does not increase %d rounds'%early_stopping_round)
                break
        
        #save checkpoint
        checkpoint_dict = \
        {
            'epoch': i,
            'state_dict': net.module.state_dict() if multi_gpu is not None else net.state_dict(),
            'optim_dict' : optimizer.state_dict(),
            'metrics': {'train_loss': train_loss, 'val_loss': val_loss, 
                        'train_metric': train_metric, 'val_metric': val_metric}
        }
        save_checkpoint(checkpoint_dict, is_best=is_best, checkpoint=checkpoint_path)

        #if i_epoch%20==0:
        if i_epoch>-1:
            logging.info('[EPOCH %05d]train_loss, train_metric: %0.5f, %0.5f; val_loss, val_metric: %0.5f, %0.5f; time elapsed: %0.1f min'%(i_epoch, train_loss.item(), train_metric.item(), val_loss.item(), val_metric.item(), (time.time()-t0)/60))
            


In [3]:
######### Config the training process #########
#device = set_n_get_device("0, 1, 2, 3", data_device_id="cuda:0")#0, 1, 2, 3, IMPORTANT: data_device_id is set to free gpu for storing the model, e.g."cuda:1"
MODEL = 'UNetResNet34'#'RESNET34', 'RESNET18', 'INCEPTION_V3', 'BNINCEPTION', 'SEResnet50'
#AUX_LOGITS = True#False, only for 'INCEPTION_V3'
print('====MODEL ACHITECTURE: %s===='%MODEL)

device = set_n_get_device("3", data_device_id="cuda:0")#0, 1, 2, 3, IMPORTANT: data_device_id is set to free gpu for storing the model, e.g."cuda:1"
multi_gpu = None#[0, 1]#use 2 gpus

SEED = 1234#5678#4567#3456#2345#1234
debug = True# if True, load 100 samples
IMG_SIZE = 256
BATCH_SIZE = 8#16
NUM_WORKERS = 24
warm_start, last_checkpoint_path = False, 'checkpoint/%s_%s_v1_seed%s/best.pth.tar'%(MODEL, IMG_SIZE, SEED)
checkpoint_path = 'checkpoint/%s_%s_v1_seed%s'%(MODEL, IMG_SIZE, SEED)
LOG_PATH = 'logging/%s_%s_v1_seed%s.log'%(MODEL, IMG_SIZE, SEED)#
torch.cuda.manual_seed_all(SEED)

NUM_EPOCHS = 100
early_stopping_round = 10#500#50
LearningRate = 0.02#0.001



====MODEL ACHITECTURE: UNetResNet34====


In [4]:
######### Load data #########
train_dl, val_dl = prepare_trainset(BATCH_SIZE, NUM_WORKERS, SEED, IMG_SIZE, debug)

######### Run the training process #########
run_check_net(train_dl, val_dl, multi_gpu=multi_gpu)

Count of trainset (for training):  900
Count of validset (for training):  100





[EPOCH 00000]train_loss, train_metric: nan, nan; val_loss, val_metric: nan, nan; time elapsed: 1.5 min


KeyboardInterrupt: 

## the predict

In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import pickle
import os
import logging
import time
from IPython.core.debugger import set_trace

import torch
import torch.nn as nn
import torch.nn.functional as F

from dataset import prepare_trainset
from utils import save_checkpoint, load_checkpoint, set_logger
from gpu_utils import set_n_get_device

from model import UNetResNet34, predict_proba

%matplotlib inline

In [3]:
######### Config the training process #########
#device = set_n_get_device("0, 1, 2, 3", data_device_id="cuda:0")#0, 1, 2, 3, IMPORTANT: data_device_id is set to free gpu for storing the model, e.g."cuda:1"
MODEL = 'UNetResNet34'#'RESNET34', 'RESNET18', 'INCEPTION_V3', 'BNINCEPTION', 'SEResnet50'
#AUX_LOGITS = True#False, only for 'INCEPTION_V3'
print('====MODEL ACHITECTURE: %s===='%MODEL)

device = set_n_get_device("0, 3", data_device_id="cuda:0")#0, 1, 2, 3, IMPORTANT: data_device_id is set to free gpu for storing the model, e.g."cuda:1"
multi_gpu = [0, 1]#use 2 gpus

SEED = 1234#5678#4567#3456#2345#1234
debug = True# if True, load 100 samples
IMG_SIZE = 256
BATCH_SIZE = 16#64 for 256x256, 32 for 512x512
NUM_WORKERS = 24

====MODEL ACHITECTURE: UNetResNet34====


In [4]:
train_dl, val_dl = prepare_trainset(BATCH_SIZE, NUM_WORKERS, SEED, IMG_SIZE, debug)

Count of trainset (for training):  900
Count of validset (for training):  100


In [5]:
# y should be makeup
y_valid = []
for i, (image, masks) in enumerate(val_dl):
    #if i==10:
    #    break
    truth = masks.to(device=device, dtype=torch.float)
    y_valid.append(truth.cpu().numpy())
y_valid = np.concatenate(y_valid, axis=0)
y_valid.shape

(96, 1, 256, 256)

In [35]:
net = UNetResNet34(debug=False).cuda(device=device)

In [36]:
checkpoint_path = 'checkpoint/UNetResNet34_256_v1_seed1234/best.pth.tar'
net, _ = load_checkpoint(checkpoint_path, net)

In [37]:
%%time
# predict_proba
net.set_mode('valid')#.module
preds_valid = predict_proba(net, val_dl, device, multi_gpu=False)



CPU times: user 19.8 s, sys: 3.84 s, total: 23.6 s
Wall time: 2.71 s


In [46]:
y_valid = torch.from_numpy(y_valid).to(device=device)
#preds_valid = torch.zeros(y_valid.size()).to(device=device)
preds_valid = torch.from_numpy(np.expand_dims(preds_valid, 1)).to(device=device)

In [12]:
y_valid.size(), preds_valid.size()

(torch.Size([96, 1, 256, 256]), torch.Size([96, 1, 256, 256]))

In [60]:
iou_pytorch(preds_valid, y_valid)

tensor(0.0104, device='cuda:0')