In [1]:
import numpy as np
from pathlib import Path
from PIL import Image
from torch.utils.data import DataLoader, random_split
import torch
from torch import optim
import torch.nn as nn
from tqdm import tqdm
import torch.nn.functional as F
import logging
from evaluate import evaluate

import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
from collections import OrderedDict

from model.ensemblenet_model import EnsembleNet


from utils.dice_score import dice_loss
from utils.data_load import KittiDataset
from torchsummaryX import summary

In [2]:
Val_Percent = 0.3
Scale_Percent = 1.0
Batch_Size = 8
learning_rate = 0.0001
Pin_Memory = False
epochs = 50


Image_Size = [384, 1216]
Gradient_Clipping = 0.8


Num_Class = 2
Num_Channel = 3
amp = True

Model_Name = 'ensemble_fusion'


Img_Path =  'data/data_road/training/image_2'
Mask_Path =  'data/data_road/training/semantic'

save_checkpoint = True
checkpoint_dir = '../trained' + '_' + Model_Name
batch_size = Batch_Size

In [3]:
dirImg = Path(Img_Path)
dirMask = Path(Mask_Path)
dir_checkpoint = Path(checkpoint_dir)

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
datasets =  KittiDataset(dirImg, dirMask, Image_Size, Scale_Percent)
n_val = int(len(datasets) * Val_Percent)
n_train = len(datasets) - n_val
train_set, val_set = random_split(datasets, [n_train, n_val], generator=torch.Generator().manual_seed(0))

loader_args = dict(batch_size=Batch_Size, num_workers= os.cpu_count(), pin_memory=Pin_Memory)
train_loader = DataLoader(train_set, shuffle=True, drop_last = True, **loader_args)
val_loader = DataLoader(val_set, shuffle=False, drop_last=True, **loader_args)

100%|██████████| 289/289 [00:00<00:00, 821.42it/s]


In [6]:
model = EnsembleNet(Model_Name, Num_Channel, Num_Class)
model = model.to(memory_format=torch.channels_last, device = device)

In [7]:
# 4. Set up the optimizer, the loss, the learning rate scheduler and the loss scaling for AMP
#optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=1e-8, momentum=0.9)
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=1e-8, momentum=0.9)

if 'ensemble_voting' in Model_Name:
    unet_optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-8)
    segnet_optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-8)
    enet_optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-8)
    optims = [unet_optimizer, segnet_optimizer, enet_optimizer]
    
    unet_scheduler = optim.lr_scheduler.ReduceLROnPlateau(unet_optimizer, 'max', patience=2)  # goal: maximize Dice score
    segnet_scheduler = optim.lr_scheduler.ReduceLROnPlateau(segnet_optimizer, 'max', patience=2)  # goal: maximize Dice score
    enet_scheduler = optim.lr_scheduler.ReduceLROnPlateau(enet_optimizer, 'max', patience=2)  # goal: maximize Dice score
       
else:
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-10)
    #optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=1e-10)
    optims = [optimizer]
    
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', patience=2)  # goal: maximize Dice score

grad_scaler = torch.cuda.amp.GradScaler(enabled=amp)
criterion = nn.CrossEntropyLoss()
global_step = 0

In [8]:
def calculate_loss(pred, true_masks, nclass, multiclass):
    loss = criterion(pred, true_masks)
    loss += dice_loss(
        F.softmax(pred, dim=1).float(),
        F.one_hot(true_masks, nclass).permute(0, 3, 1, 2).float(),
        multiclass=multiclass
    )
    return loss

def grad_forback(models, losses, optim):
    optim.zero_grad(set_to_none=True)
    grad_scaler.scale(losses).backward()
    torch.nn.utils.clip_grad_norm_(models.parameters(), Gradient_Clipping)
    grad_scaler.step(optim)
    grad_scaler.update()    

def forward_and_backward(model, images, true_masks, amp, optimizers, grad_scaler, model_name):
    with torch.autocast(device.type if device.type != 'mps' else 'cpu', enabled=amp):
        
        try:
            mn_cls = model.n_classes
        except:
            mn_cls = model.classifier[-1].out_channels

        if model_name == 'ensemble_voting':
            unet_pred, segnet_pred, enet_pred = model(images)
            #deeplab_pred = deeplab_pred['out']
            
            unet_loss = calculate_loss(unet_pred, true_masks, mn_cls, multiclass=True)
            segnet_loss = calculate_loss(segnet_pred, true_masks, mn_cls, multiclass=True)
            enet_loss = calculate_loss(enet_pred, true_masks, mn_cls, multiclass=True)
            
            
        else:
            masks_pred = model(images)
            if isinstance(masks_pred, OrderedDict):
                masks_pred = masks_pred['out']
            loss = calculate_loss(masks_pred, true_masks, mn_cls, multiclass=True)
    
    
    if model_name == 'ensemble_voting':
        for _loss, _optiz in zip([unet_loss, segnet_loss, enet_loss], optimizers):
            grad_forback(model, _loss, _optiz)

        return model, unet_loss, segnet_loss, enet_loss
    else:
        for _loss, _optiz in zip([loss], optimizers):
            grad_forback(model, _loss, _optiz)
            
        return model, loss


In [None]:
valScore_list1 = []
TrainLoss_list1 = []

valScore_list2 = []
TrainLoss_list2 = []

valScore_list3 = []
TrainLoss_list3 = []

valScore_list4 = []
TrainLoss_list4 = []

val_losses = []
val_accs = []
val_mious = []

# 5. Begin training
for epoch in range(1, epochs + 1):
    model.train()
    epoch_loss = 0
    epoch_unet_loss = 0
    epoch_segnet_loss = 0
    epoch_enet_loss = 0
    epoch_voting_loss = 0
    
    with tqdm(total=n_train, desc=f'Epoch {epoch}/{epochs}', unit='img') as pbar:
        for batch in train_loader:
            images, true_masks = batch['image'], batch['mask']

            images = images.to(device=device, dtype=torch.float32, memory_format=torch.channels_last)
            true_masks = true_masks.to(device=device, dtype=torch.long)

                
            result = forward_and_backward(model, images, true_masks, amp, optims, grad_scaler, Model_Name)
            
            if len(result) == 4:
                model, unet_loss, segnet_loss, enet_loss = result
                
                pbar.update(images.shape[0])
                global_step += 1
                epoch_unet_loss += unet_loss.item()
                epoch_segnet_loss += segnet_loss.item()
                epoch_enet_loss += enet_loss.item()
                vot_loss = ((unet_loss.item() + segnet_loss.item() + enet_loss.item()) /3)
                epoch_voting_loss += vot_loss
                
                
            elif len(result) == 2:
                model, loss = result
                
                pbar.update(images.shape[0])
                global_step += 1
                epoch_loss += loss.item()


        print('***')
        if len(result) == 4:
            print('Unet Loss: {}     Segnet Loss: {}     Enet Loss: {}'.format(unet_loss, segnet_loss, enet_loss))
            print('Voting Loss: {}'.format(vot_loss))
            
            
        elif len(result) == 2:
            print('{} Loss: {}'.format(Model_Name, loss))

        # Evaluation round
        division_step = (n_train // (5 * batch_size))
        if division_step > 0:
            #if global_step % division_step == 0:
            if len(result) == 4:
                unet_val_score, segnet_val_score, enet_val_score, voting_val_score, val_loss, val_acc, val_miou = evaluate(model, val_loader, criterion, device, Model_Name, amp)
                
                unet_scheduler.step(unet_val_score)
                segnet_scheduler.step(segnet_val_score)
                enet_scheduler.step(enet_val_score)
                #voting_scheduler.step(voting_val_score)
                
                valScore_list1.append(unet_val_score.cpu().detach().numpy())
                TrainLoss_list1.append(unet_loss.cpu().detach().numpy())
                valScore_list2.append(segnet_val_score.cpu().detach().numpy())
                TrainLoss_list2.append(segnet_loss.cpu().detach().numpy())                
                valScore_list3.append(enet_val_score.cpu().detach().numpy())
                TrainLoss_list3.append(enet_loss.cpu().detach().numpy())
                valScore_list4.append(voting_val_score.cpu().detach().numpy())
                TrainLoss_list4.append(vot_loss)
                
                val_losses.append(val_loss)
                val_accs.append(val_acc)
                val_mious.append(val_miou)
                
                print('---')
                print('Unet Validation Dice Score: {}     Segnet Validation Dice Score: {}     Enet Validation Dice Score: {}'.format(unet_val_score, segnet_val_score, enet_val_score))
                print('---')
                print('Ensemble Voting Validation Dice Loss: {}'.format(val_loss))
                print('Ensemble Voting Validation Pixel Accuracy: {} '.format(val_acc))
                print('Ensemble Voting Validation MIoU: {}'.format(val_miou))                
                print('Ensemble Voting Validation Dice Score: {} '.format(voting_val_score))
                
            else:
                val_score, val_loss, val_acc, val_miou = evaluate(model, val_loader, criterion, device, Model_Name, amp)
                
                                
                scheduler.step(val_score)
                
                print('---')
                print('{} Validation Dice Loss: {}'.format(Model_Name, val_loss))   
                print('{} Validation Pixel Accuracy: {}'.format(Model_Name, val_acc))
                print('{} Validation MIoU: {}'.format(Model_Name, val_miou))
                print('{} Validation Dice Score: {}'.format(Model_Name, val_score))
                
            
                valScore_list1.append(val_score.cpu().detach().numpy())
                TrainLoss_list1.append(loss.cpu().detach().numpy())
                val_losses.append(val_loss)
                val_accs.append(val_acc)
                val_mious.append(val_miou)

        
    if save_checkpoint:
        Path(dir_checkpoint).mkdir(parents=True, exist_ok=True)
        torch.save(model.state_dict(), str(dir_checkpoint / 'checkpoint_epoch{}.pth'.format(epoch + 1)))

Epoch 1/50:  99%|█████████▊| 200/203 [00:18<00:00, 15.12img/s]

***
ensemble_fusion Loss: 1.1529762744903564


Epoch 1/50:  99%|█████████▊| 200/203 [00:23<00:00,  8.60img/s]

---
ensemble_fusion Validation Dice Loss: 1.175976276397705
ensemble_fusion Validation Pixel Accuracy: 0.7768300374348959
ensemble_fusion Validation MIoU: 0.45547409575343445
ensemble_fusion Validation Dice Score: 0.2870611250400543



Epoch 2/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.06img/s]

***
ensemble_fusion Loss: 1.084697961807251


Epoch 2/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.09img/s]

---
ensemble_fusion Validation Dice Loss: 1.2270091772079468
ensemble_fusion Validation Pixel Accuracy: 0.5818692926774945
ensemble_fusion Validation MIoU: 0.38386654023974276
ensemble_fusion Validation Dice Score: 0.45292624831199646



Epoch 3/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.19img/s]

***
ensemble_fusion Loss: 0.9955959916114807


Epoch 3/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.14img/s]

---
ensemble_fusion Validation Dice Loss: 1.0537610054016113
ensemble_fusion Validation Pixel Accuracy: 0.782973707767955
ensemble_fusion Validation MIoU: 0.5707466366949495
ensemble_fusion Validation Dice Score: 0.601093053817749



Epoch 4/50:  99%|█████████▊| 200/203 [00:15<00:00, 14.99img/s]

***
ensemble_fusion Loss: 0.9095645546913147


Epoch 4/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.12img/s]

---
ensemble_fusion Validation Dice Loss: 1.0455946922302246
ensemble_fusion Validation Pixel Accuracy: 0.8180392081277412
ensemble_fusion Validation MIoU: 0.6126365728211842
ensemble_fusion Validation Dice Score: 0.6421879529953003



Epoch 5/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.20img/s]

***
ensemble_fusion Loss: 0.898144006729126


Epoch 5/50:  99%|█████████▊| 200/203 [00:20<00:00,  9.88img/s]

---
ensemble_fusion Validation Dice Loss: 0.9860247373580933
ensemble_fusion Validation Pixel Accuracy: 0.8408676950555098
ensemble_fusion Validation MIoU: 0.6442538896903762
ensemble_fusion Validation Dice Score: 0.6574466228485107



Epoch 6/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.06img/s]

***
ensemble_fusion Loss: 0.9156389236450195


Epoch 6/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.13img/s]

---
ensemble_fusion Validation Dice Loss: 0.9228482246398926
ensemble_fusion Validation Pixel Accuracy: 0.8582455819113213
ensemble_fusion Validation MIoU: 0.6674178502685439
ensemble_fusion Validation Dice Score: 0.6702505946159363



Epoch 7/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.20img/s]

***
ensemble_fusion Loss: 0.8815439343452454


Epoch 7/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.22img/s]

---
ensemble_fusion Validation Dice Loss: 0.960645854473114
ensemble_fusion Validation Pixel Accuracy: 0.86207634106017
ensemble_fusion Validation MIoU: 0.6720503535391305
ensemble_fusion Validation Dice Score: 0.6755055785179138



Epoch 8/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.09img/s]

***
ensemble_fusion Loss: 0.8754761219024658


Epoch 8/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.15img/s]

---
ensemble_fusion Validation Dice Loss: 0.9144254326820374
ensemble_fusion Validation Pixel Accuracy: 0.8703640051055372
ensemble_fusion Validation MIoU: 0.6869459126666466
ensemble_fusion Validation Dice Score: 0.6767213940620422



Epoch 9/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.14img/s]

***
ensemble_fusion Loss: 0.8371782302856445


Epoch 9/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.25img/s]

---
ensemble_fusion Validation Dice Loss: 0.8919157981872559
ensemble_fusion Validation Pixel Accuracy: 0.8833414713541666
ensemble_fusion Validation MIoU: 0.7070208070276468
ensemble_fusion Validation Dice Score: 0.6934493780136108



Epoch 10/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.03img/s]

***
ensemble_fusion Loss: 0.779564380645752


Epoch 10/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.20img/s]

---
ensemble_fusion Validation Dice Loss: 0.8403128981590271
ensemble_fusion Validation Pixel Accuracy: 0.9219877008806195
ensemble_fusion Validation MIoU: 0.7752322074104783
ensemble_fusion Validation Dice Score: 0.7549979090690613



Epoch 11/50:  99%|█████████▊| 200/203 [00:15<00:00, 14.79img/s]

***
ensemble_fusion Loss: 0.8066568374633789


Epoch 11/50:  99%|█████████▊| 200/203 [00:20<00:00,  9.98img/s]

---
ensemble_fusion Validation Dice Loss: 0.8341012001037598
ensemble_fusion Validation Pixel Accuracy: 0.9421753465083608
ensemble_fusion Validation MIoU: 0.8038545597663196
ensemble_fusion Validation Dice Score: 0.7904278039932251



Epoch 12/50:  99%|█████████▊| 200/203 [00:15<00:00, 14.97img/s]

***
ensemble_fusion Loss: 0.7830836772918701


Epoch 12/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.07img/s]

---
ensemble_fusion Validation Dice Loss: 0.8547970056533813
ensemble_fusion Validation Pixel Accuracy: 0.939406813236705
ensemble_fusion Validation MIoU: 0.8128713767795228
ensemble_fusion Validation Dice Score: 0.8580529093742371



Epoch 13/50:  99%|█████████▊| 200/203 [00:15<00:00, 14.79img/s]

***
ensemble_fusion Loss: 0.7930216789245605


Epoch 13/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.15img/s]

---
ensemble_fusion Validation Dice Loss: 0.8015462756156921
ensemble_fusion Validation Pixel Accuracy: 0.9457100316097862
ensemble_fusion Validation MIoU: 0.803750065988386
ensemble_fusion Validation Dice Score: 0.7817147374153137



Epoch 14/50:  99%|█████████▊| 200/203 [00:15<00:00, 14.94img/s]

***
ensemble_fusion Loss: 0.713423490524292


Epoch 14/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.16img/s]

---
ensemble_fusion Validation Dice Loss: 0.8042080402374268
ensemble_fusion Validation Pixel Accuracy: 0.9491047106291118
ensemble_fusion Validation MIoU: 0.8306943736241934
ensemble_fusion Validation Dice Score: 0.8680435419082642



Epoch 15/50:  99%|█████████▊| 200/203 [00:15<00:00, 14.83img/s]

***
ensemble_fusion Loss: 0.7322964072227478


Epoch 15/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.10img/s]

---
ensemble_fusion Validation Dice Loss: 0.8606986999511719
ensemble_fusion Validation Pixel Accuracy: 0.9367657042386239
ensemble_fusion Validation MIoU: 0.8091071709031331
ensemble_fusion Validation Dice Score: 0.8710569739341736



Epoch 16/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.13img/s]

***
ensemble_fusion Loss: 0.7509900331497192


Epoch 16/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.05img/s]

---
ensemble_fusion Validation Dice Loss: 0.8172003626823425
ensemble_fusion Validation Pixel Accuracy: 0.9427380478172972
ensemble_fusion Validation MIoU: 0.8215522103318819
ensemble_fusion Validation Dice Score: 0.8588534593582153



Epoch 17/50:  99%|█████████▊| 200/203 [00:15<00:00, 15.08img/s]

***
ensemble_fusion Loss: 0.703508734703064


Epoch 17/50:  99%|█████████▊| 200/203 [00:19<00:00, 10.11img/s]

---
ensemble_fusion Validation Dice Loss: 0.7517197728157043
ensemble_fusion Validation Pixel Accuracy: 0.9604874995716831
ensemble_fusion Validation MIoU: 0.8609811770297158
ensemble_fusion Validation Dice Score: 0.8837653398513794



Epoch 18/50:  20%|█▉        | 40/203 [00:05<00:15, 10.63img/s]

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
df = pd.DataFrame([TrainLoss_list1, val_losses, valScore_list1, val_accs, val_mious]).T
df.columns = ['train_loss', 'val_loss', 'val_score', 'val_acc', 'val_miou']
df.to_csv(checkpoint_dir + '/model_check.csv', encoding = 'UTF-8')

In [None]:
plt.figure(figsize= (10,5))
plt.plot(TrainLoss_list1)
plt.plot(val_losses)

In [None]:
plt.figure(figsize= (10,5))
plt.plot(valScore_list1)
plt.plot(val_accs)
plt.plot(val_mious)