# Environment Setup

In [1]:
import torch
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import RandomChoice
from torchinfo import summary

import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from floortrans.loaders import FloorplanSVG
from floortrans.loaders.augmentations import (RandomCropToSizeTorch,
                                              ResizePaddedTorch,
                                              Compose,
                                              DictToTensor,
                                              ColorJitterTorch,
                                              RandomRotations)

from models.deeplabv3plus import DeepLabV3Plus
from evaluation_metrics import Metrics, timer
from visualizations import Plot

In [2]:
# Release GPU memory
torch.cuda.empty_cache()
print('GPU memory has been released.')

GPU memory has been released.


In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f'Using device: {torch.cuda.get_device_name(0)}')
else:
    device = torch.device('cpu')
    print('Using device: CPU')

Using device: NVIDIA GeForce RTX 3060 Laptop GPU


# Data Preprocessing and Augmentations

In [4]:
SCALE = False
IMAGE_SIZE = 256

if SCALE:
    aug = Compose([RandomChoice([RandomCropToSizeTorch(data_format='dict', size=(IMAGE_SIZE, IMAGE_SIZE)),
                                    ResizePaddedTorch((0, 0), data_format='dict', size=(IMAGE_SIZE, IMAGE_SIZE))]),
                    RandomRotations(format='cubi'),
                    DictToTensor(),
                    ColorJitterTorch()])
else:
    aug = Compose([RandomCropToSizeTorch(data_format='dict', size=(IMAGE_SIZE, IMAGE_SIZE)),
                    RandomRotations(format='cubi'),
                    DictToTensor(),
                    ColorJitterTorch()])

# Dataset

In [5]:
DATA_PATH = 'data/cubicasa5k/'
TRAIN_PATH = 'train.txt'
VAL_PATH = 'val.txt'
FORMAT = 'lmdb'

full_train_set = FloorplanSVG(DATA_PATH, 
                         TRAIN_PATH, 
                         format=FORMAT, 
                         augmentations=aug)

# Use this in the meantime to prevent kernel dying
train_set = Subset(full_train_set, list(range(1000)))

val_set = FloorplanSVG(DATA_PATH, 
                       VAL_PATH, 
                       format=FORMAT, 
                       augmentations=DictToTensor())

print('Train set size: ', len(train_set))
print('Validation set size: ', len(val_set))

Train set size:  1000
Validation set size:  400


In [6]:
sample = train_set[0]
print('Image: ', sample['image'])
print('Label: ', sample['label'])

Image:  tensor([[[1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         ...,
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886]],

        [[1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         ...,
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886]],

        [[1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.1886, 1.1886],
         [1.1886, 1.1886, 1.1886,  ..., 1.1886, 1.

In [7]:
print('Image shape: ', sample['image'].shape)
print('Label shape: ', sample['label'].shape)

Image shape:  torch.Size([3, 256, 256])
Label shape:  torch.Size([23, 256, 256])


# DataLoader

In [12]:
NUM_WORKERS = 0
BATCH_SIZE = 20

train_loader = DataLoader(train_set, 
                          batch_size=BATCH_SIZE, 
                          num_workers=NUM_WORKERS, 
                          shuffle=True, 
                        #   pin_memory=True
                          )

val_loader = DataLoader(val_set, 
                        batch_size=BATCH_SIZE, 
                        num_workers=NUM_WORKERS, 
                        # pin_memory=True
                        )

print(f'Length of train dataloader: {len(train_loader)} batches of size {BATCH_SIZE}')
print(f'Length of val dataloader: {len(val_loader)} batches of size {BATCH_SIZE}')

Length of train dataloader: 50 batches of size 20
Length of val dataloader: 20 batches of size 20


# Model Setup

For reference, here are the 23 classes:  

- **Rooms (12):** "Background", "Outdoor", "Wall", "Kitchen", "Living Room" ,"Bed Room", "Bath", "Entry", "Railing", "Storage", "Garage", "Undefined"  

- **Icons (11):** "No Icon", "Window", "Door", "Closet", "Electrical Applience" ,"Toilet", "Sink", "Sauna Bench", "Fire Place", "Bathtub", "Chimney"

In [13]:
NUM_CLASSES = 23

model = DeepLabV3Plus(backbone='mobilenetv2', attention=False, num_classes=NUM_CLASSES).to(device)

summary(model, input_size=(BATCH_SIZE, 3, IMAGE_SIZE, IMAGE_SIZE))

Layer (type:depth-idx)                                       Output Shape              Param #
DeepLabV3Plus                                                [20, 23, 256, 256]        --
├─Backbone: 1-1                                              [20, 24, 64, 64]          --
│    └─MobileNetV2: 2-1                                      --                        1,281,000
│    │    └─Sequential: 3-1                                  --                        2,223,872
├─ASPP: 1-2                                                  [20, 256, 8, 8]           --
│    └─Conv2d: 2-2                                           [20, 256, 8, 8]           327,680
│    └─AtrousConv: 2-3                                       [20, 256, 8, 8]           --
│    │    └─DepthwiseSeparableConv: 3-2                      [20, 256, 8, 8]           339,712
│    └─AtrousConv: 2-4                                       [20, 256, 8, 8]           --
│    │    └─DepthwiseSeparableConv: 3-3                      [20, 256, 

# Training Hyperparameters

In [14]:
# INITIAL TRAINING
EPOCHS = 50
CRITERION = torch.nn.CrossEntropyLoss()
OPTIMIZER = torch.optim.Adam(model.parameters(), lr=0.001)

In [11]:
# EPOCHS = 100

# CRITERION = torch.nn.CrossEntropyLoss()

# initial_lr = 0.001
# OPTIMIZER = torch.optim.SGD(model.parameters(), lr=initial_lr, momentum=0.95, weight_decay=1e-4, nesterov=True)

# # Poly learning rate policy (used in DeepLabV3+ paper)
# class PolyLR(torch.optim.lr_scheduler._LRScheduler):
#     def __init__(self, optimizer, max_iters, power=0.9, last_epoch=-1):
#         self.max_iters = max_iters
#         self.power = power
#         super(PolyLR, self).__init__(optimizer, last_epoch)

#     def get_lr(self):
#         return [base_lr * (1 - self.last_epoch / self.max_iters) ** self.power for base_lr in self.base_lrs]

# max_iters = EPOCHS * len(train_loader)
# SCHEDULER = PolyLR(OPTIMIZER, max_iters)

# Training and Validation Loop

In [15]:
def train(model, 
          train_loader, 
          val_loader, 
          epochs,
          num_classes,
          loss_fn, 
          optimizer, 
        #   scheduler,
          device,
          early_stop_threshold=10,
          save_prefix='deeplabv3plus',
          save_path='saved_models'):

    train_loss = []
    train_pixel_acc = [] 
    train_iou = []
    
    val_loss = []
    val_pixel_acc = []
    val_iou = []
    
    train_len = len(train_loader)
    val_len = len(val_loader)
    
    best_loss = np.inf
    not_improve = 0
    early_stop_threshold = early_stop_threshold
    
    os.makedirs(save_path, exist_ok=True)

    # Training loop
    train_start = timer()
    print('Start training process...')

    for epoch in range(1, epochs + 1):
        tic = timer()
        train_loss_ = 0
        train_iou_ = 0 
        train_pixel_acc_ = 0

        print(f'Epoch {epoch} train process is started...')
        model.train()
        
        for idx, batch in enumerate(tqdm(train_loader)):

            imgs = batch['image']
            gts = batch['label']
            imgs, gts = imgs.to(device), gts.to(device)
            
            optimizer.zero_grad()  # Clear the gradients first
            
            preds = model(imgs)

            train_metrics = Metrics(preds, gts, loss_fn, num_classes=num_classes)
            loss = train_metrics.loss()
            train_iou_ += train_metrics.mIOU()
            train_pixel_acc_ += train_metrics.PixelAcc()
            train_loss_ += loss.item()

            loss.backward()  # Backpropagate current batch loss
            optimizer.step() # Update weights

        # Update learning rate using PolyLR
        # scheduler.step()
        
        # Validation loop
        print(f'Epoch {epoch} validation process is started...')
        model.eval()
        
        val_loss_ = 0
        val_iou_ = 0
        val_pixel_acc_ = 0

        with torch.no_grad():
            for idx, batch in enumerate(tqdm(val_loader)):

                imgs = batch['image']
                gts = batch['label']
                imgs, gts = imgs.to(device), gts.to(device)
                
                preds = model(imgs)

                val_metrics = Metrics(preds, gts, loss_fn, num_classes=num_classes)
                val_loss_ += val_metrics.loss().item()
                val_iou_ += val_metrics.mIOU()
                val_pixel_acc_ += val_metrics.PixelAcc()

        print(f'Epoch {epoch} train process is completed.')

        train_loss_ /= train_len
        train_iou_ /= train_len
        train_pixel_acc_ /= train_len

        val_loss_ /= val_len
        val_iou_ /=  val_len
        val_pixel_acc_ /=   val_len

        print('\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
        print(f'\nEpoch {epoch} train process results: \n')
        print(f'Train Time         -> {timer(tic):.3f} secs')
        print(f'Train Loss         -> {train_loss_:.3f}')
        print(f'Train PA           -> {train_pixel_acc_:.3f}')
        print(f'Train IoU          -> {train_iou_:.3f}')
        print(f'Validation Loss    -> {val_loss_:.3f}')
        print(f'Validation PA      -> {val_pixel_acc_:.3f}')
        print(f'Validation IoU     -> {val_iou_:.3f}')
        # print(f'Current LR         -> {scheduler.get_last_lr()[0]:.6f}\n')

        train_loss.append(train_loss_)
        train_iou.append(train_iou_)
        train_pixel_acc.append(train_pixel_acc_)

        val_loss.append(val_loss_)
        val_iou.append(val_iou_)
        val_pixel_acc.append(val_pixel_acc_)

        if val_loss_ < best_loss:
            print(f'Loss decreased from {best_loss:.3f} to {val_loss_:.3f}!')
            best_loss = val_loss_
            not_improve = 0  # Reset counter
            print('Saving the model with the best loss value...')
            torch.save(model.state_dict(), f'{save_path}/{save_prefix}_best_model.pt')
        else:
            not_improve += 1
            print(f'Loss did not decrease for {not_improve} epoch(s)!')
            if not_improve >= early_stop_threshold:
                print(f'Stopping training process because loss did not decrease for {early_stop_threshold} epochs!')
                break
        print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n')

    print(f'Train process is completed in {(timer(train_start)) / 60:.3f} minutes.')

    return {'tr_loss': train_loss, 'tr_iou': train_iou, 'tr_pa': train_pixel_acc,
            'val_loss': val_loss, 'val_iou': val_iou, 'val_pa' : val_pixel_acc}


# Training setup
history = train(
    model,
    train_loader,
    val_loader,
    EPOCHS,
    NUM_CLASSES,
    CRITERION,
    OPTIMIZER,
    # SCHEDULER,
    device,
    save_prefix='deeplabv3plus_cubicasa',
)

Start training process...
Epoch 1 train process is started...


100%|██████████| 50/50 [01:48<00:00,  2.16s/it]


Epoch 1 validation process is started...


  0%|          | 0/20 [00:06<?, ?it/s]


RuntimeError: stack expects each tensor to be equal size, but got [3, 619, 1319] at entry 0 and [3, 865, 489] at entry 1

# Visualizing Performance

In [None]:
Plot(history)

# Testing 

In [None]:
# class Test():
#     def __init__(self, model, test_loader, loss_fn, device):
#         self.model = model
#         self.test_loader = test_loader
#         self.loss_fn = loss_fn
#         self.device = device
    
#     def run(self):
#         self.model.eval()
#         test_loss = 0
#         test_iou = 0
#         test_pixel_acc = 0
#         test_len = len(self.test_loader)

#         imgs = []
#         gts = []
#         preds = []

#         with torch.no_grad():
#             for batch in tqdm(self.test_loader):
#                 imgs_batch = batch['image']
#                 gts_batch = batch['label']
#                 imgs_batch, gts_batch = imgs_batch.to(self.device), gts_batch.to(self.device)

#                 # Forward pass
#                 preds_batch = self.model(imgs_batch)
                
#                 # Calculate metrics
#                 metrics = Metrics(preds_batch, gts_batch, self.loss_fn)
#                 test_loss += metrics.loss().item()
#                 test_iou += metrics.mIOU()
#                 test_pixel_acc += metrics.PixelAcc()

#                 # Collect data for visualization
#                 preds_batch = torch.argmax(preds_batch, dim=1)
#                 imgs.extend(imgs_batch.cpu())
#                 gts.extend(gts_batch.cpu())
#                 preds.extend(preds_batch.cpu())

#         # Calculate average metrics
#         test_loss /= test_len
#         test_iou /= test_len
#         test_pixel_acc /= test_len

#         return imgs, gts, preds, test_loss, test_iou, test_pixel_acc


# test = Test(model, test_loader, CRITERION, device)
# imgs, gts, preds, test_loss, test_iou, test_pixel_acc = test.run()

# print(f"Test Loss: {test_loss:.4f}")
# print(f"Test mIoU: {test_iou:.4f}")
# print(f"Test PA: {test_pixel_acc:.4f}")