# Setup dependencies

In [1]:
from pathlib import Path

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm

import torch
from torch.utils.data import DataLoader
from torchmetrics import Accuracy, JaccardIndex, MetricCollection
from torchinfo import summary

from scipy.stats import f_oneway, shapiro, levene

from floortrans.loaders import FloorplanSVG
from floortrans.loaders.augmentations import Compose, ResizePaddedTorch, DictToTensor

from model.deeplabv3plus import DeepLabV3Plus

# Reproducibility: fixed torch seed and deterministic cuDNN kernels (no autotuning)
torch.manual_seed(0)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is visible, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Release cached GPU memory left over from earlier runs to avoid memory errors
torch.cuda.empty_cache()

print('Setup completed')

Setup completed


# Load test dataset

In [2]:
# Spatial size every sample is brought to before it is fed to the models
img_shape = (256, 256)

# Augmentation pipeline applied to each sample dict
# NOTE(review): presumably resizes/pads to `img_shape` and converts to tensors — confirm in floortrans
aug = Compose([
    ResizePaddedTorch((0, 0), data_format='dict', size=img_shape), 
    DictToTensor()
])

test_dataset = FloorplanSVG('data/cubicasa5k/', 'test.txt', format='lmdb', augmentations=aug)

# batch_size=1 and shuffle=False: each batch is exactly one image, in a fixed order
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=0)

# Fetch the first sample once instead of loading and augmenting it for every print
first_sample = test_dataset[0]

print(f'Test dataset size: {len(test_dataset)}')
print(f'Test images shape: {first_sample["image"].shape}')
print(f'Test labels shape: {first_sample["label"].shape}')

Test dataset size: 400
Test images shape: torch.Size([3, 256, 256])
Test labels shape: torch.Size([23, 256, 256])


# Setup models

In [3]:
# Baseline DeepLabV3+ (MobileNetV2 backbone, attention modules disabled)
deeplab_base_path = 'best_models/dlv3p_mobilenet_v2_base.pt'

deeplab_base = DeepLabV3Plus(backbone='mobilenet_v2', attention=False)

# map_location remaps the stored tensors onto `device`, so a checkpoint saved on a GPU
# still loads when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source,
# and consider weights_only=True if the checkpoint holds nothing but tensors/plain containers.
deeplab_base.load_state_dict(
    torch.load(deeplab_base_path, map_location=device)['model_state_dict']
)
deeplab_base.to(device)

summary(deeplab_base, input_size=(1, 3, img_shape[0], img_shape[1]))

  deeplab_base.load_state_dict(torch.load(deeplab_base_path)['model_state_dict'])


Layer (type:depth-idx)                                       Output Shape              Param #
DeepLabV3Plus                                                [1, 12, 256, 256]         --
├─Backbone: 1-1                                              [1, 24, 64, 64]           --
│    └─MobileNetV2: 2-1                                      --                        1,281,000
│    │    └─Sequential: 3-1                                  --                        2,223,872
├─ASPP: 1-2                                                  [1, 256, 8, 8]            --
│    └─Conv2d: 2-2                                           [1, 256, 8, 8]            327,680
│    └─AtrousConv: 2-3                                       [1, 256, 8, 8]            --
│    │    └─DepthwiseSeparableConv: 3-2                      [1, 256, 8, 8]            339,712
│    └─AtrousConv: 2-4                                       [1, 256, 8, 8]            --
│    │    └─DepthwiseSeparableConv: 3-3                      [1, 256, 8

In [4]:
# Modified DeepLabV3+ (MobileNetV2 backbone, attention modules enabled)
deeplab_casa_path = 'best_models/dlv3p_mobilenet_v2_ca_sa.pt'

deeplab_casa = DeepLabV3Plus(backbone='mobilenet_v2', attention=True)

# NOTE(review): the recorded run of this cell failed with a state_dict mismatch — the checkpoint's
# `ca.mlp.2` weights are [256, 16] while the current model expects [128, 16] plus an extra
# `ca.mlp.4` layer. The checkpoint therefore appears to come from a different channel-attention
# layout; restore the matching model definition or re-export the checkpoint. Do not work around
# it with strict=False: the size mismatches would still fail and missing layers would stay random.
#
# map_location remaps the stored tensors onto `device`, so a checkpoint saved on a GPU
# still loads when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
deeplab_casa.load_state_dict(
    torch.load(deeplab_casa_path, map_location=device)['model_state_dict']
)
deeplab_casa.to(device)

summary(deeplab_casa, input_size=(1, 3, img_shape[0], img_shape[1]))

  deeplab_casa.load_state_dict(torch.load(deeplab_casa_path)['model_state_dict'])


RuntimeError: Error(s) in loading state_dict for DeepLabV3Plus:
	Missing key(s) in state_dict: "aspp.atrous_conv_rate6.ca.mlp.4.weight", "aspp.atrous_conv_rate6.ca.mlp.4.bias", "aspp.atrous_conv_rate12.ca.mlp.4.weight", "aspp.atrous_conv_rate12.ca.mlp.4.bias", "aspp.atrous_conv_rate18.ca.mlp.4.weight", "aspp.atrous_conv_rate18.ca.mlp.4.bias". 
	size mismatch for aspp.atrous_conv_rate6.ca.mlp.2.weight: copying a param with shape torch.Size([256, 16]) from checkpoint, the shape in current model is torch.Size([128, 16]).
	size mismatch for aspp.atrous_conv_rate6.ca.mlp.2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for aspp.atrous_conv_rate12.ca.mlp.2.weight: copying a param with shape torch.Size([256, 16]) from checkpoint, the shape in current model is torch.Size([128, 16]).
	size mismatch for aspp.atrous_conv_rate12.ca.mlp.2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for aspp.atrous_conv_rate18.ca.mlp.2.weight: copying a param with shape torch.Size([256, 16]) from checkpoint, the shape in current model is torch.Size([128, 16]).
	size mismatch for aspp.atrous_conv_rate18.ca.mlp.2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([128]).

# Evaluate models on test set

In [None]:
def compute_combined_metrics(room_class_metrics, icon_class_metrics, combined_class_freq):
    """Merge per-class room and icon scores into metrics over all classes.

    Args:
        room_class_metrics: mapping with 'acc' and 'iou' entries whose `.compute()`
            returns a 1-D tensor of per-class scores for the room head.
        icon_class_metrics: same as above for the icon head.
        combined_class_freq: 1-D tensor of per-class frequencies, room classes first
            and icon classes after them (same order as the concatenated scores).

    Returns:
        dict with
            'mpa'   - plain mean of the concatenated per-class accuracies (float),
            'cpa'   - the concatenated per-class accuracies (list of floats),
            'miou'  - plain mean of the concatenated per-class IoUs (float),
            'fwiou' - IoUs weighted by each class's share of `combined_class_freq` (float).
    """
    def _room_then_icon(metric_key):
        # Room scores come first so the layout matches `combined_class_freq`
        return torch.cat([
            room_class_metrics[metric_key].compute(),
            icon_class_metrics[metric_key].compute(),
        ])

    per_class_acc = _room_then_icon('acc')
    per_class_iou = _room_then_icon('iou')

    # Frequency-weighted IoU: weight every class by its share of all counted labels
    class_share = combined_class_freq / combined_class_freq.sum()
    weighted_iou = (class_share * per_class_iou).sum()

    return {
        'mpa': per_class_acc.mean().item(),
        'cpa': per_class_acc.tolist(),
        'miou': per_class_iou.mean().item(),
        'fwiou': weighted_iou.item()
    }


def evaluate(model, model_name, loader, device):
    """Evaluate `model` on every batch of `loader` and report dataset-level metrics.

    The model is switched to eval mode and run without gradients. Channel 21 of
    `batch['label']` is read as the room label map and channel 22 as the icon label
    map; the model's third output is ignored.

    Args:
        model: network returning `(room_logits, icon_logits, <ignored>)` for an image batch.
        model_name: label shown in the progress bar.
        loader: iterable of dicts with 'image' and 'label' tensors.
        device: device the inputs and metrics are moved to.

    Returns:
        dict with `room_*`, `icon_*` and `combined_*` entries for 'mpa', 'cpa' (list),
        'miou' and 'fwiou', every value rounded to 4 decimal places.
    """
    model.eval()

    num_room_classes, num_icon_classes = 12, 11

    def _aggregate_metrics(num_classes):
        # Dataset-level metrics for one output head, accumulated across batches
        return {
            'mpa': Accuracy(task='multiclass', num_classes=num_classes, average='macro').to(device),
            'cpa': Accuracy(task='multiclass', num_classes=num_classes, average='none').to(device),
            'miou': JaccardIndex(task='multiclass', num_classes=num_classes, average='macro').to(device),
            'fwiou': JaccardIndex(task='multiclass', num_classes=num_classes, average='weighted').to(device),
        }

    def _per_class_metrics(num_classes):
        # Unaveraged per-class scores, needed to merge both heads afterwards
        return MetricCollection({
            'acc': Accuracy(task='multiclass', num_classes=num_classes, average=None),
            'iou': JaccardIndex(task='multiclass', num_classes=num_classes, average=None)
        }).to(device)

    def _scalar(metric):
        return round(metric.compute().item(), 4)

    def _rounded_list(values):
        return [round(value, 4) for value in values]

    room_metrics = _aggregate_metrics(num_room_classes)
    icon_metrics = _aggregate_metrics(num_icon_classes)
    room_class_metrics = _per_class_metrics(num_room_classes)
    icon_class_metrics = _per_class_metrics(num_icon_classes)

    # Label counts per class (room classes first, icon classes after) for the combined fwIoU
    combined_class_freq = torch.zeros(num_room_classes + num_icon_classes).to(device)

    with torch.no_grad():
        for batch in tqdm(loader, desc=f'Evaluating {model_name}'):
            labels = batch['label']
            images = batch['image'].float().to(device)
            room_labels = labels[:, 21].long().to(device)
            icon_labels = labels[:, 22].long().to(device)

            # Third output (heatmaps, per the original notebook) is not evaluated
            room_logits, icon_logits, _ = model(images)
            room_preds = room_logits.argmax(dim=1)
            icon_preds = icon_logits.argmax(dim=1)

            # Calling a metric accumulates this batch into its running state
            for metric in room_metrics.values():
                metric(room_preds, room_labels)
            for metric in icon_metrics.values():
                metric(icon_preds, icon_labels)
            room_class_metrics(room_preds, room_labels)
            icon_class_metrics(icon_preds, icon_labels)

            combined_class_freq[:num_room_classes] += torch.bincount(
                room_labels.flatten(), minlength=num_room_classes
            )
            combined_class_freq[num_room_classes:] += torch.bincount(
                icon_labels.flatten(), minlength=num_icon_classes
            )

    combined_metrics = compute_combined_metrics(room_class_metrics, icon_class_metrics, combined_class_freq)

    return {
        'room_mpa': _scalar(room_metrics['mpa']),
        'room_cpa': _rounded_list(room_metrics['cpa'].compute().tolist()),
        'room_miou': _scalar(room_metrics['miou']),
        'room_fwiou': _scalar(room_metrics['fwiou']),
        'icon_mpa': _scalar(icon_metrics['mpa']),
        'icon_cpa': _rounded_list(icon_metrics['cpa'].compute().tolist()),
        'icon_miou': _scalar(icon_metrics['miou']),
        'icon_fwiou': _scalar(icon_metrics['fwiou']),
        'combined_mpa': round(combined_metrics['mpa'], 4),
        'combined_cpa': _rounded_list(combined_metrics['cpa']),
        'combined_miou': round(combined_metrics['miou'], 4),
        'combined_fwiou': round(combined_metrics['fwiou'], 4)
    }


In [None]:
# Dataset-level metrics for both models (full pass over the test loader each)
deeplab_base_results = evaluate(deeplab_base, 'DeepLabV3+ Base', test_loader, device)
deeplab_casa_results = evaluate(deeplab_casa, 'DeepLabV3+ CA & SA', test_loader, device)

# One table column per scalar metric; each column lists base first, modified second
results_dict = {
    'Model': ['Base DeepLabV3+', 'DeepLabV3+ w/ CA & SA'],
    **{
        column: [deeplab_base_results[key], deeplab_casa_results[key]]
        for column, key in (
            ('Room MPA', 'room_mpa'),
            ('Room mIoU', 'room_miou'),
            ('Room fWIoU', 'room_fwiou'),
            ('Icon MPA', 'icon_mpa'),
            ('Icon mIoU', 'icon_miou'),
            ('Icon fWIoU', 'icon_fwiou'),
            ('Combined MPA', 'combined_mpa'),
            ('Combined mIoU', 'combined_miou'),
            ('Combined fWIoU', 'combined_fwiou'),
        )
    },
}

results_df = pd.DataFrame(results_dict)
results_df

In [None]:
# Display names for the 12 room classes and 11 icon classes, in class-index order
room_classes = [
    "Background", "Outdoor", "Wall", "Kitchen", "Living Room", "Bedroom",
    "Bath", "Hallway", "Railing", "Storage", "Garage", "Other rooms",
]
icon_classes = [
    "Empty", "Window", "Door", "Closet", "Electr. Appl.", "Toilet",
    "Sink", "Sauna bench", "Fire Place", "Bathtub", "Chimney",
]
combined_classes = room_classes + icon_classes

# class name -> [base accuracy, modified accuracy]
room_class_acc = {
    name: [deeplab_base_results['room_cpa'][idx], deeplab_casa_results['room_cpa'][idx]]
    for idx, name in enumerate(room_classes)
}
icon_class_acc = {
    name: [deeplab_base_results['icon_cpa'][idx], deeplab_casa_results['icon_cpa'][idx]]
    for idx, name in enumerate(icon_classes)
}
combined_class_acc = {
    name: [deeplab_base_results['combined_cpa'][idx], deeplab_casa_results['combined_cpa'][idx]]
    for idx, name in enumerate(combined_classes)
}

# One row per model, one column per class
room_class_acc_df = pd.DataFrame(room_class_acc, index=['DeepLabV3+ Base', 'DeepLabV3+ CA & SA'])
icon_class_acc_df = pd.DataFrame(icon_class_acc, index=['DeepLabV3+ Base', 'DeepLabV3+ CA & SA'])
combined_class_acc_df = pd.DataFrame(combined_class_acc, index=['DeepLabV3+ Base', 'DeepLabV3+ CA & SA'])

In [None]:
# Per-class pixel accuracy of the room head: one row per model, one column per room class
room_class_acc_df

In [None]:
# Per-class pixel accuracy of the icon head: one row per model, one column per icon class
icon_class_acc_df

# Research Question 1
What is the level of accuracy of the modified DeepLabv3+ with CA and SA modules for segmenting both core floor plan objects and furniture in terms of:
- Class Pixel Accuracy (Class Acc)
- Overall Pixel Accuracy (Acc.)
- Mean Intersection over Union (mIoU)
- Frequency Weighted Intersection over Union (fwIoU)

In [None]:
# Research question 1: aggregate metrics over all room + icon classes, one row per model
sop1_agg_results = pd.DataFrame([
    {
        'Model': model_label,
        'mPA': model_results['combined_mpa'],
        'mIoU': model_results['combined_miou'],
        'fWIoU': model_results['combined_fwiou'],
    }
    for model_label, model_results in (
        ('Base DeepLabV3+', deeplab_base_results),
        ('DeepLabV3+ w/ CA & SA', deeplab_casa_results),
    )
])

sop1_agg_results

In [None]:
# Bar chart comparing the combined mPA of the two models (one coloured bar per model)
sns.catplot(
    data=sop1_agg_results,
    kind='bar',
    x='Model',
    y='mPA',
    hue='Model',
)
plt.show()

In [None]:
# Research question 1: per-class accuracy over all room + icon classes, one row per class
sop1_class_results = pd.DataFrame({'Class': combined_classes}).assign(**{
    'DeepLabV3+ Base': deeplab_base_results['combined_cpa'],
    'DeepLabV3+ CA & SA': deeplab_casa_results['combined_cpa'],
})

sop1_class_results

# Research Question 2
What is the level of accuracy of the modified DeepLabv3+ with CA and SA modules for segmenting only core floor plan objects in terms of:
- Class Pixel Accuracy (Class Acc)
- Overall Pixel Accuracy (Acc.)
- Mean Intersection over Union (mIoU)
- Frequency Weighted Intersection over Union (fwIoU)

In [None]:
# Research question 2: aggregate metrics of the room head only, one row per model
sop2_agg_results = pd.DataFrame([
    {
        'Model': model_label,
        'mPA': model_results['room_mpa'],
        'mIoU': model_results['room_miou'],
        'fWIoU': model_results['room_fwiou'],
    }
    for model_label, model_results in (
        ('Base DeepLabV3+', deeplab_base_results),
        ('DeepLabV3+ w/ CA & SA', deeplab_casa_results),
    )
])

sop2_agg_results

In [None]:
# Research question 2: per-class accuracy of the room head, one row per room class.
# Column headers follow the 'DeepLabV3+ Base' / 'DeepLabV3+ CA & SA' pair used by the
# other class-wise tables in this notebook so the exported sheets line up.
sop2_class_results = pd.DataFrame({
    'Class': room_classes,
    'DeepLabV3+ Base': deeplab_base_results['room_cpa'],
    'DeepLabV3+ CA & SA': deeplab_casa_results['room_cpa']
})

sop2_class_results

# Research Question 3
Is there a significant difference in the performance of the modified DeepLabv3+ with CA and SA modules compared to the unmodified base model in terms of:
- Overall Pixel Accuracy (Acc.)
- Mean Intersection over Union (mIoU)
- Frequency Weighted Intersection over Union (fwIoU)

Compute per-image mPA, mIoU, and fwIoU

In [None]:
def evaluate_per_image(model, model_name, loader, device):
    """Compute combined (room + icon) mPA, mIoU and fwIoU separately for every batch.

    Each batch is scored on its own: the metric states are reset before every batch
    and the class frequencies are counted from that batch's labels only. With the
    notebook's `batch_size=1` loader, one batch is one image, so the returned values
    are independent per-image scores (not running averages over the images seen so far).

    Channel 21 of `batch['label']` is read as the room label map and channel 22 as the
    icon label map; the model's third output is ignored. The model is put in eval mode.

    NOTE(review): classes that do not occur in an image still enter the plain means with
    whatever score torchmetrics reports for an empty class — confirm this is the intended
    per-image definition.

    Args:
        model: network returning `(room_logits, icon_logits, <ignored>)` for an image batch.
        model_name: label shown in the progress bar.
        loader: iterable of dicts with 'image' and 'label' tensors.
        device: device the inputs, metrics and returned tensors live on.

    Returns:
        dict with 1-D tensors 'mpa', 'miou' and 'fwiou', one entry per batch, in loader order.
    """
    # Inference behaviour for dropout / batch-norm, same as in `evaluate`
    model.eval()

    num_room_classes, num_icon_classes = 12, 11

    # Unaveraged per-class scores for each head; reused (after a reset) for every batch
    room_class_metrics = MetricCollection({
        'acc': Accuracy(task='multiclass', num_classes=num_room_classes, average=None),
        'iou': JaccardIndex(task='multiclass', num_classes=num_room_classes, average=None)
    }).to(device)

    icon_class_metrics = MetricCollection({
        'acc': Accuracy(task='multiclass', num_classes=num_icon_classes, average=None),
        'iou': JaccardIndex(task='multiclass', num_classes=num_icon_classes, average=None)
    }).to(device)

    # Plain Python lists; converted to tensors once at the end instead of growing
    # tensors with torch.cat on every iteration
    mpa_values, miou_values, fwiou_values = [], [], []

    with torch.no_grad():
        for batch in tqdm(loader, desc=f'Evaluating {model_name}'):
            # Extract input images and labels
            images = batch['image'].float().to(device)
            room_labels = batch['label'][:, 21].long().to(device)
            icon_labels = batch['label'][:, 22].long().to(device)

            # Get raw outputs (omitted heatmap output)
            room_logits, icon_logits, _ = model(images)

            # Get predictions
            room_preds = room_logits.argmax(dim=1)
            icon_preds = icon_logits.argmax(dim=1)

            # Drop the previous batch's state so the scores describe this batch only
            room_class_metrics.reset()
            icon_class_metrics.reset()
            room_class_metrics.update(room_preds, room_labels)
            icon_class_metrics.update(icon_preds, icon_labels)

            # Per-class scores, room classes first and icon classes after
            combined_acc = torch.cat([
                room_class_metrics['acc'].compute(),
                icon_class_metrics['acc'].compute(),
            ])
            combined_iou = torch.cat([
                room_class_metrics['iou'].compute(),
                icon_class_metrics['iou'].compute(),
            ])

            # Label counts of this batch only, laid out like the concatenated scores
            class_freq = torch.cat([
                torch.bincount(room_labels.flatten(), minlength=num_room_classes),
                torch.bincount(icon_labels.flatten(), minlength=num_icon_classes),
            ]).float()

            mpa_values.append(combined_acc.mean().item())
            miou_values.append(combined_iou.mean().item())
            fwiou_values.append((class_freq / class_freq.sum() * combined_iou).sum().item())

    return {
        'mpa': torch.tensor(mpa_values, device=device),
        'miou': torch.tensor(miou_values, device=device),
        'fwiou': torch.tensor(fwiou_values, device=device)
    }


# Per-image scores for both models; these lists are the samples for the statistical test
base_per_img_res = evaluate_per_image(deeplab_base, 'DeepLabV3+ Base', test_loader, device)
casa_per_img_res = evaluate_per_image(deeplab_casa, 'DeepLabV3+ CA & SA', test_loader, device)

mPA_base = base_per_img_res['mpa'].tolist()
mPA_modified = casa_per_img_res['mpa'].tolist()

mIoU_base = base_per_img_res['miou'].tolist()
mIoU_modified = casa_per_img_res['miou'].tolist()

fwIoU_base = base_per_img_res['fwiou'].tolist()
fwIoU_modified = casa_per_img_res['fwiou'].tolist()

# Preview the first five per-image values of every metric
print('mPA')
print('Base:', mPA_base[:5])
print('Mod: ', mPA_modified[:5])
print()

print('mIoU')
print('Base:', mIoU_base[:5])
print('Mod: ', mIoU_modified[:5])
print()

# Show the fwIoU lists here (this section previously re-printed the mIoU values)
print('fwIoU')
print('Base:', fwIoU_base[:5])
print('Mod: ', fwIoU_modified[:5])

Statistical Analysis

In [None]:
# One-way ANOVA per metric, comparing the modified model's per-image scores with the base model's.
# NOTE(review): both samples are computed on the same test images, so they are paired —
# confirm that an independent-samples ANOVA (rather than a paired test) is intended.
(
    (f_stat_mPA, p_value_mPA),
    (f_stat_mIoU, p_value_mIoU),
    (f_stat_fwIoU, p_value_fwIoU),
) = (
    f_oneway(modified_scores, base_scores)
    for modified_scores, base_scores in (
        (mPA_modified, mPA_base),
        (mIoU_modified, mIoU_base),
        (fwIoU_modified, fwIoU_base),
    )
)

# A metric counts as significantly different when its p-value is below 0.05
sop3_results = pd.DataFrame({
    'Metric': ['mPA', 'mIoU', 'fwIoU'],
    'F-Statistic': [f_stat_mPA, f_stat_mIoU, f_stat_fwIoU],
    'P-Value': [p_value_mPA, p_value_mIoU, p_value_fwIoU],
    'Significant Difference': [
        'Yes' if p_value < 0.05 else 'No'
        for p_value in (p_value_mPA, p_value_mIoU, p_value_fwIoU)
    ]
})

sop3_results

# Save results as spreadsheet

In [None]:
# Write every result table to its own sheet of a single workbook
results_path = Path('test_results/experiments.xlsx')

# Create the output folder on first use; ExcelWriter does not create missing directories
results_path.parent.mkdir(parents=True, exist_ok=True)

with pd.ExcelWriter(results_path) as writer:
    sop1_agg_results.to_excel(writer, sheet_name='SOP1 Aggregated')
    sop1_class_results.to_excel(writer, sheet_name='SOP1 Class-wise')
    sop2_agg_results.to_excel(writer, sheet_name='SOP2 Aggregated')
    sop2_class_results.to_excel(writer, sheet_name='SOP2 Class-wise')
    sop3_results.to_excel(writer, sheet_name='SOP3')

print('Results saved to spreadsheet')