In [1]:
import sys
import os
from pathlib import Path

# Add project root to path
# The notebook is expected to sit one level below the project root. This cell
# ends by changing the working directory to the project root, so a naive
# `cwd.parent` would climb one directory further on every re-run of the cell.
# If the current directory already holds the `src` package (imported below),
# treat it as the project root instead, which makes the cell safe to re-run.
notebook_dir = Path().resolve()
project_root = notebook_dir if (notebook_dir / "src").is_dir() else notebook_dir.parent

# Make `src.*` importable without installing the project.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))
# Relative data/checkpoint paths used later are resolved from the project root.
os.chdir(project_root)

In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import tifffile

from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    f1_score,
    accuracy_score,
)
from skimage.transform import resize
from torch.utils.data import DataLoader

from src.data.dataset import PixelClassificationDataset
from src.utils.helpers import get_image_and_mask_paths, compare_two_images
from src.models.model_loader import ModelLoader

# Run on Apple's Metal backend (MPS) when it is usable, otherwise on the CPU.
# torch.backends.mps.is_available() is the documented availability check;
# torch.mps.is_available() is missing from older PyTorch releases.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Using device: mps


  check_for_updates()


# Setup

In [3]:
# Image and mask folders, given relative to the working directory
# (the first cell changes the working directory to the project root).
DATA_DIR, MASK_DIR = (
    "data/cnn_training/resized_images",
    "data/cnn_training/resized_masks",
)
# Target shape for upscaling masks with skimage's resize();
# presumably the original image resolution as (rows, cols) - TODO confirm.
ORG_RES = 1920, 2560

In [4]:
# Collect the image/mask path lists for both splits in a single call, then
# unpack them in the order the helper returns them:
# train images, train masks, test images, test masks.
path_splits = get_image_and_mask_paths(DATA_DIR, MASK_DIR)
train_image_paths, train_mask_paths, test_image_paths, test_mask_paths = path_splits

# Evaluate three class predictions qualitatively per model

In [5]:
# Test set for the three-class models.
# NOTE(review): transform=None with augmentations_per_image=1 and
# include_original=True presumably yields the un-augmented test images only -
# confirm against PixelClassificationDataset.
test_dataset = PixelClassificationDataset(
    binary_mode=False,  # keep the multi-class labels (not collapsed to binary)
    include_original=True,
    augmentations_per_image=1,
    transform=None,
    mask_paths=test_mask_paths,
    image_paths=test_image_paths,
)

In [6]:
# Loader for the three-class checkpoints, constructed with the project root.
three_class_loader = ModelLoader(project_root)

# Display name -> (model key, model type) for every three-class model.
# The type is "cnn" for the torch networks and "rf" for the random forest.
three_class_specs = {
    "UNet_3": ("unet_3", "cnn"),
    "UNet++_3": ("unet++_3", "cnn"),
    "DeepLabV3_3": ("deeplabv3_3", "cnn"),
    "UNet_basic_3": ("unet_basic_3", "cnn"),
    "Random Forest": ("random_forest", "rf"),
}
three_class_loader.load_all_models(three_class_specs)

# NOTE(review): later cells index this by model name to get the model type,
# so list_models() presumably returns a name -> type mapping - confirm.
model_names = three_class_loader.list_models()

Loaded CNN: UNet_3
Loaded CNN: UNet++_3
Loaded CNN: DeepLabV3_3
Loaded CNN: UNet_basic_3
Model loaded from: /Users/levin/Documents/Uni/Master/semester_3/research_project/ASON/checkpoints/random_forest_balanced.joblib
Loaded RF: Random Forest
Loaded 5 models total
Loaded Models:
------------------------------------------------------------
UNet_3                         [cnn] unet_resnet34_imagenet_3c.pth
UNet++_3                       [cnn] unet++_resnet34_imagenet_3c.pth
DeepLabV3_3                    [cnn] deeplabv3_resnet34_imagenet_3c.pth
UNet_basic_3                   [cnn] unet_no_weights_3c.pth
Random Forest                  [rf ] random_forest_balanced.joblib
------------------------------------------------------------


In [14]:
# NOTE(review): this whole cell is commented out, so a fresh "Restart & Run All"
# produces none of the per-model figures for the three-class models.
# Caveat if it is re-enabled: skimage's resize() interpolates non-boolean input
# by default, so the upscaled label masks no longer hold pure class ids. That is
# fine for display, but use order=0 / anti_aliasing=False / preserve_range=True
# before computing any metric on the upscaled masks.
# for model_name in model_names:
#     model = three_class_loader.get_model(model_name)
#     model_type = model_names[model_name]
#     print("="*100)
#     print(f"Evaluating model: {model_name}")
#     for i in range(len(test_dataset)):
#         test_image_tensor, test_mask_tensor = test_dataset[i]

#         if model_type == "rf":
#             test_image = test_image_tensor.permute(1, 2, 0).cpu().numpy()
#             test_image = (test_image * 255).astype(np.uint8)
#             pred_mask = model(test_image)
        
#         else:
#             test_image_tensor = test_image_tensor.unsqueeze(0).to(device)

#             with torch.no_grad():
#                 pred_logits = model(test_image_tensor)
#                 pred_mask = torch.argmax(pred_logits, dim=1).squeeze()
#                 pred_mask = pred_mask.cpu().numpy()

#         test_mask = test_mask_tensor.numpy()
#         original_image = tifffile.imread(test_image_paths[i])
        
#         plt.imshow(original_image)
#         plt.axis("off")
#         plt.title("Original Image")
#         plt.show()

#         compare_two_images(pred_mask, test_mask, "Predicted Mask", "Original Mask")
        
#         pred_mask_org = resize(pred_mask, ORG_RES, anti_aliasing=True)
#         test_mask_org = resize(test_mask, ORG_RES, anti_aliasing=True)
#         compare_two_images(pred_mask_org, test_mask_org, "Predicted Mask ORG", "Original Mask ORG")

In [15]:
# NOTE(review): this whole cell is commented out, so a fresh "Restart & Run All"
# produces none of the side-by-side comparison figures (ground-truth mask plus
# one panel per three-class model).
# Caveats if it is re-enabled:
#   * the inner `for i, model_name in enumerate(...)` reuses `i` and shadows the
#     outer test-sample index; rename one of them.
#   * the 1x6 subplot grid is hard-coded for one mask panel plus five models.
#   * skimage's resize() interpolates non-boolean input by default, so the
#     upscaled label masks are for display only.
# for i in range(len(test_dataset)):
#     test_image_tensor, test_mask_tensor = test_dataset[i]
#     test_mask = test_mask_tensor.numpy()
#     test_mask_org = resize(test_mask, ORG_RES, anti_aliasing=True)
#     plt.figure(figsize=(60, 10))
#     plt.subplot(1, 6, 1)
#     plt.imshow(test_mask_org)
#     plt.axis("off")
#     plt.title("Original Mask")

#     test_image = test_image_tensor.permute(1, 2, 0).cpu().numpy()
#     test_image = (test_image * 255).astype(np.uint8)

#     test_image_tensor = test_image_tensor.unsqueeze(0).to(device)
    
#     for i, model_name in enumerate(model_names):
#         model = three_class_loader.get_model(model_name)
#         model_type = model_names[model_name]
        
#         if model_type == "rf":
#             pred_mask = model(test_image)
        
#         else:
#             with torch.no_grad():
#                 pred_logits = model(test_image_tensor)
#                 pred_mask = torch.argmax(pred_logits, dim=1).squeeze()
#                 pred_mask = pred_mask.cpu().numpy()
        
#         pred_mask_org = resize(pred_mask, ORG_RES, anti_aliasing=True)
        
#         plt.subplot(1, 6, i+2)
#         plt.imshow(pred_mask_org)
#         plt.axis("off")
#         plt.title(f"{model_name}")
    
#     plt.show()

Overall, either the standard pretrained UNet architecture or the DeepLabV3 model seems to produce the best results. One possibility would be to average the models (ensembling).

# Evaluate two class predictions qualitatively per model

In [9]:
# Test set for the two-class models; replaces the three-class `test_dataset`.
# NOTE(review): binary_mode=True presumably collapses the masks to two classes,
# and transform=None with augmentations_per_image=1 and include_original=True
# presumably yields the un-augmented test images only - confirm against
# PixelClassificationDataset.
test_dataset = PixelClassificationDataset(
    binary_mode=True,
    include_original=True,
    augmentations_per_image=1,
    transform=None,
    mask_paths=test_mask_paths,
    image_paths=test_image_paths,
)

In [10]:
# Loader for the two-class checkpoints, constructed with the project root.
two_class_loader = ModelLoader(project_root)

# Load all models for two classes.
# Display name -> (model key, model type); the type is "cnn" for the torch
# networks and "rf" for the random forest.
two_class_specs = {
    "UNet_2": ("unet_2", "cnn"),
    "UNet++_2": ("unet++_2", "cnn"),
    "DeepLabV3_2": ("deeplabv3_2", "cnn"),
    "UNet_basic_2": ("unet_basic_2", "cnn"),
    "Random Forest_2": ("random_forest_2", "rf"),
}
two_class_loader.load_all_models(two_class_specs)

# Rebinds `model_names`, which previously described the three-class models.
model_names = two_class_loader.list_models()

Loaded CNN: UNet_2
Loaded CNN: UNet++_2
Loaded CNN: DeepLabV3_2
Loaded CNN: UNet_basic_2
Model loaded from: /Users/levin/Documents/Uni/Master/semester_3/research_project/ASON/checkpoints/random_forest_2c_balanced.joblib
Loaded RF: Random Forest_2
Loaded 5 models total
Loaded Models:
------------------------------------------------------------
UNet_2                         [cnn] unet_resnet34_imagenet_2c.pth
UNet++_2                       [cnn] unet++_resnet34_imagenet_2c.pth
DeepLabV3_2                    [cnn] deeplabv3_resnet34_imagenet_2c.pth
UNet_basic_2                   [cnn] unet_no_weights_2c.pth
Random Forest_2                [rf ] random_forest_2c_balanced.joblib
------------------------------------------------------------


In [16]:
# NOTE(review): this whole cell is commented out, so a fresh "Restart & Run All"
# produces none of the per-model figures for the two-class models.
# Caveat if it is re-enabled: skimage's resize() interpolates non-boolean input
# by default, so the upscaled label masks no longer hold pure class ids. That is
# fine for display, but use order=0 / anti_aliasing=False / preserve_range=True
# before computing any metric on the upscaled masks.
# for model_name in model_names:
#     model = two_class_loader.get_model(model_name)
#     model_type = model_names[model_name]
#     print("="*100)
#     print(f"Evaluating model: {model_name}")
#     for i in range(len(test_dataset)):
#         test_image_tensor, test_mask_tensor = test_dataset[i]

#         if model_type == "rf":
#             test_image = test_image_tensor.permute(1, 2, 0).cpu().numpy()
#             test_image = (test_image * 255).astype(np.uint8)
#             pred_mask = model(test_image)
        
#         else:
#             test_image_tensor = test_image_tensor.unsqueeze(0).to(device)

#             with torch.no_grad():
#                 pred_logits = model(test_image_tensor)
#                 pred_mask = torch.argmax(pred_logits, dim=1).squeeze()
#                 pred_mask = pred_mask.cpu().numpy()

#         test_mask = test_mask_tensor.numpy()
#         original_image = tifffile.imread(test_image_paths[i])
        
#         plt.imshow(original_image)
#         plt.axis("off")
#         plt.title("Original Image")
#         plt.show()

#         compare_two_images(pred_mask, test_mask, "Predicted Mask", "Original Mask")
        
#         pred_mask_org = resize(pred_mask, ORG_RES, anti_aliasing=True)
#         test_mask_org = resize(test_mask, ORG_RES, anti_aliasing=True)
#         compare_two_images(pred_mask_org, test_mask_org, "Predicted Mask ORG", "Original Mask ORG")

In [17]:
# NOTE(review): this whole cell is commented out, so a fresh "Restart & Run All"
# produces none of the side-by-side comparison figures (ground-truth mask plus
# one panel per two-class model).
# Caveats if it is re-enabled:
#   * the inner `for i, model_name in enumerate(...)` reuses `i` and shadows the
#     outer test-sample index; rename one of them.
#   * the 1x6 subplot grid is hard-coded for one mask panel plus five models.
#   * skimage's resize() interpolates non-boolean input by default, so the
#     upscaled label masks are for display only.
# for i in range(len(test_dataset)):
#     test_image_tensor, test_mask_tensor = test_dataset[i]
#     test_mask = test_mask_tensor.numpy()
#     test_mask_org = resize(test_mask, ORG_RES, anti_aliasing=True)
#     plt.figure(figsize=(60, 10))
#     plt.subplot(1, 6, 1)
#     plt.imshow(test_mask_org)
#     plt.axis("off")
#     plt.title("Original Mask")

#     test_image = test_image_tensor.permute(1, 2, 0).cpu().numpy()
#     test_image = (test_image * 255).astype(np.uint8)

#     test_image_tensor = test_image_tensor.unsqueeze(0).to(device)
    
#     for i, model_name in enumerate(model_names):
#         model = two_class_loader.get_model(model_name)
#         model_type = model_names[model_name]
        
#         if model_type == "rf":
#             pred_mask = model(test_image)
        
#         else:
#             with torch.no_grad():
#                 pred_logits = model(test_image_tensor)
#                 pred_mask = torch.argmax(pred_logits, dim=1).squeeze()
#                 pred_mask = pred_mask.cpu().numpy()
        
#         pred_mask_org = resize(pred_mask, ORG_RES, anti_aliasing=True)
        
#         plt.subplot(1, 6, i+2)
#         plt.imshow(pred_mask_org)
#         plt.axis("off")
#         plt.title(f"{model_name}")
    
#     plt.show()

We can see that the two-class prediction task is much easier for the models, which is expected. From the qualitative evaluation, UNet and DeepLabV3 again seem to perform best; here, a combination of both could be even better.