In [None]:
# IPython autoreload in mode 2: imported modules are reloaded before each cell
# executes, so edits to local helper modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

# Exercise 3

<img src="./images/03.png" width=800>

In [None]:
import os

import matplotlib.pyplot as plt
import mlflow
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchinfo import summary
from torchvision import transforms
from tqdm.autonotebook import tqdm

from utils import train_network, set_seed


In [None]:
# Export the notebook-local tracking directory through the environment, then
# pass that same value to mlflow.set_tracking_uri.
os.environ.update(MLFLOW_TRACKING_URI='./mlruns08_3')
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])

In [None]:
# Select the MLflow experiment that the runs started below are recorded under.
mlflow.set_experiment(experiment_name='Exercise08_3')

<Experiment: artifact_location='/home/spakdel/my_projects/Books/Inside-Deep-Learning/Exercises_InsideDeepLearning/Chapter_07/mlruns07_1/143507330168611334', creation_time=1750415411076, experiment_id='143507330168611334', last_update_time=1750415411076, lifecycle_stage='active', name='Exercise07_1', tags={}>

In [None]:
# Reproducibility: request deterministic cuDNN kernels, then seed through the
# project's set_seed helper.
torch.backends.cudnn.deterministic = True
set_seed(42)
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Dataset and DataLoader

In [None]:
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
import re
import os
from glob import glob

# Download and unpack the 2018 Data Science Bowl stage-1 training archive.
data_url_zip = "https://github.com/kamalkraj/DATA-SCIENCE-BOWL-2018/blob/master/data/stage1_train.zip?raw=true"
path = './data/stage1_train'
os.makedirs(path, exist_ok=True)

# Only hit the network when nothing has been extracted yet, so that
# "Restart & Run All" does not re-download the archive on every run.
# NOTE: delete the directory to force a fresh download (e.g. after an
# interrupted extraction left it partially filled).
if not os.listdir(path):
    # Context managers close both the HTTP response and the archive.
    with urlopen(data_url_zip) as resp, ZipFile(BytesIO(resp.read())) as archive:
        archive.extractall(path=path)

# One directory per sample. Sorting makes the order independent of the
# filesystem, which the seeded random_split needs to be reproducible.
paths = sorted(p for p in glob(path + '/*') if os.path.isdir(p))

In [None]:
class DSB2018(Dataset):
    """Dataset class for the 2018 Data Science Bowl.

    Every item is an ``(image, mask)`` pair of float tensors: the RGB image
    resized to ``size`` and a single-channel {0, 1} mask that is the union of
    all per-object masks stored for that sample.
    """

    def __init__(self, paths, size=(256, 256)):
        """
        paths: a list of paths to every image folder in the dataset
        size: (height, width) that every image and mask is resized to
        """
        self.paths = paths
        # Built once here instead of on every __getitem__ call.
        self.transform = transforms.Compose([
            transforms.Resize(size),
            transforms.ToTensor()])

    def __len__(self):
        return len(self.paths)

    @staticmethod
    def _union_masks(masks, shape):
        """Pixel-wise logical OR of ``masks`` as a {0, 1} float tensor of ``shape``.

        masks: iterable of tensors of shape ``shape``; any non-zero value counts
            as foreground. An empty iterable yields an all-background mask.
        """
        merged = torch.zeros(shape, dtype=torch.bool)
        for mask in masks:
            merged |= mask.bool()
        return merged.float()

    def _load(self, file_path, mode):
        """Open ``file_path``, convert it to PIL mode ``mode`` and apply the transform."""
        # The context manager closes the underlying file once it has been read.
        with Image.open(file_path) as img:
            return self.transform(img.convert(mode))

    def __getitem__(self, idx):
        sample_dir = self.paths[idx]
        # There is only one image in each images path, so take the first match.
        image_files = sorted(glob(sample_dir + "/images/*"))
        if not image_files:
            raise FileNotFoundError(f"No image found in {sample_dir}/images")
        # Images are stored with an unused alpha channel; 'RGB' drops it.
        image = self._load(image_files[0], 'RGB')
        # ...but there are multiple mask images in each mask path: one per object.
        mask_files = sorted(glob(sample_dir + "/masks/*"))
        # Masks are streamed one at a time and always binarized, so samples with
        # a single mask get the same {0, 1} encoding as samples with many.
        final_mask = self._union_masks(
            (self._load(mask_path, 'L') for mask_path in mask_files),
            (1, *image.shape[1:]))
        return image, final_mask

In [None]:
# 500 randomly chosen samples are used for training, the remainder for testing.
dns_data = DSB2018(paths)
n_train = 500
n_test = len(dns_data) - n_train
train_data, test_data = random_split(dns_data, [n_train, n_test])

batch_size = 16
# Only the training loader shuffles; the test loader keeps a fixed order.
train_seg_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_seg_loader = DataLoader(dataset=test_data, batch_size=batch_size)

## Model

In [None]:
# Channel configuration shared by both segmentation models.
C = 3           # input channels (images are loaded as RGB)
n_filters = 32  # filters in the first stage; deeper stages use multiples of it

### Upsample with ConvTranspose2d

In [None]:
def cnn_layer(in_filters, out_filters, kernel_size=3):
    """Return a Conv2d -> BatchNorm2d -> LeakyReLU block.

    The convolution is padded with ``kernel_size // 2`` so that, for odd
    kernel sizes, the spatial size of the input is preserved.
    """
    same_padding = kernel_size // 2
    stages = [
        nn.Conv2d(in_filters, out_filters, kernel_size, padding=same_padding),
        nn.BatchNorm2d(out_filters),
        nn.LeakyReLU(),
    ]
    return nn.Sequential(*stages)

In [None]:
def _build_convtranspose_model(in_channels, base_filters):
    """Assemble the flat encoder/decoder that upsamples with ConvTranspose2d.

    Layers are created in exactly the order they appear in the network, so the
    module indices and the parameter-initialisation order are unchanged.
    """
    widths = [base_filters * 2 ** level for level in range(4)]  # n, 2n, 4n, 8n
    layers = []
    current = in_channels

    # Encoder: two conv blocks per resolution, halving the size between stages.
    # The very first block changes the number of channels up to the large number.
    for level, width in enumerate(widths):
        if level > 0:
            layers.append(nn.MaxPool2d(2))
        layers.append(cnn_layer(current, width))
        layers.append(cnn_layer(width, width))
        current = width

    # Decoder: each transposed convolution doubles the spatial size again.
    for width in reversed(widths[:-1]):
        layers.append(nn.ConvTranspose2d(current, width, (3,3), padding=1, output_padding=1, stride=2))
        layers.append(nn.BatchNorm2d(width))
        layers.append(nn.LeakyReLU())
        if width != widths[0]:
            # No extra conv block after the last upsampling step.
            layers.append(cnn_layer(width, width))
        current = width

    # Single-channel output head; shape is now (B, 1, W, H).
    layers.append(nn.Conv2d(current, 1, (3,3), padding=1))
    return nn.Sequential(*layers)


convtranspose_model = _build_convtranspose_model(C, n_filters)

### Upsample with Conv2dExpansion

In [None]:
class Conv2dExpansion(nn.Module):
    """Upsample by a factor of 2 (nearest neighbour), then apply a Conv2d.

    n_filters_in / n_filters_out: input / output channels of the convolution.
    kernel_size: int or (kH, kW) tuple; the padding is ``k // 2`` per dimension.
    stride: stride of the convolution that follows the upsampling.
    """
    def __init__(self, n_filters_in, n_filters_out, kernel_size, stride=1):
        super().__init__()
        # Accept both the int and the tuple form that nn.Conv2d allows;
        # `kernel_size // 2` alone raises TypeError for a tuple.
        if isinstance(kernel_size, int):
            padding = kernel_size // 2
        else:
            padding = tuple(k // 2 for k in kernel_size)
        # Upsample the image by a factor of 2
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest') # or 'bilinear', 'bicubic'
        self.conv = nn.Conv2d(n_filters_in, n_filters_out, kernel_size, stride=stride, padding=padding)

    def forward(self, x):
        """Return ``conv(upsample(x))``."""
        x = self.upsample(x)
        x = self.conv(x)
        return x

In [None]:
# Same encoder/decoder layout as the ConvTranspose2d model, but every
# upsampling step is a Conv2dExpansion (upsample, then convolve).
# The kernel size is given as a plain int because Conv2dExpansion computes
# its padding from it arithmetically.
conv2dexpansion_model = nn.Sequential(
    cnn_layer(C, n_filters), #First layer changes number of channels up to the large number
    cnn_layer(n_filters, n_filters),
    nn.MaxPool2d(2),
    cnn_layer(n_filters, 2*n_filters),
    cnn_layer(2*n_filters, 2*n_filters),
    nn.MaxPool2d(2),
    cnn_layer(2*n_filters, 4*n_filters),
    cnn_layer(4*n_filters, 4*n_filters),
    nn.MaxPool2d(2),
    cnn_layer(4*n_filters, 8*n_filters),
    cnn_layer(8*n_filters, 8*n_filters),
    Conv2dExpansion(8*n_filters, 4*n_filters, 3, stride=1),
    nn.BatchNorm2d(4*n_filters),
    nn.LeakyReLU(),
    cnn_layer(4*n_filters, 4*n_filters),
    Conv2dExpansion(4*n_filters, 2*n_filters, 3, stride=1),
    nn.BatchNorm2d(2*n_filters),
    nn.LeakyReLU(),
    cnn_layer(2*n_filters, 2*n_filters),
    Conv2dExpansion(2*n_filters, n_filters, 3, stride=1),
    nn.BatchNorm2d(n_filters),
    nn.LeakyReLU(),
    nn.Conv2d(n_filters, 1, (3,3), padding=1), #Shape is now (B, 1, W, H)
)

## Training

In [None]:
# Binary segmentation: the models emit logits, so the sigmoid is folded into the loss.
loss_func = nn.BCEWithLogitsLoss()
epochs = 50
# Run-level settings that are logged to MLflow for every experiment.
params = dict(
    device=device,
    loss_func=type(loss_func).__name__,
    epochs=epochs,
    batch_size=batch_size,
)

In [None]:
def iou(outputs: torch.Tensor, labels: torch.Tensor, smooth=1e-6):
    """Mean intersection-over-union between thresholded predictions and labels.

    outputs: raw logits; they are passed through a sigmoid and thresholded at 0.5.
    labels: target tensor with the same batch size as ``outputs``.
    smooth: added to numerator and denominator to avoid division by zero.
    Returns the IoU averaged over the batch dimension.
    """
    n_samples = outputs.size(0)
    # Hard 0/1 predictions, flattened to (B, N_pixels).
    predicted = (torch.sigmoid(outputs) > 0.5).float().view(n_samples, -1)
    target = labels.view(labels.size(0), -1)  # (B, N_pixels)
    overlap = (predicted * target).sum(dim=1)
    # |A ∪ B| = |A| + |B| - |A ∩ B|
    combined = (predicted + target).sum(dim=1) - overlap
    per_sample = (overlap + smooth) / (combined + smooth)
    return per_sample.mean()
score_funcs = {'iou': iou}

In [None]:
# Experiment name -> model; the training loop iterates over this in order.
models = dict(
    convtranspose_model=convtranspose_model,
    conv2dexpansion_model=conv2dexpansion_model,
)

In [None]:
# Train every model under its own MLflow run, logging the hyper-parameters
# and a torchinfo summary of the architecture alongside it.
for experiment, model in models.items():
    params['experiment'] = experiment
    optimizer = optim.AdamW(model.parameters())
    params['optimizer'] = optimizer.defaults
    total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params_all = sum(p.numel() for p in model.parameters())
    params['total_params'] = total_params
    params['total_params_all'] = total_params_all
    # `input_size` is the keyword torchinfo expects; 256x256 matches the
    # Resize applied in the dataset. UTF-8 is set explicitly because the
    # summary table contains box-drawing characters.
    with open('model_summary.txt', 'w', encoding='utf-8') as f:
        f.write(str(summary(model, input_size=(batch_size, C, 256, 256))))
    # Name the run after the model so the two runs can be told apart.
    with mlflow.start_run(nested=True, run_name=experiment):
        mlflow.log_artifact('model_summary.txt')
        mlflow.log_params(params)

        results = train_network(
            model=model,
            optimizer=optimizer,
            loss_func=loss_func,
            train_loader=train_seg_loader,
            valid_loader=test_seg_loader,
            epochs=epochs,
            device=device,
            score_funcs=score_funcs
            # checkpoint_file_save='model.pth',

        )

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

## Results

In [None]:
def evaluate_and_plot(models_dict):
    """Compare the predicted segmentation masks of several models on one test batch.

    models_dict: mapping of display name -> segmentation model that returns
        per-pixel logits of shape (B, 1, H, W).

    Draws a grid with one column per image: the RGB input, the ground-truth
    mask, and one row of predicted foreground probabilities per model.
    Every model is moved to ``device`` and left in eval mode.
    """
    # Fetch a single batch of test images together with their ground-truth masks
    test_images, test_masks = next(iter(test_seg_loader))
    test_images = test_images.to(device)

    predictions = {}
    for name, model in models_dict.items():
        model.to(device)
        model.eval()
        with torch.no_grad():
            # The models output logits; the sigmoid maps them to probabilities.
            # Probabilities (not hard masks) keep checkerboard patterns visible.
            predictions[name] = torch.sigmoid(model(test_images)).cpu()

    # Never ask for more columns than the batch contains.
    n_images_to_show = min(8, test_images.size(0))
    n_rows = len(models_dict) + 2  # input row + ground-truth row + one per model
    fig, axes = plt.subplots(
        n_rows, n_images_to_show,
        figsize=(n_images_to_show * 1.5, n_rows * 1.5),
        squeeze=False,  # keep `axes` 2-D even for a single column
    )

    def _draw(ax, img, label, is_first, **imshow_kwargs):
        """Show one image without ticks; label the row on its first column."""
        ax.imshow(img, **imshow_kwargs)
        ax.set_xticks([])
        ax.set_yticks([])
        if is_first:
            ax.set_ylabel(label, fontsize=12)

    mask_style = dict(cmap='gray', vmin=0, vmax=1)
    for i in range(n_images_to_show):
        # imshow expects (H, W, 3) for RGB data, the tensors are (3, H, W).
        _draw(axes[0, i], test_images[i].cpu().permute(1, 2, 0).numpy(), "Original", i == 0)
        _draw(axes[1, i], test_masks[i].squeeze().numpy(), "Ground truth", i == 0, **mask_style)

    # Plot the predictions of each model
    for row_idx, (name, pred_masks) in enumerate(predictions.items(), start=2):
        for i in range(n_images_to_show):
            _draw(axes[row_idx, i], pred_masks[i].squeeze().numpy(), name, i == 0, **mask_style)

    plt.tight_layout()
    plt.suptitle("Segmentation Mask Comparison", fontsize=16, y=1.02)
    plt.show()


### Expected Differences in Results
You might observe the following differences, primarily related to the visual quality of the predicted masks and potentially subtle differences in quantitative metrics like IoU.

#### Visual Quality: Checkerboard Artifacts

- convtranspose_model: You are more likely to see checkerboard artifacts in the predicted segmentation masks. These manifest as a grid-like pattern of varying pixel intensities, making the edges of the segmented objects look rough or "blocky." This is precisely the issue Conv2dExpansion aims to solve. The artifacts might be more pronounced in areas with fine details or curved boundaries.
- conv2dexpansion_model: This model is designed to reduce or eliminate checkerboard artifacts. The explicit nn.Upsample step provides a smoother initial upscaling, and the subsequent nn.Conv2d operates on this already upscaled, uniformly sampled grid. You should expect the predicted masks to have smoother boundaries and less of a grid-like appearance.

#### Quantitative Metrics (IoU)

It's possible that the IoU (Intersection over Union) metric might be slightly higher or more stable for the conv2dexpansion_model. While the core issue of checkerboard artifacts is visual, these artifacts can sometimes lead to misclassifications at the pixel level, especially at object boundaries. A smoother, less artifact-ridden prediction could translate to a marginally better IoU, as the predicted mask might align more accurately with the ground truth.
However, the difference in IoU might be negligible or inconsistent across runs. Deep learning models are complex, and many factors (initialization, exact training dynamics, subtle architectural differences) can influence the final metric. The visual difference is often more striking than the quantitative one when dealing with checkerboard artifacts.


#### Training Stability / Convergence

It's less common, but models prone to checkerboard artifacts can sometimes train slightly less stably or converge more slowly, because the network has to compensate for the unevenly distributed information introduced by the upsampling step. For the small encoder–decoder networks used here (U-Net-like in shape, but without skip connections), this is unlikely to be a major issue, especially if the artifacts are not severe.