# Imports

In [1]:
import torch
import torch.nn.functional as F
from torch.optim import lr_scheduler
import numpy as np
import torch.nn as nn
import time
import copy
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.datasets import ImageFolder
from PIL import Image
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
from segmentation_models_pytorch.encoders import get_preprocessing_fn
import segmentation_models_pytorch as smp

# Dataset Class

In [2]:
# Define a custom dataset class

class SegmentationDataset(Dataset):
    def __init__(self, root_dir, transform = None, target_size = (992, 416)):
        self.root_dir = root_dir
        self.transform = transform
        self.target_size = target_size

        self.image_folder = os.path.join(root_dir, 'images')
        self.mask_folder = os.path.join(root_dir, 'masks')
        
        self.images = [f for f in os.listdir(self.image_folder) if os.path.isfile(os.path.join(self.image_folder, f))]
        self.masks = [f for f in os.listdir(self.mask_folder) if os.path.isfile(os.path.join(self.mask_folder, f))]
        
        assert len(self.images) == len(self.masks), "Number of images and masks should be the same."
        
        self.imageConverter = transforms.Compose([transforms.PILToTensor()])
        self.maskConverter = transforms.Compose([transforms.ToTensor()])
        
    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_folder, self.images[idx])
        mask_path = os.path.join(self.mask_folder, self.masks[idx])

        # Load images
        image = Image.open(img_path).convert('RGB') 
        mask = Image.open(mask_path).convert('RGB')  
        
        # Convert to tensors 
        tensor_image = self.imageConverter(image)
        tensor_mask = self.maskConverter(mask)
        tensor_mask = tensor_mask[2:, :, :]
        
        # add padding 
        pad_height = max(self.target_size[0] - tensor_image.size(1), 0)
        pad_width = max(self.target_size[1] - tensor_image.size(2), 0)
        
        pad_top = pad_height // 2
        pad_bottom = pad_height - pad_top
        pad_left = pad_width // 2
        pad_right = pad_width - pad_left
            
        padded_image = transforms.functional.pad(tensor_image, (pad_left, pad_bottom, pad_right, pad_top), fill=255)
        padded_mask = transforms.functional.pad(tensor_mask, (pad_left, pad_bottom, pad_right, pad_top), fill=0)
        
        if self.transform:
            padded_image = self.transform(padded_image)
            padded_mask = self.transform(padded_mask)
        
        return torch.tensor(padded_image, dtype=torch.float32), torch.tensor(padded_mask, dtype=torch.float32)


# Run Parameters

In [3]:
# Training params 
BATCH_SIZE = 2
EPOCHS = 2 
LEARNING_RATE = 0.001

# Model params 
ENCODER_NAME = "resnet34"
ENCODER_WEIGHTS = "imagenet"

# Create the model
model = smp.Unet(
    encoder_name= ENCODER_NAME,           # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
    encoder_weights= ENCODER_WEIGHTS,     # use `imagenet` pre-trained weights for encoder initialization
    in_channels=3,                        # model input channels (1 for gray-scale images, 3 for RGB, etc.)
    classes=1,                            # model output channels (number of classes in your dataset)
    )

# Define loss function 
criterion = smp.losses.DiceLoss(smp.losses.BINARY_MODE, from_logits=True) # Binary dice Loss for binary segmentation

# Define optimizer 
optimizer = optim.Adam(model.parameters(), lr = LEARNING_RATE)
#Scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

run_name = "MODEL-" + model.__class__.__name__ + \
            "-BACKBONE-" + ENCODER_NAME +\
            "-BATCH_SIZE-" + str(BATCH_SIZE) + \
            "-EPOCHS-" + str(EPOCHS) + \
            "-OPTIMIZER-" + optimizer.__class__.__name__ + \
            "-LEARNRATE-" + str(LEARNING_RATE)

Downloading: "https://download.pytorch.org/models/resnet34-333f7ec4.pth" to /home/hp/.cache/torch/hub/checkpoints/resnet34-333f7ec4.pth
100%|██████████████████████████████████████| 83.3M/83.3M [00:28<00:00, 3.04MB/s]


# Device and Data Setup

In [4]:
# Proper directories 
TRAIN_DATA_DIR = 'image_data/train'
VAL_DATA_DIR = 'image_data/val'

# Set device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)

# Set up dataset and dataloader
transform = transforms.Compose([])

trainDataset = SegmentationDataset(root_dir=TRAIN_DATA_DIR, transform=transform)
valDataset = SegmentationDataset(root_dir=VAL_DATA_DIR, transform=transform)

train_loader = DataLoader(trainDataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(valDataset, batch_size=BATCH_SIZE, shuffle=False)

# Training

In [5]:
# Train Loop 
for epoch in range(EPOCHS):
    model.train()

    # Use tqdm to add a progress bar
    for images, masks in tqdm(train_loader, desc = f'Epoch {epoch + 1}/{EPOCHS}', leave=False):
        images, masks = images.to(device), masks.to(device)
        
        # Forward pass
        outputs = model(images)

        loss = criterion(outputs, masks)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    
    val_loss = 0.0
    tp_epoch, fp_epoch, fn_epoch, tn_epoch = [], [], [], []
    dice_epoch = []
    
    with torch.no_grad():
        for val_images, val_masks in tqdm(val_loader, desc=f'Validation', leave=False):
            val_images, val_masks = val_images.to(device), val_masks.to(device)

            model_outputs = model(val_images)

            val_loss += criterion(model_outputs, val_masks).item()
            
            prob_outputs = model_outputs.sigmoid()
            #pred_masks = (prob_outputs > 0.5).float()
            
            tp_batch, fp_batch, fn_batch, tn_batch = smp.metrics.get_stats(prob_outputs, val_masks.to(torch.int), mode='binary', threshold=0.5)
            tp_epoch.append(tp_batch)
            fp_epoch.append(fp_batch)
            fn_epoch.append(fn_batch)
            tn_epoch.append(tn_batch)
            
    val_loss /= len(val_loader)
    
    tp = torch.cat(tp_epoch)
    fp = torch.cat(fp_epoch)
    fn = torch.cat(fn_epoch)
    tn = torch.cat(tn_epoch)
    
    per_image_iou = smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro-imagewise")
    dataset_iou = smp.metrics.iou_score(tp, fp, fn, tn, reduction="micro")
    dice_score = smp.metrics.f1_score(tp, fp, fn, tn, reduction="micro")
    
    print(f"Epoch [{epoch + 1}/{EPOCHS}], Train Loss: {loss.item()}, Validation Loss: {val_loss}\n"
          f"Validation Pr. image IOU: {per_image_iou}, Validation Dataset IOU: {dataset_iou}, Dice Score: {dice_score}")
    
# Save the trained model
torch.save(model.state_dict(), run_name + '.pth')

  return torch.tensor(padded_image, dtype=torch.float32), torch.tensor(padded_mask, dtype=torch.float32)
                                                                                

RuntimeError: stack expects each tensor to be equal size, but got [1, 1021, 416] at entry 0 and [1, 975, 416] at entry 1