In [31]:
import torch
import torch.nn as nn


class DiceLoss(nn.Module):
    """
    Soft Dice loss computed jointly over every element of the batch.

    All elements of `inputs` and `targets` are pooled into a single
    intersection/union pair, so the loss is always a scalar tensor.
    """

    # Reduction modes accepted by the constructor.
    _REDUCTIONS = ('none', 'mean', 'sum')

    def __init__(self, smooth=1.0, reduction: str = 'mean'):
        """
        Initializes the DiceLoss module.

        Parameters:
            smooth (float): A smoothing factor added to numerator and denominator
                to avoid division by zero errors. Defaults to 1.0.
            reduction (str): One of 'none', 'mean', or 'sum'. The value is validated
                and stored on the module. Because the Dice coefficient is computed
                over the whole batch at once, the loss is already a single scalar,
                so all three modes return the same value.

        Raises:
            ValueError: If `reduction` is not one of the accepted modes.
        """
        super().__init__()
        if reduction not in self._REDUCTIONS:
            raise ValueError("Reduction parameter must be 'none', 'mean', or 'sum'.")
        self.smooth = smooth
        self.reduction = reduction

    def forward(self, inputs, targets):
        """
        Calculates the Dice Loss between the inputs and targets.

        Parameters:
            inputs (torch.Tensor): The predicted segmentation map. Assumes values
                are in the range [0, 1]; no activation is applied here.
            targets (torch.Tensor): The ground truth segmentation map. Should be a
                binary tensor (values 0 or 1) with the same shape as `inputs`.

        Returns:
            torch.Tensor: Scalar tensor holding `1 - dice_coefficient`.
        """
        # Ensure the inputs and targets are of the same shape.
        assert inputs.size() == targets.size(), "Input and target must have the same size."

        # reshape (not view) so non-contiguous tensors, e.g. the result of a
        # transpose/permute, are flattened instead of raising a RuntimeError.
        inputs_flat = inputs.reshape(-1)
        targets_flat = targets.reshape(-1)

        # Soft intersection and the sum of both "areas".
        intersection = (inputs_flat * targets_flat).sum()
        union = inputs_flat.sum() + targets_flat.sum()

        # Smoothed Dice coefficient; the loss is its complement.
        dice_coeff = (2. * intersection + self.smooth) / (union + self.smooth)
        return 1 - dice_coeff

In [1]:
import torch
import torch.nn.functional as F

class DiceLoss(torch.nn.Module):
    """
    Soft Dice loss on raw logits, computed jointly over the whole batch.

    A sigmoid is applied to the predictions inside `forward`, then every
    element of the batch is pooled into one intersection/union pair, so the
    result is always a scalar tensor.
    """

    # Reduction modes accepted by the module.
    _REDUCTIONS = ('none', 'mean', 'sum')

    def __init__(self, smooth=1.0, reduction='mean'):
        """
        Parameters:
            smooth (float, optional): A smoothing term added to numerator and
                denominator to avoid division by zero. Defaults to 1.0.
            reduction (str, optional): 'none' | 'mean' | 'sum'. Defaults to 'mean'.
                The Dice coefficient is computed over the whole batch at once, so
                the loss is already a scalar and every mode returns the same value.

        Raises:
            ValueError: If `reduction` is not one of the accepted modes.
        """
        super().__init__()
        if reduction not in self._REDUCTIONS:
            raise ValueError("Reduction parameter must be 'none', 'mean', or 'sum'.")
        self.smooth = smooth
        self.reduction = reduction

    def forward(self, input, target):
        """
        Forward pass of the Dice loss.

        Parameters:
            input (Tensor): Raw, un-normalised predictions (logits); a sigmoid is
                applied here. Must be 4D, documented by the author as (N, C, H, W).
            target (Tensor): Ground truth with the same shape as `input`.

        Returns:
            Tensor: Scalar tensor holding `1 - dice_coefficient`.

        Raises:
            ValueError: If `self.reduction` was changed to an unsupported value
                after construction.
        """
        assert input.size() == target.size(), "Input sizes must be equal."
        assert input.dim() == 4, "Input must be a 4D Tensor."

        # Re-check in case the attribute was reassigned after __init__.
        if self.reduction not in self._REDUCTIONS:
            raise ValueError("Reduction parameter must be 'none', 'mean', or 'sum'.")

        # Squash logits into (0, 1) so they behave like probabilities.
        probs = torch.sigmoid(input)

        # reshape (not view) so non-contiguous tensors, e.g. after permute(),
        # are flattened instead of raising a RuntimeError.
        probs_flat = probs.reshape(-1)
        target_flat = target.reshape(-1)

        intersection = (probs_flat * target_flat).sum()
        dice = (2. * intersection + self.smooth) / (
            probs_flat.sum() + target_flat.sum() + self.smooth
        )

        # The loss is a 0-dim tensor, so 'none', 'mean' and 'sum' coincide.
        return 1 - dice


In [2]:
from monai import losses


# Two Dice-loss objects side by side: `oof` is built from this notebook's own
# DiceLoss class, `foo` from the implementation shipped in monai.losses.
oof, foo = DiceLoss(), losses.DiceLoss()

In [18]:
# Smoke-test the notebook's DiceLoss on random data. `a` stands in for the
# predictions; `b` is the ground-truth mask, so it is drawn from {0, 1}
# rather than from a normal distribution (the Dice overlap is only meaningful
# against a binary target).
a = torch.randn(32, 1, 128, 128)
b = torch.randint(0, 2, (32, 1, 128, 128)).float()

loss = oof(a, b)

In [8]:

# Two independent random {0, 1} tensors of identical shape, drawn in order
# (first `a`, then `b`) and passed to the MONAI loss object.
a, b = (torch.randint(0, 2, (32, 1, 1024, 1024)) for _ in range(2))

foo(a, b)

tensor(0.5001)

In [6]:
import math

from typing import List

from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler


class CosineAnnealingLinearWarmup(_LRScheduler):
    """
    Linear warmup followed by cosine annealing of the learning rate.

    For `last_epoch < warmup_epochs` the rate rises linearly from `initial_lr`
    towards each group's base LR; afterwards it follows half a cosine period
    from the base LR down to 0 at `max_epochs`, and stays at 0 beyond that.

    Attributes:
        warmup_epochs (int): The number of warmup epochs.
        max_epochs (int): The total number of epochs.
        initial_lr (float): The learning rate at the very first step of warmup.
        cosine_annealing_epochs (int): `max_epochs - warmup_epochs`, the length
            of the cosine annealing phase.

    Methods:
        get_lr(): Get the learning rate for each parameter group.
    """

    def __init__(
        self,
        optimizer: Optimizer,
        warmup_epochs: int,
        max_epochs: int,
        initial_lr: float = 1e-6,
        last_epoch: int = -1,
    ) -> None:
        """
        Initialize a new CosineAnnealingLinearWarmup instance.

        Args:
            optimizer (torch.optim.Optimizer): The optimizer for which to schedule the learning rate.
            warmup_epochs (int): The number of warmup epochs. Must be >= 0.
            max_epochs (int): The total number of epochs. Must exceed `warmup_epochs`.
            initial_lr (float, optional): The learning rate warmup starts from. Default is 1e-6.
            last_epoch (int, optional): The index of the last epoch. Default is -1.

        Raises:
            ValueError: If `warmup_epochs` is negative or `max_epochs` does not
                leave at least one epoch for the annealing phase.
        """
        if warmup_epochs < 0:
            raise ValueError(f"warmup_epochs must be non-negative, got {warmup_epochs}.")
        if max_epochs <= warmup_epochs:
            # A zero-length annealing phase would divide by zero in get_lr().
            raise ValueError(
                f"max_epochs ({max_epochs}) must be greater than warmup_epochs ({warmup_epochs})."
            )

        # These must exist before super().__init__, which performs an initial
        # step and therefore calls get_lr().
        self.warmup_epochs = warmup_epochs
        self.max_epochs = max_epochs
        self.initial_lr = initial_lr
        self.cosine_annealing_epochs = self.max_epochs - self.warmup_epochs

        super().__init__(optimizer, last_epoch)

    def get_lr(self) -> List[float]:
        """
        Get the learning rate for each parameter group.

        Returns:
            List[float]: The learning rate for each parameter group, after any
            per-group "lr_scale" factor has been applied.
        """
        if self.last_epoch < self.warmup_epochs:
            # Linear ramp from initial_lr towards base_lr.
            lr = [
                self.initial_lr + (base_lr - self.initial_lr) * (self.last_epoch) / self.warmup_epochs
                for base_lr in self.base_lrs
            ]
        else:
            # Clamp the elapsed annealing epochs so the cosine argument never
            # exceeds pi; otherwise the rate would climb back up past max_epochs.
            elapsed = min(self.last_epoch - self.warmup_epochs, self.cosine_annealing_epochs)
            cos_input = math.pi * elapsed / self.cosine_annealing_epochs
            lr = [base_lr * (1 + math.cos(cos_input)) / 2 for base_lr in self.base_lrs]

        # Applying lr_scale if it exists in the parameter group (in case of layer-wise learning rate decay usage)
        for i, param_group in enumerate(self.optimizer.param_groups):
            if "lr_scale" in param_group:
                lr[i] *= param_group["lr_scale"]

        return lr


In [None]:
import torch.optim as optim


# NOTE(review): this cell originally contained the unfinished call
# `optim.AdamW(weight_decay=)`, which is a SyntaxError and also omits the
# required `params` argument. It is kept as a note only. A complete call looks
# like `optim.AdamW(model.parameters(), weight_decay=1e-2)` once a model has
# been defined (1e-2 is the library's default weight decay).

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim

# Assuming CosineAnnealingLinearWarmup is defined as per your implementation

# Define a simple model
class SimpleNet(nn.Module):
    """Tiny two-layer linear network: 10 input features -> 2 -> 100 outputs."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(10, 2)  # An example layer
        self.fc2 = nn.Linear(2, 100)

    def forward(self, x):
        """
        Run `x` through both layers in sequence.

        Parameters:
            x (torch.Tensor): Input whose last dimension has size 10.

        Returns:
            torch.Tensor: Output whose last dimension has size 100.
        """
        # The layers are chained: fc2 consumes fc's 2-feature output. Feeding
        # the same `x` to both (and adding the results) cannot work, since the
        # layers expect different input widths and produce different output widths.
        return self.fc2(self.fc(x))

# Instantiate the model
model = SimpleNet()

# Define an optimizer. `lr` is the peak (base) learning rate: the schedule
# ramps up to it during warmup and anneals down from it afterwards.
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Parameters for the scheduler
warmup_epochs = 5
max_epochs = 100
initial_lr = 1e-6

# Add lr_scale to every parameter group (optional, for demonstration)
for param_group in optimizer.param_groups:
    param_group["lr_scale"] = 1.0  # Uniform scale here; adjust per group as needed

# Instantiate the scheduler
scheduler = CosineAnnealingLinearWarmup(optimizer, warmup_epochs, max_epochs, initial_lr)

# Example training loop
for epoch in range(max_epochs):
    # Training step would go here (forward pass, loss.backward()).
    # No gradients exist in this demo, so optimizer.step() changes nothing;
    # it is called because PyTorch expects it before scheduler.step().
    optimizer.step()

    # Step the scheduler
    scheduler.step()

    # Print the learning rate the scheduler just applied to each parameter group
    for i, lr in enumerate(scheduler.get_last_lr(), 1):
        print(f"Epoch {epoch+1}, Parameter Group {i}, Current Learning Rate: {lr}")

# Remember, in a real scenario, you would include the forward pass and loss.backward() before optimizer.step() to actually train the model.


Epoch 1, Parameter Group 1, Current Learning Rate: 0.020000800000000003
Epoch 2, Parameter Group 1, Current Learning Rate: 0.040000600000000004
Epoch 3, Parameter Group 1, Current Learning Rate: 0.0600004
Epoch 4, Parameter Group 1, Current Learning Rate: 0.08000020000000001
Epoch 5, Parameter Group 1, Current Learning Rate: 0.1
Epoch 6, Parameter Group 1, Current Learning Rate: 0.09997266286704631
Epoch 7, Parameter Group 1, Current Learning Rate: 0.09989068136093873
Epoch 8, Parameter Group 1, Current Learning Rate: 0.09975414512725057
Epoch 9, Parameter Group 1, Current Learning Rate: 0.09956320346634877
Epoch 10, Parameter Group 1, Current Learning Rate: 0.09931806517013612
Epoch 11, Parameter Group 1, Current Learning Rate: 0.09901899829374047
Epoch 12, Parameter Group 1, Current Learning Rate: 0.0986663298624003
Epoch 13, Parameter Group 1, Current Learning Rate: 0.09826044551386744
Epoch 14, Parameter Group 1, Current Learning Rate: 0.09780178907671788
Epoch 15, Parameter Group 