<a href="https://colab.research.google.com/github/quickgrid/AI-Resources/blob/master/paper-implementations/pytorch/u-net/U_Net_Pytorch_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# References:
- https://www.youtube.com/watch?v=IHq1t7NxS8k&t=22s

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms.functional as F

class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution stages, each followed by batch norm and ReLU.

    Both convolutions use stride 1 and padding 1, so the spatial size of the
    input is preserved; only the channel count changes from ``in_channels``
    to ``out_channels``. The convolutions carry no bias term.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        def conv_bn_relu(channels_in):
            # One stage: 3x3 convolution -> batch norm -> in-place ReLU.
            return (
                nn.Conv2d(channels_in, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            )

        # The first stage changes the channel count, the second keeps it.
        first_stage = conv_bn_relu(in_channels)
        second_stage = conv_bn_relu(out_channels)
        self.conv = nn.Sequential(*first_stage, *second_stage)

    def forward(self, X):
        """Run ``X`` through both convolution stages and return the result."""
        out = self.conv(X)
        return out

In [None]:
class UNET(nn.Module):
    """U-Net style encoder/decoder assembled from ``DoubleConv`` stages.

    Args:
        in_channels: Channel count of the input tensor.
        out_channels: Channel count produced by the final 1x1 convolution.
        features: Output channel count of each encoder stage, shallowest
            first. The decoder mirrors these in reverse order and the
            bottleneck uses twice the last entry.
    """

    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super(UNET, self).__init__()

        # Snapshot the sequence: the default is an immutable tuple (not a shared
        # mutable list), and a caller mutating their own list later has no effect.
        features = tuple(features)

        self.ups = nn.ModuleList()
        self.downs = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Downward path of UNET.
        for feature in features:
            self.downs.append(DoubleConv(in_channels=in_channels, out_channels=feature))
            in_channels = feature

        # Upward path of UNET.
        # A transposed convolution doubles the spatial size before each double convolution.
        # The double convolution takes feature * 2 channels because the upsampled
        # tensor is concatenated with the matching skip connection.
        for feature in reversed(features):
            self.ups.append(
                nn.ConvTranspose2d(in_channels=feature * 2, out_channels=feature, kernel_size=2, stride=2)
            )
            self.ups.append(DoubleConv(in_channels=feature * 2, out_channels=feature))

        self.bottleneck = DoubleConv(in_channels=features[-1], out_channels=features[-1] * 2)
        self.final_conv = nn.Conv2d(in_channels=features[0], out_channels=out_channels, kernel_size=1, stride=1)

    def forward(self, X):
        """Run the encoder, bottleneck and decoder and return the final 1x1 convolution output."""
        skip_connections = []

        # Record each encoder output before pooling so the decoder can reuse it.
        for down in self.downs:
            X = down(X)
            skip_connections.append(X)
            X = self.pool(X)

        X = self.bottleneck(X)

        # Reverse the connections such that the last added skip connection is at 0th position.
        skip_connections = skip_connections[::-1]

        # self.ups alternates: even indices (0, 2, ...) hold the transposed
        # convolutions, odd indices (1, 3, ...) hold the double convolutions.
        for idx in range(0, len(self.ups), 2):
            X = self.ups[idx](X)
            skip_connection = skip_connections[idx // 2]

            # Pooling floors odd spatial sizes, so the upsampled tensor can be
            # smaller than the skip connection. Only the spatial dims (H, W) are
            # compared, since those are the only ones resize can adjust.
            if X.shape[2:] != skip_connection.shape[2:]:
                X = F.resize(X, size=skip_connection.shape[2:])

            concat_skip = torch.cat((skip_connection, X), dim=1)
            X = self.ups[idx + 1](concat_skip)

        return self.final_conv(X)
        

In [None]:
class Tester():
    """Small smoke test harness for the UNET model."""

    def __init__(self):
        super().__init__()

    def unet_test(self):
        """Check that UNET returns a tensor with the same shape as its input.

        Uses a 161x161 input, an odd size, so the resize branch of the
        decoder is exercised as well.
        """
        batch = torch.randn((3, 1, 161, 161))
        network = UNET(in_channels=1, out_channels=1)
        output = network(batch)

        # Print the input shape first, then the prediction shape.
        for shape in (batch.shape, output.shape):
            print(shape)

        assert batch.shape == output.shape, "Shape of input and predicted images did not match."


In [None]:
# Run the UNET shape smoke test when executed as a script.
if __name__ == "__main__":
    tester = Tester()
    tester.unet_test()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


torch.Size([3, 1, 161, 161])
torch.Size([3, 1, 161, 161])


In [None]:
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset


class CarvanaDataset(Dataset):
    """Dataset of (image, mask) pairs read from two directories.

    Every file listed in ``image_dir`` is treated as one sample. Its mask is
    looked up in ``mask_dir`` under the same base name with the extension
    replaced by ``_mask.gif`` (``car_01.jpg`` -> ``car_01_mask.gif``).

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the matching ``*_mask.gif`` files.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries.
    """

    def __init__(
        self,
        image_dir,
        mask_dir,
        transform=None
    ):
        super(CarvanaDataset, self).__init__()

        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(image_dir)

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.images)

    def _mask_path(self, index):
        """Return the mask file path for the image at ``index``."""
        # Strip only the extension. A plain str.replace('jpg', ...) would leave
        # the dot behind ('car_01._mask.gif') and would also rewrite any other
        # 'jpg' occurrence inside the file name.
        stem, _extension = os.path.splitext(self.images[index])
        return os.path.join(self.mask_dir, stem + '_mask.gif')

    def __getitem__(self, index):
        """Load the image and mask at ``index`` and return them as ``(image, mask)``.

        Without a transform, the image is an RGB numpy array and the mask is a
        float32 single-channel numpy array with 255 remapped to 1.0.
        """
        image_path = os.path.join(self.image_dir, self.images[index])
        mask_path = self._mask_path(index)

        # Context managers close the underlying files as soon as the pixel data
        # has been copied into numpy arrays.
        with Image.open(image_path) as image_file:
            image = np.array(image_file.convert('RGB'))

        # Masks are grayscale single channels images.
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file.convert('L'), dtype=np.float32)

        # Since masks are black and white with value of 0 and 255.
        # These values can be normalized by replacing white value of 255 to 1.
        mask[mask == 255.0] = 1.0

        # Apply augmentation if available.
        if self.transform is not None:
            augmentations = self.transform(image=image, mask=mask)
            image = augmentations["image"]
            mask = augmentations["mask"]

        return image, mask