In [1]:
# !pip install -q opencv-python==4.5.5.64
# !pip install -q --force-reinstall albumentations==1.0.3

In [2]:
import numpy as np
import cv2
import matplotlib as plt
import matplotlib.pyplot as plt
import torch
from  torch.utils.data import Dataset, DataLoader
import torchmetrics
from torchmetrics import Dice, JaccardIndex #IOU
import segmentation_models_pytorch as smp
import albumentations as A
# to convert np.array to torch.tensor
from albumentations.pytorch import ToTensorV2
# others
import os
# processing effecting
from tqdm import tqdm 
# read all images in a folder
from glob import glob

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# declaration dataset
import os
import pandas as pd
from torchvision.io import read_image

class cityScapeDataset(Dataset):
    def __init__(self, root_dir, txt_file, transform=None):
        super().__init__()
        self.root_dir = root_dir
        self.txt_file = txt_file
        self.transform = transform
        self.img_path_list = []

        # get filename without extension
        with open(self.txt_file) as file_:
                for line in file_:
                    #TODO: modify with custom dataset
                    self.img_path_list.append(line.split('.')[0])


    def __len__(self):
        return len(self.img_path_list)

    def __getitem__(self, idx):
        #TODO: modify with custom dataset
        image_path = os.path.join(self.root_dir, "IMG", "{}.png".format(self.img_path_list[idx]))
        mask_path = os.path.join(self.root_dir, "MASK", "{}.png".format(self.img_path_list[idx]))
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if self.transform is not None:
            transformed = self.transform(image=image, mask=mask)
            transformed_image = transformed["image"]
            transformed_mask = transformed["mask"]
            return transformed_image, transformed_mask
        return image, mask

In [9]:
import albumentations as A
original_size = (1024, 2048)
train_size = (384, 384)

train_transform = A.Compose([
    A.RandomCrop(height=original_size[0]-50, width=original_size[1]-50),
    A.Resize (height=train_size[0], width=train_size[1], interpolation=1, always_apply=False, p=1),
    A.HorizontalFlip(),
    A.RandomBrightnessContrast(),
    A.Blur(),
    A.RGBShift(),
    A.Cutout(num_holes=5, max_h_size=10, max_w_size=10, fill_value=0),
    A.Normalize(),
    ToTensorV2(),
])
test_transformt = A.Compose([
    A.RandomCrop(height=original_size[0]-50, width=original_size[1]-50),
    A.Resize (height=train_size[0], width=train_size[1], interpolation=1, always_apply=False, p=1),
    A.Normalize (),
    ToTensorV2(),
])



In [5]:
# # test traindataset
train_dataset = cityScapeDataset(".", r".\trainval.txt", test_transformt)
img, mask = train_dataset.__getitem__(0)
print(img.shape, type(img))
print(mask.shape, type(mask))

torch.Size([3, 384, 384]) <class 'torch.Tensor'>
torch.Size([384, 384]) <class 'torch.Tensor'>


In [6]:
import torch.nn as nn
import torch.nn.functional as F

def Unet_Block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, 1, 1),
        nn.ReLU(),
        nn.Conv2d(in_channels, out_channels, 3, 1, 1),
        nn.ReLU()
    )

class Unet_Model(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes
        # max pooling
        self.max_pool  = nn.MaxPool2d(2)
        # interpolation
        self.up_conv  = nn.Upsample(scale_factor=2, mode="bilinear")
        self.block_down1 = Unet_Block(3, 64)
        self.block_down2 = Unet_Block(64, 128)
        self.block_down3 = Unet_Block(128, 256)
        self.block_down4 = Unet_Block(256, 512)
        self.block_neck   = Unet_Block(512, 1024)
        self.block_up1   = Unet_Block(1024 + 512, 512)
        self.block_up2   = Unet_Block(512+512, 256)
        self.block_up3   = Unet_Block(256+128, 128)
        self.block_up4   = Unet_Block(128+64, 64)
        # B, n_classes, H, W
        self.conv_classify= nn.Conv2d(64, self.n_classes, 1)

    def forward(self, x):
        # encoder
        x1 = self.block_down1(x)
        x = self.max_pool(x1)
        x2 = self.block_down2(x)
        x = self.max_pool(x2)
        x3 = self.block_down3(x)
        x = self.max_pool(x3)
        x4 = self.block_down4(x)
        x = self.max_pool(x4)

        #neck
        x = self.block_neck(x)

        #decoder
        #[b c=1024 h w] cat [b c=512 h w] -> [b c=1536 h w]  (dim=1 - channel)
        x = torch.cat([x4, self.up_conv(x)], dim=1)
        x = self.block_up1(x)
        x = torch.cat([x3, self.up_conv(x)], dim=1)
        x = self.block_up2(x)
        x = torch.cat([x2, self.up_conv(x)], dim=1)
        x = self.block_up3(x)
        x = torch.cat([x1, self.up_conv(x)], dim=1)
        x = self.block_up4(x)

        return self.conv_classify(x)


In [17]:
Unet = Unet_Model(4)
x = torch.rand(4, 3, train_size[0], train_size[1])
print("Input: ", x.shape)
y = Unet(x)
print("Input: ", y.shape)


Input:  torch.Size([4, 3, 384, 384])


RuntimeError: Given groups=1, weight of size [64, 3, 3, 3], expected input[4, 64, 384, 384] to have 3 channels, but got 64 channels instead