This kernel extracts buildings using a U-Net with a ResNet-34 encoder. The building dataset is from [Alibaba Tianchi](https://tianchi.aliyun.com/competition/entrance/231767/introduction).
This kernel is just a basic architecture to help beginners learn how to apply deep learning to building extraction. Once you have mastered it, you can try the following steps to improve the results.
- Upload your own dataset to Kaggle
- Try some additional data augmentation
- Change the encoder, e.g. to ResNet-50, SENet, etc.

In [None]:
import os
import cv2
import pdb
import time
import warnings
import random
import math
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import StratifiedKFold
from albumentations.pytorch.functional import img_to_tensor
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
from torch.optim import Adam
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset, sampler
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision 
from torchvision import models
from torchvision import transforms
from matplotlib import pyplot as plt
from PIL import Image
from random import sample
import tqdm
import glob
import cv2
warnings.filterwarnings("ignore")

# **show some images and labels**

In [None]:
# Show two randomly chosen training images next to their label masks.
input_img_folder = "../input/ali-data/train_images"
print(len(os.listdir(input_img_folder)))
imageList = glob.glob(f"{input_img_folder}/*png")
# Pick from the files that are actually present instead of assuming that at
# least 151 images exist (and never showing the first one).
Image1_dir = random.choice(imageList)
Image2_dir = random.choice(imageList)
# Label files share the image file name but live in the "train_labels" folder.
image1 = Image.open(Image1_dir)
label1 = Image.open(Image1_dir.replace("train_images", "train_labels"))
image2 = Image.open(Image2_dir)
label2 = Image.open(Image2_dir.replace("train_images", "train_labels"))
# 2x2 grid: images in the left column, labels in the right column.
plt.subplot(221)
plt.title("image")
plt.imshow(image1)
plt.subplot(222)
plt.title("label")
plt.imshow(label1)
plt.subplot(223)
plt.imshow(image2)
plt.subplot(224)
plt.imshow(label2)
plt.show()

# data augmentation function 

In [None]:
def to_monochrome(x):
    """Return ``x`` as a ``float32`` NumPy array.

    Note: despite the name, no colour-space conversion is performed here;
    the input is only copied into an array and cast to ``float32``.
    """
    return np.array(x).astype(np.float32)


def to_tensor(x):
    """Add a leading axis of length 1 to ``x`` and wrap it as a torch tensor."""
    return torch.from_numpy(np.expand_dims(x, axis=0))
    
    
class DualCompose:
    """Apply a sequence of paired ``(image, mask)`` transforms in order.

    Each transform is called as ``t(image, mask)`` and must return the
    updated ``(image, mask)`` pair.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, x, mask=None):
        image, label = x, mask
        for step in self.transforms:
            image, label = step(image, label)
        return image, label


class VerticalFlip:
    """With probability ``prob``, flip image and mask via ``cv2.flip(..., 0)``."""

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, img, mask=None):
        # Guard clause: leave the pair untouched when the draw misses.
        if not (random.random() < self.prob):
            return img, mask

        img = cv2.flip(img, 0)
        if mask is None:
            return img, None
        return img, cv2.flip(mask, 0)


class HorizontalFlip:
    """With probability ``prob``, flip image and mask via ``cv2.flip(..., 1)``."""

    def __init__(self, prob=0.6):
        self.prob = prob

    def __call__(self, img, mask=None):
        should_flip = random.random() < self.prob
        if should_flip:
            img = cv2.flip(img, 1)
            # The mask is optional; mirror it only when one was supplied.
            mask = mask if mask is None else cv2.flip(mask, 1)
        return img, mask


class RandomFlip:
    """With probability ``prob``, flip image and mask with a random flip code.

    The flip code passed to ``cv2.flip`` is drawn from ``{-1, 0, 1}`` and is
    shared by the image and the mask so that both stay aligned.
    """

    def __init__(self, prob=0.6):
        self.prob = prob

    def __call__(self, img, mask=None):
        if not (random.random() < self.prob):
            return img, mask

        flip_code = random.randint(-1, 1)
        img = cv2.flip(img, flip_code)
        if mask is None:
            return img, None
        return img, cv2.flip(mask, flip_code)


class RandomRotate90:
    """Rotate image and mask by a random multiple of 90 degrees.

    Args:
        prob: probability of applying a rotation on a given call.

    Calling the instance returns ``(img, mask)``. ``img`` is always a fresh
    copy; ``mask`` is a fresh copy, or ``None`` when no mask was passed.
    """

    def __init__(self, prob=0.6):
        self.prob = prob

    def __call__(self, img, mask=None):
        if random.random() < self.prob:
            # randint is inclusive on both ends: 0..3 covers each distinct
            # quarter turn exactly once (4 would just repeat 0).
            factor = random.randint(0, 3)
            img = np.rot90(img, factor)
            if mask is not None:
                mask = np.rot90(mask, factor)
        # np.rot90 returns a view; copy so callers get standalone arrays.
        # The mask is optional, so only copy it when it exists.
        if mask is not None:
            mask = mask.copy()
        return img.copy(), mask


class Rotate:
    """Rotate image and mask about the image centre by a random angle.

    Args:
        limit: the angle (degrees) is drawn uniformly from ``[-limit, limit]``.
        prob: probability of applying the rotation on a given call.
    """

    def __init__(self, limit=90, prob=0.5):
        self.prob = prob
        self.limit = limit

    def __call__(self, img, mask=None):
        if random.random() < self.prob:
            angle = random.uniform(-self.limit, self.limit)
            height, width = img.shape[0:2]
            mat = cv2.getRotationMatrix2D((width/2, height/2), angle, 1.0)
            # cv2.warpAffine expects dsize as (width, height); passing
            # (height, width) transposes the output size of non-square images.
            out_size = (width, height)
            img = cv2.warpAffine(img, mat, out_size,
                                 flags=cv2.INTER_LINEAR,
                                 borderMode=cv2.BORDER_REFLECT_101)
            if mask is not None:
                # NOTE(review): linear interpolation yields fractional mask
                # values along edges; consider cv2.INTER_NEAREST for labels.
                mask = cv2.warpAffine(mask, mat, out_size,
                                      flags=cv2.INTER_LINEAR,
                                      borderMode=cv2.BORDER_REFLECT_101)

        return img, mask


class Shift:
    """Translate image and mask by a random offset, filling by reflection.

    Args:
        limit: each of dx and dy is drawn from ``[-limit, limit]`` and rounded.
        prob: probability of applying the shift on a given call.

    The input is padded by ``limit + 1`` pixels on every side with
    ``cv2.BORDER_REFLECT_101`` and a window of the original size is cut out
    at the shifted position. Works for both 2-D and multi-channel arrays.
    """

    def __init__(self, limit=50, prob=0.5):
        self.limit = limit
        self.prob = prob

    def __call__(self, img, mask=None):
        if random.random() < self.prob:
            limit = self.limit
            dx = round(random.uniform(-limit, limit))
            dy = round(random.uniform(-limit, limit))

            # Only the spatial dims are needed; this also accepts 2-D input.
            height, width = img.shape[:2]
            pad = limit + 1
            y1 = pad + dy
            y2 = y1 + height
            x1 = pad + dx
            x2 = x1 + width

            img1 = cv2.copyMakeBorder(img, pad, pad, pad, pad,
                                      borderType=cv2.BORDER_REFLECT_101)
            # Slice only the spatial axes so any channel axis is kept as is.
            img = img1[y1:y2, x1:x2]
            if mask is not None:
                mask1 = cv2.copyMakeBorder(mask, pad, pad, pad, pad,
                                           borderType=cv2.BORDER_REFLECT_101)
                mask = mask1[y1:y2, x1:x2]

        return img, mask

## Dataloader

In [None]:
class RSDataset(Dataset):
    """Dataset of image / label-mask pairs read from disk with OpenCV.

    The mask for an image is looked up by replacing ``train_images`` with
    ``train_labels`` in the image path.

    Args:
        input_root: directory holding the input images.
        mode: ``"train"`` applies the random augmentation pipeline; any other
            value returns the un-augmented pair.
        debug: when true, only the first 500 files (in sorted order) are used.
    """

    def __init__(self, input_root, mode="train", debug = False):
        super().__init__()
        self.input_root = input_root
        self.mode = mode
        self.input_ids = sorted(os.listdir(self.input_root))
        if debug:
            self.input_ids = self.input_ids[:500]

        # Mask: float32 array -> tensor with a leading axis of length 1.
        self.mask_transform = transforms.Compose([
            transforms.Lambda(to_monochrome),
            transforms.Lambda(to_tensor),
        ])

        self.image_transform = transforms.Compose([
            transforms.ToTensor(),
        ])

        # Paired augmentations applied to image and mask together.
        self.transform = DualCompose([
                RandomFlip(),
                RandomRotate90(),
                Rotate(),
                Shift(),
            ])

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return ``(image_tensor, mask_tensor)`` for the file at ``idx``.

        Raises:
            FileNotFoundError: if the image or its mask cannot be read.
        """
        image_path = os.path.join(self.input_root, self.input_ids[idx])
        mask_path = image_path.replace("train_images", "train_labels")

        # cv2.imread signals failure by returning None rather than raising.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"could not read image: {image_path}")
        mask = cv2.imread(mask_path)
        if mask is None:
            raise FileNotFoundError(f"could not read mask: {mask_path}")

        image = image.astype(np.float32)
        mask = mask / 255

        if self.mode == "train":
            image, mask = self.transform(image, mask)

        # Only the first channel of the mask is used as the label.
        return self.image_transform(image), self.mask_transform(mask[:, :, 0])

In [None]:
def build_dataloader(input_img_folder = "../input/ali-data/train_images",
                 batch_size = 16,
                 num_workers = 4):
    """Create the training and validation loaders over one image folder.

    The file indices are shuffled once; the first 20% become the validation
    subset and the rest the training subset. Both loaders draw from their
    subset through a ``SubsetRandomSampler``.

    Returns:
        ``(train_loader, valid_loader)``
    """
    total = len(os.listdir(input_img_folder))
    shuffled = sample(list(range(total)), total)
    cut = int(np.floor(0.2 * total))

    def make_loader(mode, subset):
        # One dataset per mode: only "train" applies augmentation.
        return torch.utils.data.DataLoader(
            RSDataset(input_img_folder, mode=mode),
            batch_size=batch_size,
            sampler=SubsetRandomSampler(subset),
            num_workers=num_workers,
            pin_memory=True,
        )

    train_loader = make_loader("train", shuffled[cut:])
    valid_loader = make_loader("valid", shuffled[:cut])
    return train_loader, valid_loader

def seed_everything(seed=168):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: integer seed applied to every generator.

    Also exports ``PYTHONHASHSEED`` and asks cuDNN for deterministic
    algorithms.
    """
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    # Seed the CPU generator too; without it weight initialisation and
    # torch-based samplers differ from run to run.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_everything()

## Losses

This kernel uses a combined BCE (binary cross-entropy) + Dice loss.

In [None]:
def f_score(pr, gt, beta=1, eps=1e-7, threshold=None, activation='sigmoid'):
    """Compute the F-beta score between predictions and ground truth.

    Args:
        pr (torch.Tensor): predicted values
        gt (torch.Tensor): ground-truth values
        beta (float): weight of the false-negative term in the F-beta formula
        eps (float): epsilon to avoid zero division
        threshold: if not None, activated predictions are binarized with
            ``pr > threshold``
        activation: ``"sigmoid"``, ``"softmax2d"``, or ``None`` / ``"none"``
            to use ``pr`` as given
    Returns:
        torch.Tensor: scalar F-beta score
    Raises:
        NotImplementedError: for any other ``activation`` value
    """
    if activation == "sigmoid":
        pr = torch.nn.Sigmoid()(pr)
    elif activation == "softmax2d":
        pr = torch.nn.Softmax2d()(pr)
    elif not (activation is None or activation == "none"):
        raise NotImplementedError(
            "Activation implemented for sigmoid and softmax2d"
        )

    if threshold is not None:
        pr = (pr > threshold).float()

    beta_sq = beta ** 2
    true_pos = torch.sum(gt * pr)
    false_pos = torch.sum(pr) - true_pos
    false_neg = torch.sum(gt) - true_pos

    numerator = (1 + beta_sq) * true_pos + eps
    denominator = (1 + beta_sq) * true_pos + beta_sq * false_neg + false_pos + eps
    return numerator / denominator


class DiceLoss(nn.Module):
    """Dice loss: ``1 - f_score`` with ``beta = 1`` and no thresholding."""

    __name__ = 'dice_loss'

    def __init__(self, eps=1e-7, activation='sigmoid'):
        super().__init__()
        self.eps = eps
        self.activation = activation

    def forward(self, y_pr, y_gt):
        dice = f_score(
            y_pr,
            y_gt,
            beta=1.,
            eps=self.eps,
            threshold=None,
            activation=self.activation,
        )
        return 1 - dice


class BCEDiceLoss(DiceLoss):
    """Weighted sum of the Dice loss and binary cross-entropy.

    Args:
        eps: smoothing term forwarded to the Dice loss.
        activation: activation applied inside the Dice term. With ``None`` or
            ``"none"`` the inputs are used as given, so ``nn.BCELoss`` is used
            for the BCE term; otherwise ``nn.BCEWithLogitsLoss`` is used.
        lambda_dice: weight of the Dice term.
        lambda_bce: weight of the BCE term.
    """

    __name__ = 'bce_dice_loss'

    def __init__(self, eps=1e-7, activation='sigmoid', lambda_dice=1.0, lambda_bce=1.0):
        super().__init__(eps, activation)
        # Treat "none" like None, the same way the Dice term's f_score does;
        # otherwise the two terms would disagree about the input scale.
        if activation is None or activation == "none":
            self.bce = nn.BCELoss(reduction='mean')
        else:
            self.bce = nn.BCEWithLogitsLoss(reduction='mean')
        self.lambda_dice = lambda_dice
        self.lambda_bce = lambda_bce

    def forward(self, y_pr, y_gt):
        dice = super().forward(y_pr, y_gt)
        bce = self.bce(y_pr, y_gt)
        return (self.lambda_dice * dice) + (self.lambda_bce * bce)

## Some more utility functions

Here is a utility class for accumulating confusion-matrix counts and computing IoU and MCC (Matthews correlation coefficient) scores.

In [None]:
class Metrics:
    """Accumulates binary confusion-matrix counts and derives scores from them.
    """

    def __init__(self, labels):
        """Creates an new `Metrics` instance.

        Args:
          labels: the labels for all classes.
        """

        self.labels = labels

        self.tn = 0
        self.fn = 0
        self.fp = 0
        self.tp = 0

    def add(self, actual, predicted, threshold=0.5):
        """Adds an observation to the tracker.

        Both tensors are flattened and binarized with ``> threshold``.

        Args:
          actual: the ground truth labels.
          predicted: the predicted labels (scores on the same scale as
            ``threshold``).
          threshold: cut-off applied to both tensors.

        Raises:
          ValueError: if the flattened tensors differ in length.
        """
        actual = actual.reshape(-1).float()
        predicted = predicted.reshape(-1).float()
        if actual.shape != predicted.shape:
            raise ValueError(
                f"shape mismatch: actual {tuple(actual.shape)} vs "
                f"predicted {tuple(predicted.shape)}"
            )

        pred_pos = predicted > threshold
        true_pos = actual > threshold

        # Explicit boolean logic: a predicted positive on a true negative is
        # a false positive, a predicted negative on a true positive is a
        # false negative.
        self.tp += torch.sum(pred_pos & true_pos).item()
        self.fp += torch.sum(pred_pos & ~true_pos).item()
        self.fn += torch.sum(~pred_pos & true_pos).item()
        self.tn += torch.sum(~pred_pos & ~true_pos).item()

    def get_miou(self):
        """Retrieves the mean Intersection over Union score.

        A class whose union is empty is ignored in the mean; if both unions
        are empty the result is NaN.

        Returns:
          The mean Intersection over Union score for all observations seen so far.
        """
        errors = self.fn + self.fp
        bg_union = self.tn + errors
        fg_union = self.tp + errors
        bg_iou = self.tn / bg_union if bg_union else float("nan")
        fg_iou = self.tp / fg_union if fg_union else float("nan")
        return np.nanmean([bg_iou, fg_iou])

    def get_fg_iou(self):
        """Retrieves the foreground Intersection over Union score.

        Returns:
          The foreground Intersection over Union score for all observations
          seen so far, or ``inf`` when the union is empty.
        """

        try:
            iou = self.tp / (self.tp + self.fn + self.fp)
        except ZeroDivisionError:
            iou = float("Inf")

        return iou

    def get_mcc(self):
        """Retrieves the Matthew's Coefficient Correlation score.

        Returns:
          The Matthew's Coefficient Correlation score for all observations
          seen so far, or ``inf`` when the denominator is zero.
        """

        try:
            mcc = (self.tp * self.tn - self.fp * self.fn) / math.sqrt(
                (self.tp + self.fp) * (self.tp + self.fn) * (self.tn + self.fp) * (self.tn + self.fn)
            )
        except ZeroDivisionError:
            mcc = float("Inf")

        return mcc

## Model Training and validation

In [None]:
def train(loader, num_classes, device, net, optimizer, criterion):
    """Run one training epoch and report the averaged loss and metrics.

    Args:
        loader: iterable yielding ``(images, masks)`` batches.
        num_classes: number of classes, forwarded to ``Metrics``.
        device: device the batches are moved to.
        net: model to train; it is switched to training mode.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, masks)``.

    Returns:
        dict with the keys ``loss``, ``miou``, ``fg_iou`` and ``mcc``.
    """
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    net.train()
    for images, masks in loader:
        images = images.to(device)
        masks = masks.to(device)

        batch_size = int(images.size(0))
        num_samples += batch_size

        optimizer.zero_grad()
        outputs = net(images)

        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        # Assumes the criterion returns a batch-averaged scalar (true for the
        # losses defined in this file): weight it by the batch size so that
        # dividing by num_samples below gives a per-sample mean.
        running_loss += loss.item() * batch_size

        # Metrics flattens its inputs, so the whole batch is added at once.
        metrics.add(masks, outputs.detach())

    assert num_samples > 0, "dataset contains training images and labels"

    return {
        "loss": running_loss / num_samples,
        "miou": metrics.get_miou(),
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }

def validate(loader, num_classes, device, net, scheduler, criterion):
    """Evaluate the model on a loader and step the LR scheduler on the mIoU.

    Args:
        loader: iterable yielding ``(images, masks)`` batches.
        num_classes: number of classes, forwarded to ``Metrics``.
        device: device the batches are moved to.
        net: model to evaluate; it is switched to evaluation mode.
        scheduler: scheduler stepped once with the validation mIoU.
        criterion: loss called as ``criterion(outputs, masks)``.

    Returns:
        dict with the keys ``loss``, ``miou``, ``fg_iou`` and ``mcc``.
    """
    num_samples = 0
    running_loss = 0

    metrics = Metrics(range(num_classes))

    net.eval()
    with torch.no_grad():
        for images, masks in loader:
            images = images.to(device)
            masks = masks.to(device)

            batch_size = int(images.size(0))
            num_samples += batch_size

            outputs = net(images)
            loss = criterion(outputs, masks)

            # Assumes a batch-averaged criterion (true for the losses in this
            # file): weight by the batch size so the final division by
            # num_samples gives a per-sample mean.
            running_loss += loss.item() * batch_size

            # Metrics flattens its inputs, so the whole batch is added at once.
            metrics.add(masks, outputs)

    assert num_samples > 0, "dataset contains validation images and labels"

    miou = metrics.get_miou()
    scheduler.step(miou)  # update learning rate
    torch.cuda.empty_cache()

    return {
        "loss": running_loss / num_samples,
        "miou": miou,
        "fg_iou": metrics.get_fg_iou(),
        "mcc": metrics.get_mcc(),
    }

## Unet

In [None]:
class Conv2dReLU(nn.Module):
    """Conv2d followed by an optional BatchNorm2d and an in-place ReLU.

    The convolution has a bias only when batch norm is disabled. Extra
    keyword arguments are forwarded to ``nn.BatchNorm2d``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, padding=0,
                 stride=1, use_batchnorm=True, **batchnorm_params):

        super().__init__()

        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=not use_batchnorm,
        )

        stages = [conv]
        if use_batchnorm:
            stages.append(nn.BatchNorm2d(out_channels, **batchnorm_params))
        stages.append(nn.ReLU(inplace=True))

        self.block = nn.Sequential(*stages)

    def forward(self, x):
        return self.block(x)


class DecoderBlock(nn.Module):
    """Decoder stage: 2x nearest upsampling, optional skip concat, two 3x3 convs.

    ``forward`` takes a two-element sequence ``(x, skip)``; ``skip`` may be
    ``None``, in which case nothing is concatenated.
    """

    def __init__(self, in_channels, out_channels,
                 use_batchnorm=True):
        super().__init__()
        first = Conv2dReLU(in_channels, out_channels, kernel_size=3,
                           padding=1, use_batchnorm=use_batchnorm)
        second = Conv2dReLU(out_channels, out_channels, kernel_size=3,
                            padding=1, use_batchnorm=use_batchnorm)
        self.block = nn.Sequential(first, second)

    def forward(self, x):
        features, skip = x
        upsampled = F.interpolate(features, scale_factor=2, mode='nearest')
        if skip is None:
            return self.block(upsampled)
        # Concatenate along the channel axis before the convolutions.
        return self.block(torch.cat([upsampled, skip], dim=1))


class UNet(nn.Module):
    """
        UNet (https://arxiv.org/abs/1505.04597) with Resnet34(https://arxiv.org/abs/1512.03385) encoder

        The forward pass returns sigmoid-activated maps, not logits.
        """
    def __init__(self, num_classes=1,  pretrained=True, use_batchnorm=True, freeze_encoder=False):
        """
        :param num_classes: number of output channels of the final 1x1 convolution
        :param pretrained:
            False - no pre-trained network is used
            True  - encoder is pre-trained with resnet34
        :param use_batchnorm: whether the decoder blocks use batch normalization
        :param freeze_encoder: when True, every encoder parameter gets
            ``requires_grad = False``
        """
        super().__init__()
        self.num_classes = num_classes
        self.pool = nn.MaxPool2d(2, 2)

        # Keep the stock ResNet stem: it already maps 3 input channels to 64,
        # so re-creating conv1 would only throw away its pretrained weights.
        self.encoder = torchvision.models.resnet34(pretrained=pretrained)

        decoder_channels = (256, 128, 64, 32, 16)
        encoder_channels = (512, 256, 128, 64, 64)
        in_channels = self.compute_channels(encoder_channels, decoder_channels)
        out_channels = decoder_channels

        for layer in self.encoder.parameters():
            layer.requires_grad = not freeze_encoder

        # NOTE: relu and conv0 are not used in forward(); they are kept so the
        # module's attribute names and state_dict keys stay unchanged.
        self.relu = nn.ReLU(inplace=True)

        self.conv0 = nn.Sequential(self.encoder.conv1,
                                   self.encoder.bn1,
                                   self.encoder.relu,
                                   self.pool)
        self.conv1 = self.encoder.layer1
        self.conv2 = self.encoder.layer2
        self.conv3 = self.encoder.layer3
        self.conv4 = self.encoder.layer4

        self.layer1 = DecoderBlock(in_channels[0], out_channels[0], use_batchnorm=use_batchnorm)
        self.layer2 = DecoderBlock(in_channels[1], out_channels[1], use_batchnorm=use_batchnorm)
        self.layer3 = DecoderBlock(in_channels[2], out_channels[2], use_batchnorm=use_batchnorm)
        self.layer4 = DecoderBlock(in_channels[3], out_channels[3], use_batchnorm=use_batchnorm)
        self.layer5 = DecoderBlock(in_channels[4], out_channels[4], use_batchnorm=use_batchnorm)
        self.final = nn.Conv2d(out_channels[4], num_classes, kernel_size=1)

    def compute_channels(self, encoder_channels, decoder_channels):
        """Return the input channel count of each of the five decoder blocks.

        The first block sees the two deepest encoder outputs; blocks two to
        four see one encoder skip plus the previous decoder output; the last
        block has no skip connection.
        """
        channels = [
            encoder_channels[0] + encoder_channels[1],
            encoder_channels[2] + decoder_channels[0],
            encoder_channels[3] + decoder_channels[1],
            encoder_channels[4] + decoder_channels[2],
            0 + decoder_channels[3],
        ]
        return channels

    def forward(self, x):
        # Encoder: stem (conv/bn/relu), 2x2 max-pool, then the four stages.
        conv0 = self.encoder.conv1(x)
        conv0 = self.encoder.bn1(conv0)
        conv0 = self.encoder.relu(conv0)

        conv1 = self.pool(conv0)
        conv1 = self.conv1(conv1)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)
        conv4 = self.conv4(conv3)

        # Decoder: each block upsamples by 2 and fuses the matching skip.
        x = self.layer1([conv4, conv3])
        x = self.layer2([x, conv2])
        x = self.layer3([x, conv1])
        x = self.layer4([x, conv0])
        x = self.layer5([x, None])
        x = self.final(x)

        return torch.sigmoid(x)

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using {}".format(device))

# ResNet34-based U-Net with a single output channel.
num_classes = 1
net = UNet()

best_miou = -100
num_epochs = 10  # number of passes over the training split
optimizer = Adam(params=net.parameters(), lr=0.005, weight_decay=0.0001)
# Reduce the learning rate when the monitored value (validation mIoU,
# mode 'max') stops improving.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 'max', factor=0.2, patience=5, verbose=True
)
criterion = BCEDiceLoss(eps=1.0, activation=None)
net = net.to(device)

# Training / validation loaders over the default image folder.
train_loader, val_loader = build_dataloader(batch_size = 16, num_workers = 4)

for epoch in range(num_epochs):
    # ---- training phase ----
    start = time.strftime("%H:%M:%S")
    print(f"Starting epoch: {epoch+1} | phase: train | ⏰: {start}")
    train_hist = train(train_loader, num_classes, device, net, optimizer, criterion)
    print(
        'loss', train_hist["loss"],
        'miou', train_hist["miou"],
        'fg_iou', train_hist["fg_iou"],
        'mcc', train_hist["mcc"],
    )

    # ---- validation phase ----
    start = time.strftime("%H:%M:%S")
    print(f"Starting epoch: {epoch+1} | phase: valid | ⏰: {start}")
    val_hist = validate(val_loader, num_classes, device, net, scheduler, criterion)
    print(
        'loss', val_hist["loss"],
        'miou', val_hist["miou"],
        'fg_iou', val_hist["fg_iou"],
        'mcc', val_hist["mcc"],
    )

    torch.cuda.empty_cache()

    # Checkpoint only when the validation mIoU beats the best seen so far.
    if not val_hist["miou"] > best_miou:
        continue
    state = {
        "epoch": epoch + 1,
        "model": net,
        "best_miou": val_hist["miou"],
    }
    torch.save(state, 'model.pth')  # save model
    print("The model has saved successfully!")
    best_miou = val_hist["miou"]

# Inference

In [None]:
from scipy import ndimage as ndi
from skimage.morphology import opening, closing, square
import glob
import tqdm

n_workers = 4
component_size = 81
# Load the checkpoint written by the training loop.
# NOTE: torch.load unpickles the whole model object; only load checkpoints
# from a trusted source.
net = torch.load('./model.pth', map_location=lambda storage, loc: storage)["model"]
net = net.to(device)
# Make the inference mode explicit instead of relying on the flag that
# happened to be stored with the pickled model.
net.eval()

imglist = os.listdir(input_img_folder)
# Prefer the image at index 400, but stay in range for smaller folders.
sample_index = min(400, len(imglist) - 1)
img = cv2.imread(os.path.join(input_img_folder, imglist[sample_index]))
tensor = img_to_tensor(img)
tensor = torch.unsqueeze(tensor, dim=0).float()
# No gradients are needed for prediction.
with torch.no_grad():
    predict = net(tensor.to(device))[0,0,:,:]
predict = predict.cpu().numpy()
# Binarize the network output at 0.5.
predict[predict <= 0.5] = 0
predict[predict > 0.5] = 1

plt.subplot(121)
# cv2.imread returns BGR channel order; matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.subplot(122)
plt.imshow(predict)
plt.show()