Import

In [1]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')





Utils



In [2]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary 2-D array.

    Args:
        mask_rle: Space-separated ``start length start length ...`` string,
            where each start is a 1-based index into the flattened
            (row-major) mask. The "no foreground" sentinel ``-1`` (as an int
            or as the string ``'-1'``), an empty string, ``None`` and NaN all
            decode to an all-zero mask.
        shape: ``(height, width)`` of the mask to return.

    Returns:
        ``np.uint8`` array of the requested shape with 1 on encoded pixels
        and 0 elsewhere.

    Raises:
        ValueError: If the encoding has an odd number of tokens, i.e. a start
            without a matching length.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # Missing values (None, or NaN as pandas produces for empty cells) mean
    # "no mask" rather than an error.
    if mask_rle is None or (isinstance(mask_rle, float) and np.isnan(mask_rle)):
        return img.reshape(shape)

    # Accept the integer sentinel -1 as well as its string form.
    tokens = str(mask_rle).split()
    if not tokens or tokens == ['-1']:
        return img.reshape(shape)

    if len(tokens) % 2:
        # Without this check numpy would silently broadcast the shorter
        # lengths array and paint wrong runs.
        raise ValueError(
            f'Malformed RLE: expected start/length pairs, got {len(tokens)} tokens'
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # RLE starts are 1-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened and every run of foreground pixels is written as a
    ``start length`` pair, with 1-based starts. Pairs are joined by single
    spaces; a mask without foreground yields an empty string.

    Args:
        mask: Array holding 0/1 values.

    Returns:
        The encoded string.
    """
    # Zero-pad both ends so runs touching either edge still produce both a
    # rising and a falling boundary.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1

    # Boundaries alternate: run start, one past run end, run start, ...
    run_starts = boundaries[::2]
    run_lengths = boundaries[1::2] - run_starts
    return ' '.join(
        f'{start} {length}' for start, length in zip(run_starts, run_lengths)
    )

In [3]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Switch to the project folder on Drive so the relative paths used later
# ('./train.csv', './test.csv', ...) resolve; the bare getcwd() call displays
# the resulting working directory as the cell output.
os.chdir('/content/drive/MyDrive/Dacon')
os.getcwd()

'/content/drive/MyDrive/Dacon'

Custom Dataset

In [5]:
class SatelliteDataset(Dataset):
    """Image (and optional mask) dataset driven by a CSV file.

    Rows are accessed positionally: column index 1 holds the image path and,
    outside inference mode, column index 2 holds the RLE-encoded mask.

    Args:
        csv_file: Path of the CSV file, read with ``pd.read_csv``.
        transform: Optional callable invoked as ``transform(image=...)`` in
            inference mode or ``transform(image=..., mask=...)`` otherwise;
            it must return a mapping with an ``'image'`` (and ``'mask'``) entry.
        infer: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) RGB image in inference mode,
            otherwise an ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail here with the offending path rather
            # than with an opaque cvtColor error.
            raise FileNotFoundError(
                f'Could not read image {img_path!r} (row {idx})'
            )
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        # Decode the mask at the resolution of the image as loaded from disk.
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            # Passing image and mask together lets the transform treat both
            # in a single call.
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

Data Loader

In [6]:
# Preprocessing pipeline; the same `transform` object is reused for the test
# dataset in the inference section.
transform = A.Compose(
    [
        # Resize every sample to 224x224.
        A.Resize(224, 224),
        # Normalize with the library's default parameters.
        A.Normalize(),
        # Convert the result to torch tensors.
        ToTensorV2()
    ]
)

# Training data: shuffled batches of 16 loaded by 4 worker processes.
dataset = SatelliteDataset(csv_file='./train.csv', transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)



Define Model

In [7]:
# Each block applies Convolution -> BatchNorm -> ReLU twice
class conv_block_nested(nn.Module):
    """Two stacked 3x3 Conv -> BatchNorm -> ReLU stages.

    Args:
        in_ch: Number of input channels.
        mid_ch: Number of channels produced by the first convolution.
        out_ch: Number of channels produced by the second convolution.
    """

    def __init__(self, in_ch, mid_ch, out_ch):
        super().__init__()
        self.activation = nn.ReLU(inplace=True)

        # A 3x3 kernel with padding=1 keeps the spatial size unchanged.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=True)
        self.conv1 = nn.Conv2d(in_ch, mid_ch, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(mid_ch)
        self.conv2 = nn.Conv2d(mid_ch, out_ch, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_ch)

    def forward(self, x):
        """Apply both Conv-BN-ReLU stages and return the resulting tensor."""
        hidden = self.activation(self.bn1(self.conv1(x)))
        return self.activation(self.bn2(self.conv2(hidden)))

# Nested U-Net (U-Net++)
class Nested_UNet(nn.Module):
    """U-Net++ style network assembled from ``conv_block_nested`` nodes.

    Node ``convI_J`` / feature ``xI_J`` sits at depth ``I`` (0 = input
    resolution, each deeper level is max-pooled by 2) and dense-skip column
    ``J``. A node with ``J > 0`` receives every earlier feature map of its own
    depth plus the upsampled feature map of the previous column one level
    deeper, concatenated along the channel axis.

    Because the input is pooled four times and upsampled back by a factor of
    two at each step, its height and width must be divisible by 16 for the
    concatenations to line up.

    Deep supervision (averaging 1x1 heads attached to x0_1..x0_4) is not
    implemented; only x0_4 feeds the output head.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of channels produced by the final 1x1 convolution.
    """

    def __init__(self, in_ch=3, out_ch=1):
        super().__init__()

        # Channel width doubles with depth: 64, 128, 256, 512, 1024.
        base = 64
        f0, f1, f2, f3, f4 = (base * 2 ** level for level in range(5))

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.Up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Column 0: the plain encoder path.
        self.conv0_0 = conv_block_nested(in_ch, f0, f0)
        self.conv1_0 = conv_block_nested(f0, f1, f1)
        self.conv2_0 = conv_block_nested(f1, f2, f2)
        self.conv3_0 = conv_block_nested(f2, f3, f3)
        self.conv4_0 = conv_block_nested(f3, f4, f4)

        # Column 1: one same-depth skip + upsampled deeper feature.
        self.conv0_1 = conv_block_nested(f0 + f1, f0, f0)
        self.conv1_1 = conv_block_nested(f1 + f2, f1, f1)
        self.conv2_1 = conv_block_nested(f2 + f3, f2, f2)
        self.conv3_1 = conv_block_nested(f3 + f4, f3, f3)

        # Column 2: two same-depth skips + upsampled deeper feature.
        self.conv0_2 = conv_block_nested(f0 * 2 + f1, f0, f0)
        self.conv1_2 = conv_block_nested(f1 * 2 + f2, f1, f1)
        self.conv2_2 = conv_block_nested(f2 * 2 + f3, f2, f2)

        # Column 3: three same-depth skips + upsampled deeper feature.
        self.conv0_3 = conv_block_nested(f0 * 3 + f1, f0, f0)
        self.conv1_3 = conv_block_nested(f1 * 3 + f2, f1, f1)

        # Column 4: four same-depth skips + upsampled deeper feature.
        self.conv0_4 = conv_block_nested(f0 * 4 + f1, f0, f0)

        # 1x1 convolution mapping the last full-resolution node to out_ch.
        self.final = nn.Conv2d(f0, out_ch, kernel_size=1)

    def forward(self, x):
        """Run the nested encoder/decoder and return the ``final`` conv output."""

        def fuse(*features):
            # Join feature maps along the channel axis.
            return torch.cat(features, dim=1)

        # Depth 1 reached: first dense node at full resolution.
        x0_0 = self.conv0_0(x)
        x1_0 = self.conv1_0(self.pool(x0_0))
        x0_1 = self.conv0_1(fuse(x0_0, self.Up(x1_0)))

        # Depth 2 reached.
        x2_0 = self.conv2_0(self.pool(x1_0))
        x1_1 = self.conv1_1(fuse(x1_0, self.Up(x2_0)))
        x0_2 = self.conv0_2(fuse(x0_0, x0_1, self.Up(x1_1)))

        # Depth 3 reached.
        x3_0 = self.conv3_0(self.pool(x2_0))
        x2_1 = self.conv2_1(fuse(x2_0, self.Up(x3_0)))
        x1_2 = self.conv1_2(fuse(x1_0, x1_1, self.Up(x2_1)))
        x0_3 = self.conv0_3(fuse(x0_0, x0_1, x0_2, self.Up(x1_2)))

        # Depth 4 (bottleneck) reached.
        x4_0 = self.conv4_0(self.pool(x3_0))
        x3_1 = self.conv3_1(fuse(x3_0, self.Up(x4_0)))
        x2_2 = self.conv2_2(fuse(x2_0, x2_1, self.Up(x3_1)))
        x1_3 = self.conv1_3(fuse(x1_0, x1_1, x1_2, self.Up(x2_2)))
        x0_4 = self.conv0_4(fuse(x0_0, x0_1, x0_2, x0_3, self.Up(x1_3)))

        return self.final(x0_4)

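# About the U-Net++ model
# forward: the image (x) first passes through one convolution block (Conv-BN-ReLU, repeated twice).

# The result, x0_0, is pooled and passed through another convolution block to give x1_0. Up to here this is identical to U-Net.
# Next, x1_0 is upsampled back to the larger size and concatenated with x0_0.
# The concatenation is along dim=1, i.e. the channel dimension. Let's trace the shapes.

# Take a 3-channel input of shape (Batch, 3, 256, 256) as an example (this notebook resizes images to 224x224; the spatial sizes below scale accordingly).
# With n1 = 64 as in the model above, x0_0 after one convolution block has shape (Batch, 64, 256, 256).
# The spatial size is unchanged by the convolution block because the convolutions use padding.
# x1_0 is obtained by pooling x0_0 and applying a convolution block, giving (Batch, 128, 128, 128).
# Upsampling x1_0 gives (Batch, 128, 256, 256).
# Concatenating it with x0_0 (Batch, 64, 256, 256) gives (Batch, 192, 256, 256).
# Passing that through one more convolution block gives (Batch, 64, 256, 256).
# Densely adding skip connections to the plain U-Net in this way yields the U-Net++ model.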

Model Train

In [8]:
# Initialize the model
model = Nested_UNet().to(device)

# Define the loss function and the optimizer
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(10):  # train for 10 epochs
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(dataloader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # unsqueeze(1) inserts a channel axis into the masks before they are
        # compared with the model outputs.
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean per-batch loss of this epoch.
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')


100%|██████████| 447/447 [11:47<00:00,  1.58s/it]


Epoch 1, Loss: 0.14844342381162132


100%|██████████| 447/447 [08:01<00:00,  1.08s/it]


Epoch 2, Loss: 0.09703437690633522


100%|██████████| 447/447 [07:59<00:00,  1.07s/it]


Epoch 3, Loss: 0.08480732774641156


100%|██████████| 447/447 [07:55<00:00,  1.06s/it]


Epoch 4, Loss: 0.07876688333632428


100%|██████████| 447/447 [07:49<00:00,  1.05s/it]


Epoch 5, Loss: 0.07304198584067208


100%|██████████| 447/447 [07:57<00:00,  1.07s/it]


Epoch 6, Loss: 0.06896279679748836


100%|██████████| 447/447 [08:13<00:00,  1.10s/it]


Epoch 7, Loss: 0.06780665937886142


100%|██████████| 447/447 [08:19<00:00,  1.12s/it]


Epoch 8, Loss: 0.06358020624325046


100%|██████████| 447/447 [07:55<00:00,  1.06s/it]


Epoch 9, Loss: 0.06191405346429588


100%|██████████| 447/447 [07:48<00:00,  1.05s/it]

Epoch 10, Loss: 0.05934313072987584





Inference

In [9]:
# Test data: infer=True makes the dataset return images only, and
# shuffle=False keeps the predictions in CSV row order.
test_dataset = SatelliteDataset(csv_file='./test.csv', transform=transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)



In [10]:
# Predict a mask for every test image and collect one RLE string per image
# (or -1 for an empty prediction) in `result`.
with torch.no_grad():
    model.eval()
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)

        outputs = model(images)
        # The model outputs logits; sigmoid maps them to probabilities.
        masks = torch.sigmoid(outputs).cpu().numpy()
        # Drop the channel axis (axis 1).
        masks = np.squeeze(masks, axis=1)
        masks = (masks > 0.35).astype(np.uint8) # Threshold = 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # no building pixels were predicted at all -> -1
                result.append(-1)
            else:
                result.append(mask_rle)

100%|██████████| 3790/3790 [53:09<00:00,  1.19it/s]


Submission

In [11]:
# Fill the sample submission's 'mask_rle' column with the predictions;
# `result` needs one entry per row of the sample file.
submit = pd.read_csv('./sample_submission.csv')
submit['mask_rle'] = result

In [12]:
# Write the submission file without the DataFrame index column.
submit.to_csv('./submit.csv', index=False)