# **Setup**

In [1]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from efficientnet_pytorch import EfficientNet

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary 2-D array.

    The encoding is a whitespace-separated sequence of ``start length``
    pairs, where ``start`` is a 1-based index into the flattened mask (the
    flat buffer is reshaped with NumPy's default row-major order).

    Args:
        mask_rle: RLE string such as ``'1 3 10 2'``. A value that carries no
            run at all (``-1`` as an int or a string, ``NaN``, ``None`` or an
            empty string) is decoded as an all-zero mask.
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of the requested shape holding 1 inside the runs
        and 0 everywhere else.

    Raises:
        ValueError: if the string holds an odd number (greater than one) of
            tokens, i.e. a start without a matching length.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    tokens = [] if mask_rle is None else str(mask_rle).split()
    # Fewer than two tokens cannot describe a run. This covers the "no mask"
    # markers ('-1', -1, NaN, '') explicitly instead of relying on NumPy
    # broadcasting an empty length array.
    if len(tokens) < 2:
        return img.reshape(shape)
    if len(tokens) % 2:
        raise ValueError(
            f'Malformed RLE string: expected start/length pairs, got {len(tokens)} tokens'
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Run-length encode a mask as a ``'start length start length ...'`` string.

    The mask is flattened and padded with a zero on both ends; every position
    where consecutive values differ marks a run boundary. Starts are 1-based
    indices into the flattened mask. A mask without any boundary yields an
    empty string.
    """
    padded = np.concatenate([[0], mask.flatten(), [0]])
    # 1-based positions at which the value changes (run starts and run ends).
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every "end" entry into a run length by subtracting its start.
    boundaries[1::2] -= boundaries[0::2]
    return ' '.join(map(str, boundaries))

In [3]:
class SatelliteDataset(Dataset):
    """Image (and mask) dataset backed by the rows of a CSV file.

    Rows are addressed positionally: the second column holds the image path
    and, when masks are requested, the third column holds the RLE-encoded
    mask.

    Args:
        csv_file: path of the CSV file, read with ``pd.read_csv``.
        transform: optional callable invoked as ``transform(image=...)`` or
            ``transform(image=..., mask=...)``; its result is indexed with
            ``'image'`` (and ``'mask'``).
        infer: when true, items are images only and the mask column is never
            read.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample stored at row ``idx``.

        Returns:
            The (optionally transformed) RGB image when ``infer`` is true,
            otherwise an ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path = self.data.iloc[idx, 1]

        image = cv2.imread(img_path)
        # cv2.imread reports a missing or unreadable file by returning None;
        # fail here with the offending path instead of inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image for row {idx}: {img_path!r}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        # The mask is decoded at the resolution of the image as loaded from disk.
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

In [4]:
# Training-time preprocessing: resize to 112x112, normalize with the library
# defaults, then convert to a tensor.
transform = A.Compose([
    A.Resize(height=112, width=112),
    A.Normalize(),
    ToTensorV2(),
])

# Shuffled training batches of 16 samples, loaded by 6 worker processes.
dataset = SatelliteDataset('./train.csv', transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, num_workers=6)

In [5]:
# Defines the Double Convolution Block, the basic building block of the U-Net.
def double_conv(in_channels, out_channels):
    """Build two stacked 3x3 convolutions, each followed by an in-place ReLU.

    With ``padding=1`` the convolutions keep the spatial size; only the
    channel count changes from ``in_channels`` to ``out_channels``.
    """
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)
class UNet(nn.Module):
    """U-Net style encoder/decoder mapping a 3-channel image to per-pixel scores.

    Three 2x max-pooling stages are mirrored by three 2x bilinear upsampling
    stages with skip concatenations, so the input height and width need to be
    multiples of 8 for the skip tensors to line up.

    Args:
        backbone_name: EfficientNet variant handed to
            ``EfficientNet.from_pretrained``.
        classes: number of output channels.
        encoder_weights: accepted for interface compatibility; it is not used,
            the pretrained backbone weights are always loaded.
    """

    def __init__(self, backbone_name='efficientnet-b5', classes=1, encoder_weights='imagenet'):
        super().__init__()

        # NOTE: the backbone's features are not consumed by forward(). The
        # submodule is kept so that state_dict keys stay compatible with
        # checkpoints that already contain ``backbone.*`` entries.
        self.backbone = EfficientNet.from_pretrained(backbone_name, weights_path=None)

        self.dconv_down1 = double_conv(3, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down4 = double_conv(256, 512)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder inputs are the upsampled features concatenated with the
        # matching encoder features, hence the summed channel counts.
        self.dconv_up3 = double_conv(256 + 512, 256)
        self.dconv_up2 = double_conv(128 + 256, 128)
        self.dconv_up1 = double_conv(128 + 64, 64)

        self.conv_last = nn.Conv2d(64, classes, 1)

    def forward(self, x):
        """Return un-activated scores with ``classes`` channels at input resolution.

        The EfficientNet backbone is deliberately not evaluated here: its
        output never fed the decoder, so running it only cost time and memory.
        """
        # Encoder: keep the pre-pooling activations for the skip connections.
        conv1 = self.dconv_down1(x)
        conv2 = self.dconv_down2(self.maxpool(conv1))
        conv3 = self.dconv_down3(self.maxpool(conv2))

        # Bottleneck at 1/8 of the input resolution.
        x = self.dconv_down4(self.maxpool(conv3))

        # Decoder: upsample, concatenate the skip tensor on the channel axis,
        # then fuse with a double convolution.
        x = self.dconv_up3(torch.cat([self.upsample(x), conv3], dim=1))
        x = self.dconv_up2(torch.cat([self.upsample(x), conv2], dim=1))
        x = self.dconv_up1(torch.cat([self.upsample(x), conv1], dim=1))

        return self.conv_last(x)


In [6]:
# Build the segmentation model once and move it to the selected device.
# The arguments below equal the UNet defaults, so a separate bare ``UNet()``
# construction would only load the pretrained weights a second time and be
# discarded immediately.
model = UNet(backbone_name='efficientnet-b5', encoder_weights='imagenet').to(device)

Loaded pretrained weights for efficientnet-b5
Loaded pretrained weights for efficientnet-b5


In [7]:
# Define the loss function and the optimizer
criterion = torch.nn.BCEWithLogitsLoss().to(device)
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-3)

# Training loop: 15 passes over the training dataloader
for epoch in range(15):
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(dataloader):
        inputs = images.float().to(device)
        # Add a channel axis so the target has the same rank as the model output.
        targets = masks.float().to(device).unsqueeze(1)

        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

        # Accumulate the per-batch loss to report the epoch mean below.
        epoch_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')


100%|██████████| 447/447 [00:44<00:00, 10.11it/s]


Epoch 1, Loss: 0.17879804637141408


100%|██████████| 447/447 [00:43<00:00, 10.22it/s]


Epoch 2, Loss: 0.14100294953558


100%|██████████| 447/447 [00:43<00:00, 10.25it/s]


Epoch 3, Loss: 0.1304039648948633


100%|██████████| 447/447 [00:43<00:00, 10.26it/s]


Epoch 4, Loss: 0.12202076015523083


100%|██████████| 447/447 [00:43<00:00, 10.24it/s]


Epoch 5, Loss: 0.11629314637290818


100%|██████████| 447/447 [00:43<00:00, 10.21it/s]


Epoch 6, Loss: 0.1123832360043355


100%|██████████| 447/447 [00:44<00:00, 10.15it/s]


Epoch 7, Loss: 0.10885542846166048


100%|██████████| 447/447 [00:43<00:00, 10.19it/s]


Epoch 8, Loss: 0.10587058129659968


100%|██████████| 447/447 [00:43<00:00, 10.21it/s]


Epoch 9, Loss: 0.10339641096007904


100%|██████████| 447/447 [00:44<00:00, 10.11it/s]


Epoch 10, Loss: 0.10132595194166139


100%|██████████| 447/447 [00:43<00:00, 10.21it/s]


Epoch 11, Loss: 0.09947950474634533


100%|██████████| 447/447 [00:44<00:00, 10.13it/s]


Epoch 12, Loss: 0.09810325706698483


100%|██████████| 447/447 [00:43<00:00, 10.20it/s]


Epoch 13, Loss: 0.09620368299001548


100%|██████████| 447/447 [00:43<00:00, 10.18it/s]


Epoch 14, Loss: 0.09446330772716993


100%|██████████| 447/447 [00:43<00:00, 10.20it/s]

Epoch 15, Loss: 0.09275795751870078





In [8]:
# Persist the trained weights next to the notebook.
model_name = "UNet_aug_upimg.pth"
checkpoint_path = "./" + model_name
torch.save(model.state_dict(), checkpoint_path)

In [9]:
# Inference-time preprocessing: resize to 224x224, normalize with the library
# defaults, then convert to a tensor. This rebinds ``transform``.
transform = A.Compose([
    A.Resize(height=224, width=224),
    A.Normalize(),
    ToTensorV2(),
])

In [10]:
# Image-only dataset for inference; order is preserved (no shuffling) so the
# predictions stay aligned with the rows of the CSV file.
test_dataset = SatelliteDataset('./test.csv', transform, infer=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=16, shuffle=False, num_workers=6)

In [11]:
# Deserialize the checkpoint onto the CPU first, then move the model to the
# selected device.
checkpoint = torch.load('./UNet_aug_upimg.pth', map_location=torch.device('cpu'))
model.load_state_dict(checkpoint)
model = model.to(device)

In [12]:
# Predict a binary mask for every test image and collect one RLE entry per image.
model.eval()
result = []
with torch.no_grad():
    for images in tqdm(test_dataloader):
        logits = model(images.float().to(device))
        probabilities = torch.sigmoid(logits).cpu().numpy()
        # Drop the channel axis and binarize at the 0.35 threshold.
        masks = (np.squeeze(probabilities, axis=1) > 0.35).astype(np.uint8)

        for mask in masks:
            mask_rle = rle_encode(mask)
            # No predicted building pixel at all -> store -1 instead of ''.
            result.append(-1 if mask_rle == '' else mask_rle)


100%|██████████| 3790/3790 [07:32<00:00,  8.37it/s]


In [13]:
# Fill the sample submission with the predicted RLE strings (one per row, in
# test order) and write it out without the index column.
submit = pd.read_csv(filepath_or_buffer='./sample_submission.csv')
submit['mask_rle'] = result
submit.to_csv(path_or_buf='./submit.csv', index=False)