## Import

In [1]:
# !pip install --user albumentations

In [2]:
import os
import cv2
from PIL import Image
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## Utils

In [3]:
# Run-length encoding helper.
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order and every run of foreground
    pixels is emitted as a ``start length`` pair, where ``start`` is the
    1-based index of the run's first pixel in the flattened array.

    Args:
        mask: NumPy array holding 0/1 values.

    Returns:
        Space-separated string of ``start length`` pairs; an empty string
        when the mask contains no foreground pixels.
    """
    flat = mask.flatten()
    # Zero-pad both ends so runs touching the borders still produce an edge.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions where the value changes; run starts and run ends alternate.
    edges = np.where(padded[1:] != padded[:-1])[0] + 1
    # Convert every end position into the length of its run.
    edges[1::2] = edges[1::2] - edges[::2]
    return ' '.join(map(str, edges))

## Custom Dataset

In [4]:
class CustomDataset(Dataset):
    """Segmentation dataset backed by a CSV index of image and mask paths.

    The CSV is loaded with pandas. Column 1 (zero-based) is read as the image
    path and column 2 as the mask path.

    Args:
        csv_file: Path of the CSV index file.
        transform: Optional callable invoked as ``transform(image=...)`` in
            inference mode or ``transform(image=..., mask=...)`` otherwise;
            its result is indexed with the ``'image'`` / ``'mask'`` keys.
        infer: When True, ``__getitem__`` returns only the image and the
            mask column is never read.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    @staticmethod
    def _read_image(path, flag=None):
        """Read ``path`` with OpenCV, failing loudly when it cannot be loaded.

        ``cv2.imread`` does not raise for a missing or undecodable file; it
        returns ``None``, which would otherwise surface later as an unrelated
        error far from the real cause.

        Raises:
            FileNotFoundError: If OpenCV could not load the file.
        """
        pixels = cv2.imread(path) if flag is None else cv2.imread(path, flag)
        if pixels is None:
            raise FileNotFoundError(f'Could not read image file: {path!r}')
        return pixels

    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        Returns:
            The RGB image alone in inference mode, otherwise an
            ``(image, mask)`` tuple.

        Raises:
            FileNotFoundError: If the image or mask file cannot be read.
        """
        img_path = self.data.iloc[idx, 1]
        # OpenCV decodes to BGR; convert to RGB channel order.
        image = cv2.cvtColor(self._read_image(img_path), cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_path = self.data.iloc[idx, 2]
        mask = self._read_image(mask_path, cv2.IMREAD_GRAYSCALE)
        # Pixel value 255 is treated as background and relabelled as class 12.
        mask[mask == 255] = 12

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

## Data Loader

In [5]:
# Preprocessing pipeline: resize to 224x224, normalize with albumentations'
# default statistics, then convert to a PyTorch tensor.
transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(),
    ToTensorV2(),
])

# Training data: shuffled mini-batches of 16, loaded in the main process.
dataset = CustomDataset(csv_file='./train_source.csv', transform=transform)
dataloader = DataLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    num_workers=0,
)

## Define Model

In [6]:
# Double convolution block, the basic building unit of the U-Net below.
def double_conv(in_channels, out_channels):
    """Build two stacked 3x3 convolutions, each followed by an in-place ReLU.

    Both convolutions use ``padding=1``, so the spatial size of the input is
    preserved.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by both convolutions.

    Returns:
        An ``nn.Sequential`` of ``Conv2d -> ReLU -> Conv2d -> ReLU``.
    """
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)

# Simple U-Net model definition.
class UNet(nn.Module):
    """Small U-Net with three down-sampling and three up-sampling stages.

    The encoder halves the spatial size three times with 2x2 max pooling and
    the decoder doubles it three times with bilinear up-sampling, concatenating
    the matching encoder feature map at each stage. Input height and width
    should therefore be divisible by 8, otherwise the up-sampled maps do not
    match the skip connections in size.

    Args:
        in_channels: Number of channels of the input image (default 3).
        num_classes: Number of output channels / segmentation labels
            (default 13: 12 classes + 1 background).
    """

    def __init__(self, in_channels=3, num_classes=13):
        super().__init__()
        # Encoder.
        self.dconv_down1 = double_conv(in_channels, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down4 = double_conv(256, 512)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder; input channels = up-sampled features + skip connection.
        self.dconv_up3 = double_conv(256 + 512, 256)
        self.dconv_up2 = double_conv(128 + 256, 128)
        self.dconv_up1 = double_conv(128 + 64, 64)

        # 1x1 convolution mapping the 64 decoder features to per-class logits.
        self.conv_last = nn.Conv2d(64, num_classes, 1)

    def forward(self, x):
        """Return per-pixel class logits with the same spatial size as ``x``."""
        # Encoder path: keep each stage's output for the skip connections.
        conv1 = self.dconv_down1(x)
        x = self.maxpool(conv1)

        conv2 = self.dconv_down2(x)
        x = self.maxpool(conv2)

        conv3 = self.dconv_down3(x)
        x = self.maxpool(conv3)

        # Bottleneck.
        x = self.dconv_down4(x)

        # Decoder path: up-sample, concatenate the skip connection, convolve.
        x = self.upsample(x)
        x = torch.cat([x, conv3], dim=1)
        x = self.dconv_up3(x)

        x = self.upsample(x)
        x = torch.cat([x, conv2], dim=1)
        x = self.dconv_up2(x)

        x = self.upsample(x)
        x = torch.cat([x, conv1], dim=1)
        x = self.dconv_up1(x)

        return self.conv_last(x)

## Model Train

In [7]:
# Hyper-parameters of this training run.
NUM_EPOCHS = 20
LEARNING_RATE = 0.001

# Initialise the model on the selected device.
model = UNet().to(device)

# Loss function and optimizer.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop.
for epoch in range(NUM_EPOCHS):
    model.train()
    epoch_loss = 0
    # The progress bar carries the epoch label instead of a bare counter print.
    for images, masks in tqdm(dataloader, desc=f'Epoch {epoch+1}/{NUM_EPOCHS}'):
        images = images.float().to(device)
        # CrossEntropyLoss needs integer class indices as targets.
        masks = masks.long().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # squeeze(1) drops a singleton channel axis if the masks carry one;
        # it is a no-op otherwise.
        loss = criterion(outputs, masks.squeeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Mean batch loss over the epoch.
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')

0


100%|██████████| 138/138 [02:29<00:00,  1.08s/it]


Epoch 1, Loss: 1.2298406630322554
1


100%|██████████| 138/138 [02:07<00:00,  1.08it/s]


Epoch 2, Loss: 0.6074171852374423
2


100%|██████████| 138/138 [02:07<00:00,  1.08it/s]


Epoch 3, Loss: 0.49255051323469135
3


100%|██████████| 138/138 [02:11<00:00,  1.05it/s]


Epoch 4, Loss: 0.4039179950520612
4


100%|██████████| 138/138 [02:15<00:00,  1.02it/s]


Epoch 5, Loss: 0.3484960116338039
5


100%|██████████| 138/138 [02:11<00:00,  1.05it/s]


Epoch 6, Loss: 0.30353307777988736
6


100%|██████████| 138/138 [02:11<00:00,  1.05it/s]


Epoch 7, Loss: 0.28367168747860455
7


100%|██████████| 138/138 [02:11<00:00,  1.05it/s]


Epoch 8, Loss: 0.270313228170077
8


100%|██████████| 138/138 [02:10<00:00,  1.06it/s]


Epoch 9, Loss: 0.2368305577100187
9


100%|██████████| 138/138 [02:11<00:00,  1.05it/s]


Epoch 10, Loss: 0.22144791246324347
10


100%|██████████| 138/138 [02:06<00:00,  1.09it/s]


Epoch 11, Loss: 0.20406525279732718
11


100%|██████████| 138/138 [02:05<00:00,  1.10it/s]


Epoch 12, Loss: 0.18626867796199909
12


100%|██████████| 138/138 [02:05<00:00,  1.10it/s]


Epoch 13, Loss: 0.1700184735590997
13


100%|██████████| 138/138 [02:10<00:00,  1.06it/s]


Epoch 14, Loss: 0.18030504817548004
14


100%|██████████| 138/138 [02:12<00:00,  1.04it/s]


Epoch 15, Loss: 0.16685301875290665
15


100%|██████████| 138/138 [02:12<00:00,  1.04it/s]


Epoch 16, Loss: 0.14917941785592964
16


100%|██████████| 138/138 [02:09<00:00,  1.06it/s]


Epoch 17, Loss: 0.1796184319840825
17


100%|██████████| 138/138 [02:15<00:00,  1.02it/s]


Epoch 18, Loss: 0.14369066604885503
18


100%|██████████| 138/138 [02:15<00:00,  1.02it/s]


Epoch 19, Loss: 0.12920162896963133
19


100%|██████████| 138/138 [02:17<00:00,  1.01it/s]

Epoch 20, Loss: 0.12217064096551875





## Inference

In [8]:
# Inference data: images only (infer=True), iterated without shuffling so the
# predictions stay in CSV row order.
test_dataset = CustomDataset(
    csv_file='./test.csv',
    transform=transform,
    infer=True,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=0,
)

In [9]:
# Output size as (width, height) for PIL, and the number of class ids that are
# encoded; label 12 (background) is excluded from the submission.
SUBMISSION_SIZE = (960, 540)
NUM_FOREGROUND_CLASSES = 12

model.eval()
result = []
with torch.no_grad():
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        logits = model(images)
        # Softmax is monotonic, so argmax over the raw logits picks the same
        # class; taking it before .cpu() moves only the label map off the device.
        preds = torch.argmax(logits, dim=1).cpu().numpy()
        # Process every image of the batch.
        for pred in preds:
            pred = pred.astype(np.uint8)
            pred = Image.fromarray(pred)  # to a PIL image
            # Nearest-neighbour resize keeps the values valid class ids.
            pred = pred.resize(SUBMISSION_SIZE, Image.NEAREST)
            pred = np.array(pred)  # back to a NumPy array
            # One submission entry per class id 0..11.
            for class_id in range(NUM_FOREGROUND_CLASSES):
                class_mask = (pred == class_id).astype(np.uint8)
                if class_mask.any():  # class present: encode its mask
                    result.append(rle_encode(class_mask))
                else:  # class absent: -1
                    result.append(-1)

100%|██████████| 119/119 [02:32<00:00,  1.28s/it]


## Submission

In [10]:
# Load the submission template and fill its mask_rle column with the encoded
# predictions; `result` must hold one entry per template row, in row order.
submission_template = pd.read_csv('./sample_submission.csv')
submit = submission_template.assign(mask_rle=result)
submit

Unnamed: 0,id,mask_rle
0,TEST_0000_class_0,215761 51 216721 51 217681 51 218641 73 219601...
1,TEST_0000_class_1,222472 5 223432 5 224392 5 225348 9 226308 9 2...
2,TEST_0000_class_2,1 154 687 428 1647 433 2602 438 3562 438 4522 ...
3,TEST_0000_class_3,230217 4 231177 4 232137 4 233097 4 234052 9 2...
4,TEST_0000_class_4,46698 9 47658 9 48618 9 49578 9 50538 9 51498 ...
...,...,...
22771,TEST_1897_class_7,914 30 1874 30 5718 9 6678 9 7630 8 8590 8 955...
22772,TEST_1897_class_8,121 519 678 124 1081 519 1638 124 2041 514 260...
22773,TEST_1897_class_9,213811 9 214771 9 215727 13 216687 13 217647 1...
22774,TEST_1897_class_10,292428 13 293388 13 294344 30 295304 30 296264...


In [11]:
# Write the submission to disk; index=False keeps the row index out of the CSV.
submit.to_csv('./baseline_submit.csv', index=False)