## Import

In [1]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2



device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Constant

In [None]:
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
IMAGE_CHANNEL = 3
BATCH_SIZE = 1


## Utils

In [2]:
# RLE 디코딩 함수
def rle_decode(mask_rle, shape):
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE 인코딩 함수
def rle_encode(mask):
    pixels = mask.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

## Loss


In [1]:
# import cv2
# import numpy as np

# def canny(img):
#     tmp = cv2.erode(img, kernel=np.ones(shape=(3, 3), dtype=np.float32) * 2, iterations=3)
#     tmp = cv2.Canny(tmp, 127, 255)
#     tmp = cv2.dilate(tmp, kernel=np.ones(shape=(3, 3), dtype=np.float32) * 2, iterations=2)
#     return tmp


# def BIoU(prd, gnd, function=canny, return_img=False):

#     prd, gnd = function(prd), function(gnd)

#     tp, fp, fn, _ = miou(prd, gnd)

#     if not return_img:
#         return tp / (tp + fp + fn) if tp + fp + fn != 0 else 1.0
#     else:
#         return tp / (tp + fp + fn) if tp + fp + fn != 0 else 1.0, prd, gnd


# def miou(pred, anno):

#     tp = np.logical_and(pred, anno)
#     tp = np.asarray(tp, 'float64')
#     tp = np.sum(tp)

#     fp = np.logical_and(np.logical_not(anno), pred)
#     fp = np.asarray(fp, 'float64')
#     fp = np.sum(fp)

#     fn = np.logical_and(np.logical_not(pred), anno)
#     fn = np.asarray(fn, 'float64')
#     fn = np.sum(fn)

#     tn = np.logical_and(np.logical_not(pred), np.logical_not(anno))
#     tn = np.asarray(tn, 'float64')
#     tn = np.sum(tn)

#     return tp, fp, fn, tn


# def iou_value(pred, anno):
#     tp, fp, fn, tn = miou(pred, anno)
#     return tp / (tp + fp + fn) if tp + fp + fn != 0 else 1.0


# def biou_value(pred, anno):
#     return BIoU(pred, anno, return_img=False)
#canny에서 kernel과 iterations 조건에 맞는지 확인 부탁함

import torch
import torch.nn as nn
import cv2
import numpy as np

def s1_loss(outputs,labels):
    criterion = nn.BCELoss()
    bceLoss = criterion(outputs, labels)
    return bceLoss + BLoss(outputs, labels)

def BLoss(outputs, labels):
    return IoU(edge(outputs), edge(labels))

def IoU(ou_edge, la_edge):
    intersection = torch.logical_and(ou_edge, la_edge).sum()
    union = torch.logical_or(ou_edge, la_edge).sum()
    iou = intersection.float() / (union.float() + 1e-8)  # Adding a small epsilon to avoid division by zero
    return 1 - iou

def edge(input):
    img = input.detach().cpu().numpy()
    tmp = canny(img)
    output = torch.from_numpy(tmp)
    return output

def canny(img):
    tmp = cv2.erode(img, kernel=np.ones(shape=(3, 3), dtype=np.float32) * 2, iterations=1)
    tmp = cv2.Canny(tmp, 127, 255)
    tmp = cv2.dilate(tmp, kernel=np.ones(shape=(3, 3), dtype=np.float32) * 2, iterations=1)
    return tmp


## Custom Dataset

In [5]:
class SatelliteDataset(Dataset):
    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

## Data Loader

In [6]:
transform = A.Compose(
    [   
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ]
)

dataset = SatelliteDataset(csv_file='./train.csv', transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

FileNotFoundError: [Errno 2] No such file or directory: './train.csv'

## Define Model

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F



def conv_layer(input, input_channel, output_channel, mean=0.0, std=1.0, bias=0.0, filter_size=3, name=None):
    if name is None:
        name = 'conv_layer'

    conv = nn.Conv2d(input_channel, output_channel, kernel_size=filter_size, padding=filter_size // 2)
    conv.weight.data.normal_(mean, std)
    conv.bias.data.fill_(bias)

    return conv(input)


def deconv_layer(input, input_channel, output_channel, mean=0.0, std=1.0, bias=0.0, filter_size=3, stride=2,
                 name=None, batch_size=1):
    if name is None:
        name = 'deconv_layer'

    deconv = nn.ConvTranspose2d(input_channel, output_channel, kernel_size=filter_size, stride=stride,
                                padding=filter_size // 2, output_padding=1)
    deconv.weight.data.normal_(mean, std)
    deconv.bias.data.fill_(bias)

    return deconv(input)


def batch_norm(x, n_out, decay=0.99, eps=1e-5, name=None, trainable=True):
    if name is None:
        name = 'norm'

    norm = nn.BatchNorm2d(n_out, eps=eps, momentum=decay, affine=trainable)
    return norm(x)


def max_pooling(input, size=2):
    return F.max_pool2d(input, kernel_size=size, stride=size)


class B3SM(nn.Module):
    def __init__(self, batch_size, num_channels=3):
        super(B3SM, self).__init__()
        self.batch_size = batch_size
        self.num_channels = num_channels

    def conv_resi_conv(self, input, channel1, channel2, channel3, name='crc'):
        layer1 = self.conv(input, channel1, channel2, name='%s_crc1' % name)
        layer2 = self.resi_block(layer1, channel2, name='%s_crc2' % name)
        layer3 = self.conv(layer2, channel2, channel3, name='%s_crc3' % name)
        return layer3

    def resi_block(self, layer, channel, name):
        layer1 = self.conv(layer, channel, channel, name="%s_resi_block01" % name)
        layer2 = self.conv(layer1, channel, channel, name="%s_resi_block02" % name)
        layer3 = self.conv(layer2, channel, channel, name="%s_resi_block03" % name)
        layer4 = layer + layer3
        return F.relu(layer4)

    def conv(self, input, input_channel, output_channel, name='layer'):
        layer = conv_layer(input, input_channel, output_channel, filter_size=3, name=name)
        layer = F.relu(layer)
        layer = F.dropout(layer, p=0.0)
        layer = batch_norm(layer, output_channel, name=name)
        return layer

    def deconv(self, input, input_channel, output_channel, name='layer'):
        layer = deconv_layer(input, input_channel, output_channel, filter_size=3, stride=2, name=name)
        layer = F.dropout(layer, p=0.0)
        layer = batch_norm(layer, output_channel, name=name)
        return layer

    def merge(self, layer1, layer2):
        return layer1 + layer2

    def fusion2Block(self, input, channel):
        layer011 = self.conv(input, channel, 64, name='layer1011')

        layer112 = self.conv_resi_conv(layer011, 64, 64, 64, name='layer1112')
        maxpool11 = max_pooling(layer112)

        layer113 = self.conv_resi_conv(maxpool11, 64, 128, 128, name='layer1113')
        maxpool12 = max_pooling(layer113)

        layer114 = self.conv_resi_conv(maxpool12, 128, 256, 256, name='layer1114')
        maxpool13 = max_pooling(layer114)

        layer115 = self.conv_resi_conv(maxpool13, 256, 512, 512, name='layer1115')
        maxpool14 = max_pooling(layer115)

        # bridge
        layer116 = self.conv_resi_conv(maxpool14, 512, 1024, 1024, name='layer1116')
        layer116 = self.deconv(layer116, 1024, 512, name='upscaling005')

        # deconv
        upscaling4 = self.conv_resi_conv(layer116, 512, 512, 512, name='up04')
        upscaling4 = self.USIM(maxpool14, upscaling4, self.batch_size)
        upscaling4 = self.conv(upscaling4, 512, 256, name='upscaling004')

        upscaling3 = self.conv_resi_conv(upscaling4, 256, 256, 256, name="up03")
        upscaling3 = self.USIM(maxpool13, upscaling3, self.batch_size)
        upscaling3 = self.conv(upscaling3, 256, 128, name='upscaling003')

        upscaling2 = self.conv_resi_conv(upscaling3, 128, 128, 128, name="up02")
        upscaling2 = self.USIM(maxpool12, upscaling2, self.batch_size)
        upscaling2 = self.conv(upscaling2, 128, 64, name='upscaling002')

        conv = conv_layer(upscaling2, 64, 2, filter_size=1, name='conv')
        pred = torch.argmax(conv, dim=1)
        return pred, conv

    def fusionBlock(self, input, channel):
        layer011 = self.conv(input, channel, 64, name='layer011')

        layer112 = self.conv_resi_conv(layer011, 64, 64, 64, name='layer112')
        maxpool11 = self.max_pooling(layer112)

        layer113 = self.conv_resi_conv(maxpool11, 64, 128, 128, name='layer113')
        maxpool12 = self.max_pooling(layer113)

        layer114 = self.conv_resi_conv(maxpool12, 128, 256, 256, name='layer114')
        maxpool13 = self.max_pooling(layer114)

        layer115 = self.conv_resi_conv(maxpool13, 256, 512, 512, name='layer115')
        maxpool14 = self.max_pooling(layer115)

        # bridge
        layer116 = self.conv_resi_conv(maxpool14, 512, 1024, 1024, name='layer116')

        # deconv
        upscaling4 = self.deconv(layer116, 1024, 512, name='upscaling4')
        upscaling4 = self.merge(upscaling4, layer115)
        upscaling4 = self.conv_resi_conv(upscaling4, 512, 512, 512, name='up4')

        upscaling3 = self.deconv(upscaling4, 512, 256, name='upscaling3')
        upscaling3 = self.merge(upscaling3, layer114)
        upscaling3 = self.conv_resi_conv(upscaling3, 256, 256, 256, name="up3")

        upscaling2 = self.deconv(upscaling3, 256, 128, name="upscaling2")
        upscaling2 = self.merge(upscaling2, layer113)
        upscaling2 = self.conv_resi_conv(upscaling2, 128, 128, 128, name="up2")

        upscaling1 = self.deconv(upscaling2, 128, 64, name='upscaling1')
        upscaling1 = self.merge(upscaling1, layer112)
        upscaling1 = self.conv_resi_conv(upscaling1, 64, 64, 64, name='up1')

        conv = conv_layer(upscaling1, 64, 2, filter_size=1, name='conv_tmp')
        pred = torch.argmax(conv, dim=1)
        return pred, conv


    def USIM(self, layer1, layer2, batch):
        _, H, W, C = layer2.size()

        resized1 = F.interpolate(layer1, size=(2 * H, 2 * W), mode='nearest')
        resized2 = F.interpolate(layer2, size=(2 * H, 2 * W), mode='nearest')

        def get_init_values(h, w, flag='LU'):
            init_value = torch.zeros((2 * h, 2 * w), dtype=torch.float32)
            if flag == 'LU':
                H, W = 0, 0
            elif flag == 'RU':
                H, W = 0, 1
            elif flag == 'LD':
                H, W = 1, 0
            elif flag == 'RD':
                H, W = 1, 1
            else:
                return None

            for i in range(H, 2 * h, 2):
                for j in range(W, 2 * w, 2):
                    init_value[i, j] = 1.0

            return init_value

        slsh_init = get_init_values(H, W, 'RU') + get_init_values(H, W, 'LD')
        slsh_init = slsh_init.unsqueeze(0).unsqueeze(3)
        bslh_init = get_init_values(H, W, 'LU') + get_init_values(H, W, 'RD')
        bslh_init = bslh_init.unsqueeze(0).unsqueeze(3)

        slsh = nn.Parameter(slsh_init, requires_grad=False)
        bslh = nn.Parameter(bslh_init, requires_grad=False)

        layer = resized1 * slsh + resized2 * bslh
        return layer

    def forward(self, input):
        semi_prediction = self.fusionBlock(input, self.num_channels)
        semi_prediction = semi_prediction.split(2, dim=3)[1]
        semi_prediction = torch.cat([semi_prediction, semi_prediction, semi_prediction], dim=3)

        after_usim = self.USIM(semi_prediction, input, self.batch_size)

        pred, conv = self.fusion2Block(after_usim, 3)

        return pred, conv, semi_prediction


## Model Train

In [3]:
import glob
import os
import cv2
import numpy as np
from tqdm import tqdm
import argparse
import torch
import torch.nn as nn
import torch.optim as optim

from loss import *


# 커맨드 라인 인자 파싱을 위한 ArgumentParser 생성
parser = argparse.ArgumentParser(description="Enter valid args.")

parser.add_argument(
    "--image_path",
    "--ip",
    default="./data/images/02_testset",
    metavar="IMAGE_PATH",
    help="로드하고 테스트할 이미지 경로.",
)
parser.add_argument(
    "--annotation_path",
    "--ap",
    default="./data/annotations/02_testset",
    metavar="ANNOTATION_PATH",
    help="로드한 이미지에 대응하는 주석(Annotation) 경로",
)
parser.add_argument(
    "--weight_path", "--wp", default=None, help="사전 훈련된 가중치의 경로."
)

parser.add_argument("--height", default=IMAGE_HEIGHT, help="이미지 높이", type=int)
parser.add_argument("--width", default=IMAGE_WIDTH, help="이미지 너비", type=int)
parser.add_argument("--batch_size", default=BATCH_SIZE, help="배치 크기", type=int)
parser.add_argument("--learning_rate", default=1e-3, help="학습률", type=float)
parser.add_argument("--epoch", default=100, help="에폭 수", type=int)

args = parser.parse_args()

# 이미지와 마스크를 저장할 리스트를 생성합니다.
imgs = []
gnds = []

# 데이터로더를 통해 이미지와 마스크를 불러와서 리스트에 추가합니다.
for batch in dataloader:
    images, masks = batch
    imgs.append(images)
    gnds.append(masks)

# 리스트에 있는 텐서들을 연결하여 하나의 텐서로 만듭니다.
imgs = torch.cat(imgs, dim=0)
gnds = torch.cat(gnds, dim=0)

# 텐서를 넘파이 배열로 변환합니다.
imgs = imgs.numpy()
gnds = gnds.numpy()


# 무작위 인덱스 생성하여 셔플
indexes = np.asarray([i for i in range(0, len(imgs))])
np.random.shuffle(indexes)

# 디바이스 설정 (GPU 또는 CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 입력 이미지, 주석, 경계 가중치를 위한 텐서 플레이스홀더 생성
imgHolder = torch.zeros(
    (args.batch_size, args.height, args.width, IMAGE_CHANNEL),
    dtype=torch.float32,
    device=device,
)
gndHolder = torch.zeros(
    (args.batch_size, args.height, args.width), dtype=torch.int32, device=device
)
boundary_weights = torch.zeros((), dtype=torch.float32, device=device)

# B3SM 모델과 옵티마이저 생성
model = B3SM(IMAGE_CHANNEL).to(device)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

average_loss_value = len(imgs)
for epoch in range(1, args.epoch + 1):
    loss_value = average_loss_value / (
        float(args.batch_size) * (len(indexes) // args.batch_size + 1)
    )
    average_loss_value = 0
    np.random.shuffle(indexes)

    model.train()
    for index in tqdm(
        range(0, len(indexes), args.batch_size),
        desc="epoch : (%03d/%03d) | loss : (%.4f)" % (epoch, args.epoch, loss_value),
    ):
        batch_index = list(indexes[index : index + args.batch_size])
        while len(batch_index) != args.batch_size:
            batch_index.append(np.random.randint(0, len(indexes) - 1, 1)[0])

        batch_index = np.asarray(batch_index)
        batch_imgs, batch_gnds = (
            imgs[indexes[batch_index]],
            gnds[indexes[batch_index]],
        )

        # 배치 데이터를 텐서 플레이스홀더로 복사
        imgHolder.copy_(torch.tensor(batch_imgs.transpose(0, 3, 1, 2)).to(device))
        gndHolder.copy_(torch.tensor(batch_gnds).to(device))
        boundary_weights.fill_(average_loss_value)

        optimizer.zero_grad()
        logits, prediction, semi_logits = model(imgHolder) # 최종결과, 건물이 있을지에 대한 확률, 
        loss = (
            IoU(logits, gndHolder)
            + s1_loss(semi_logits, gndHolder, batch_size=args.batch_size)
            * boundary_weights
            * 10
        )
        loss.backward()
        optimizer.step()

        average_loss_value += loss.item() * args.batch_size

        prd = (prediction[0] > 0).type(torch.uint8).cpu().numpy() * 255

        lists = [batch_imgs[0], batch_gnds[0], prd]
        [
            cv2.imshow("이미지%d" % window_index, image)
            for window_index, image in enumerate(lists)
        ]
        [
            cv2.moveWindow("이미지%d" % window_index, 300 * window_index, 0)
            for window_index, image in enumerate(lists)
        ]
        cv2.waitKey(1)



NameError: name 'args' is not defined

## Inference

In [7]:
test_dataset = SatelliteDataset(csv_file='./test.csv', transform=transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

In [8]:
with torch.no_grad():
    model.eval()
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)
        
        outputs = model(images)
        masks = torch.sigmoid(outputs).cpu().numpy()
        masks = np.squeeze(masks, axis=1)
        masks = (masks > 0.35).astype(np.uint8) # Threshold = 0.35
        
        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # 예측된 건물 픽셀이 아예 없는 경우 -1
                result.append(-1)
            else:
                result.append(mask_rle)

100% 3790/3790 [04:18<00:00, 14.65it/s]


## Submission

In [9]:
submit = pd.read_csv('./sample_submission.csv')
submit['mask_rle'] = result

In [10]:
submit.to_csv('./submit.csv', index=False)