In [None]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F # 추가

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import train_test_split#, KFold

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

In [None]:
# All relative paths used below (CSV files, model folder) resolve against this directory.
WORK_DIR = '/home/osh9423/soohyun'
os.chdir(WORK_DIR)
os.getcwd()

In [None]:
# RLE 디코딩 함수
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded string into a binary mask.

    Args:
        mask_rle: Whitespace-separated string of alternating 1-based start
            positions and run lengths, e.g. ``"1 3 10 2"``.
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of the given shape with 1 inside the runs and 0
        elsewhere. An empty string yields an all-zero mask.
    """
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape)

# RLE 인코딩 함수
def rle_encode(mask):
    """Encode a binary mask as a run-length string.

    Args:
        mask: Array of 0/1 values; it is flattened before encoding.

    Returns:
        Space-separated string of alternating 1-based start positions and run
        lengths of the non-zero runs. An all-zero mask yields ``''``.
    """
    # Pad with a zero on both sides so runs touching either end still produce
    # a rising and a falling edge.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    edges = np.where(padded[1:] != padded[:-1])[0] + 1
    # Edges alternate start, end, start, end...; turn each end into a length.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

In [None]:
class SatelliteDataset(Dataset):
    """Dataset of satellite images and, for training, their RLE-encoded masks.

    Samples are described by a CSV file: column 1 holds the image path and,
    when ``infer`` is False, column 2 holds the run-length-encoded mask.

    Args:
        csv_file: Path of the CSV file listing the samples.
        transform: Optional callable invoked as ``transform(image=...)`` (or
            ``transform(image=..., mask=...)``) that returns a dict with the
            same keys.
        infer: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        # keep_default_na=False keeps empty mask cells as '' instead of NaN.
        self.data = pd.read_csv(csv_file, keep_default_na=False)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows (samples) in the CSV file."""
        return len(self.data)

    @staticmethod
    def _is_empty_rle(mask_rle):
        """Return True when ``mask_rle`` denotes a mask without foreground.

        Missing values (None / NaN), blank strings and the ``-1`` marker are
        all treated as "no foreground".
        """
        if mask_rle is None:
            return True
        if isinstance(mask_rle, float) and mask_rle != mask_rle:  # NaN
            return True
        return str(mask_rle).strip() in ('', '-1')

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) image when ``infer`` is True,
            otherwise an ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail with a clear
            # message instead of an opaque cvtColor error.
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # OpenCV loads images as BGR; convert to 3-channel RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        height, width = image.shape[0], image.shape[1]

        if self._is_empty_rle(mask_rle):
            # An empty mask must still match the image size so that image and
            # mask can be transformed together.
            mask = np.zeros((height, width), dtype=np.uint8)
        else:
            mask = rle_decode(mask_rle, (height, width))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

In [None]:
# Training-time augmentation pipeline, applied jointly to image and mask.
# A.Resize(224, 224) was tried as an alternative to the random crop.
train_augmentations = [
    A.RandomCrop(224, 224),
    A.RandomRotate90(),
    A.Normalize(),
    ToTensorV2(),
]
transform = A.Compose(train_augmentations)

dataset = SatelliteDataset(csv_file='./train1.csv', transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

# No validation split is used: the whole CSV goes into the training loader.
# (An earlier experiment split `dataset` 80/20 with train_test_split and
# random_state=123 into separate train/val dataloaders.)

In [66]:
from torch import nn
import torch
import torch.nn.functional as F
from torchvision import models

def conv3x3(in_, out):
    """Build a 3x3 convolution that preserves the spatial size.

    Args:
        in_: Number of input channels.
        out: Number of output channels.

    Returns:
        An ``nn.Conv2d`` with a 3x3 kernel and a padding of 1.
    """
    return nn.Conv2d(in_channels=in_, out_channels=out, kernel_size=3, padding=1)

class ConvolutionReLu(nn.Module):
    """A 3x3 convolution (built by ``conv3x3``) followed by an in-place ReLU."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.convolution = conv3x3(in_channels, out_channels)
        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution and then the activation to ``x``."""
        return self.activation(self.convolution(x))

class DecoderBlock(nn.Module):
    """Decoder stage that doubles the spatial resolution of its input.

    Args:
        in_channels: Channels of the incoming feature map.
        middle_channels: Channels after the first convolution.
        out_channels: Channels of the produced feature map.
        is_deconvolution: When True, upsample with a stride-2 transposed
            convolution placed after one ConvolutionReLu; when False, upsample
            bilinearly first and then apply two ConvolutionReLu layers.
    """

    def __init__(self, in_channels, middle_channels, out_channels, is_deconvolution=True):
        super().__init__()
        self.in_channels = in_channels

        if not is_deconvolution:
            stages = [
                nn.Upsample(scale_factor=2, mode="bilinear"),
                ConvolutionReLu(in_channels, middle_channels),
                ConvolutionReLu(middle_channels, out_channels),
            ]
        else:
            upsampling = nn.ConvTranspose2d(
                middle_channels,
                out_channels,
                kernel_size=(4, 4),
                stride=(2, 2),
                padding=(1, 1),
            )
            stages = [
                ConvolutionReLu(in_channels, middle_channels),
                upsampling,
                nn.ReLU(inplace=True),
            ]
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the stage's sequential block."""
        return self.block(x)

class AlBuNet(nn.Module):
    """U-Net style segmentation network with a torchvision ResNet encoder.

    The encoder stages (stem + layer1..layer4) provide skip connections that
    are concatenated with the decoder features at matching resolutions.

    Args:
        num_classes: Number of output channels of the final 1x1 convolution.
        num_filters: Base width of the decoder; decoder channel counts are
            multiples of this value.
        pre_trained: Forwarded to the torchvision constructor as
            ``pretrained`` to select pretrained encoder weights.
        is_deconvolution: Whether dec5..dec1 upsample with transposed
            convolutions (True) or bilinear upsampling (False). The centre
            block always uses a transposed convolution.
        res_net_to_use: Name of a ResNet constructor in ``torchvision.models``
            (e.g. ``'resnet34'`` or ``'resnet50'``).

    NOTE(review): shapes were traced by hand for 224x224 and 256x256 inputs
    only; input sides are presumably required to be multiples of 32 so that
    every skip connection lines up - confirm before using other sizes.
    """

    def __init__(
            self,
            num_classes=1,
            num_filters=32,
            pre_trained=True,
            is_deconvolution=False,
            res_net_to_use='resnet50',
    ):
        super().__init__()
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)
        # Honour the caller's ``pre_trained`` flag instead of always
        # downloading pretrained weights.
        self.encoder = getattr(models, res_net_to_use)(pretrained=pre_trained)

        # Derive the stage widths from the encoder instead of hard-coding the
        # resnet50 values: in torchvision ResNets ``fc.in_features`` is the
        # channel count of layer4 and each earlier stage has half the channels
        # of the next (resnet50 -> [256, 512, 1024, 2048],
        # resnet34 -> [64, 128, 256, 512]).
        top_features = self.encoder.fc.in_features
        layers_features = [
            top_features // 8,
            top_features // 4,
            top_features // 2,
            top_features,
        ]

        self.non_linearity = nn.ReLU(inplace=True)

        # Stem: the encoder's first conv/bn/relu followed by a 2x2 max-pool
        # (used here in place of the encoder's own max-pool layer).
        self.convolution1 = nn.Sequential(
            self.encoder.conv1, self.encoder.bn1, self.encoder.relu, nn.MaxPool2d(2, 2)
        )

        self.convolution2 = self.encoder.layer1
        self.convolution3 = self.encoder.layer2
        self.convolution4 = self.encoder.layer3
        self.convolution5 = self.encoder.layer4

        self.center = DecoderBlock(
            layers_features[-1], num_filters*8*2, num_filters*8, is_deconvolution=True
        )

        self.dec5 = DecoderBlock(
            layers_features[-1] + num_filters * 8,
            num_filters*8*2,
            num_filters*8,
            is_deconvolution,
        )
        self.dec4 = DecoderBlock(
            layers_features[-2] + num_filters * 8,
            num_filters * 8 * 2,
            num_filters * 8,
            is_deconvolution,
        )
        self.dec3 = DecoderBlock(
            layers_features[-3] + num_filters * 8,
            num_filters * 4 * 2,
            num_filters * 2,
            is_deconvolution,
        )
        self.dec2 = DecoderBlock(
            layers_features[-4] + num_filters * 2,
            num_filters * 2 * 2,
            num_filters * 2 * 2,
            is_deconvolution,
        )
        self.dec1 = DecoderBlock(
            num_filters * 2 * 2, num_filters * 2 * 2, num_filters, is_deconvolution
        )
        self.dec0 = ConvolutionReLu(num_filters, num_filters)
        self.final = nn.Conv2d(num_filters, num_classes, kernel_size=(1, 1))

    def forward(self, x):
        """Compute per-pixel logits for the image batch ``x``.

        Returns:
            Raw (pre-sigmoid) output of the final 1x1 convolution with
            ``num_classes`` channels.
        """
        convolution1 = self.convolution1(x)

        convolution2 = self.convolution2(convolution1)
        convolution3 = self.convolution3(convolution2)
        convolution4 = self.convolution4(convolution3)
        convolution5 = self.convolution5(convolution4)

        center = self.center(self.pool(convolution5))
        # Pooling floors odd sizes (e.g. 7 -> 3 -> 6 after upsampling), so
        # zero-pad on the right/bottom by exactly the missing amount. For a
        # 224x224 input this is one pixel per axis; for inputs whose deepest
        # feature map has an even size no padding is added.
        pad_h = convolution5.size(2) - center.size(2)
        pad_w = convolution5.size(3) - center.size(3)
        if pad_h or pad_w:
            center = F.pad(center, (0, pad_w, 0, pad_h))

        dec5 = self.dec5(torch.cat([center, convolution5], 1))
        dec4 = self.dec4(torch.cat([dec5, convolution4], 1))
        dec3 = self.dec3(torch.cat([dec4, convolution3], 1))
        dec2 = self.dec2(torch.cat([dec3, convolution2], 1))
        dec1 = self.dec1(dec2)
        dec0 = self.dec0(dec1)

        x_out = self.final(dec0)

        return x_out

In [None]:
import gc

# Drop cached CUDA blocks and unreachable Python objects, then report usage
# of GPU 0 in GiB.
torch.cuda.empty_cache()
gc.collect()

BYTES_PER_GIB = 1024**3
allocated_gb = round(torch.cuda.memory_allocated(0)/BYTES_PER_GIB, 1)
reserved_gb = round(torch.cuda.memory_reserved(0)/BYTES_PER_GIB, 1)
print('Allocated:', allocated_gb, 'GB', 'Cached:', reserved_gb, 'GB')

In [None]:
# Imported here as well so this cell does not depend on another cell having
# imported gc first.
import gc

# Initialise the model.
model = AlBuNet().to(device)

# Loss function and optimiser. BCEWithLogitsLoss expects raw logits, so the
# model output is not passed through a sigmoid here.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Halve the learning rate after every epoch. `verbose` is deliberately not
# passed: recent PyTorch releases deprecate it and warn when it is given.
scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer=optimizer,
    lr_lambda=lambda epoch: 0.5 ** epoch,
)
torch.cuda.empty_cache()

# Training loop.
epochs = 10
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(dataloader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # Add a channel axis to the masks so they match the model output.
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    # One scheduler step per epoch, after the optimiser steps.
    scheduler.step()

    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
PATH = './model/'
# Create the output folder first: torch.save does not create missing parent
# directories, and failing here would discard the finished training run.
os.makedirs(PATH, exist_ok=True)

# Save only the weights (state_dict).
torch.save(model.state_dict(), PATH + 'model_state_dict.pt')

# Alternatives:
#   torch.save(model, PATH + 'model.pt')  # pickle the whole model object
#   torch.save({'model': model.state_dict(),
#               'optimizer': optimizer.state_dict()},
#              PATH + 'all.tar')  # checkpoint; epoch/loss scalars can be added too
#
# To restore the saved weights later:
#   model = AlBuNet().to(device)
#   model.load_state_dict(torch.load(PATH + 'model_state_dict.pt', map_location=device))
#   model.eval()

In [None]:
# Inference must be deterministic and keep the image geometry: the training
# pipeline's RandomCrop / RandomRotate90 would make each predicted mask refer
# to a randomly cropped or rotated view of the test image. Only normalisation
# and tensor conversion are applied here.
# NOTE(review): assumes the test images already have the size the network was
# trained on (224x224); if not, add a deterministic resize or tiling step.
test_transform = A.Compose(
    [
        A.Normalize(),
        ToTensorV2()
    ]
)

test_dataset = SatelliteDataset(csv_file='./test.csv', transform=test_transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

In [None]:
# Predict a mask for every test image and collect the RLE strings in order.
model.eval()
result = []
with torch.no_grad():
    for images in tqdm(test_dataloader):
        logits = model(images.float().to(device))
        probabilities = torch.sigmoid(logits).cpu().numpy()
        # Drop the channel axis and binarise with a threshold of 0.35.
        masks = (np.squeeze(probabilities, axis=1) > 0.35).astype(np.uint8)

        for mask in masks:
            mask_rle = rle_encode(mask)
            # An image without any predicted building pixel is recorded as -1.
            result.append(mask_rle if mask_rle != '' else -1)

In [None]:
# Fill the sample submission with the predicted masks; `result` is in the same
# order as the (unshuffled) test dataloader.
SAMPLE_SUBMISSION_CSV = './sample_submission.csv'
submit = pd.read_csv(SAMPLE_SUBMISSION_CSV)
submit['mask_rle'] = result

In [None]:
# Write the submission file without the DataFrame index column.
SUBMISSION_CSV = '/home/osh9423/wonjun/submit.csv'
submit.to_csv(SUBMISSION_CSV, index=False)