In [1]:
import torch
# Seed PyTorch's global random number generator so repeated runs are comparable.
torch.manual_seed(17)

<torch._C.Generator at 0x7f88089e91b0>

In [2]:
import os
import cv2
from PIL import Image
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from albumentations.core.transforms_interface import ImageOnlyTransform,DualTransform
from tqdm.auto import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from glob import glob
import matplotlib.pyplot as plt
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
# Class id -> class name table handed to the model config.
# The position of each name in the list is its class id.
id2label = dict(enumerate([
    'background',     # 0
    'Load',           # 1  NOTE(review): presumably a typo for 'Road' - confirm before renaming
    'Sidewalk',       # 2
    'Construction',   # 3
    'Fence',          # 4
    'Pole',           # 5
    'Traffic Light',  # 6
    'Traffic Sign',   # 7
    'Nature',         # 8
    'Sky',            # 9
    'Person',         # 10
    'Rider',          # 11
    'Car',            # 12
    'Background',     # 13
]))
# Variant table that names two separate background ids (13 and 14).
# It is not referenced by any other code visible in this notebook.
id2label_check = {**id2label, 13: 'background_car', 14: 'background_255'}
# Inverse lookup: class name -> class id.
label2id = {name: class_id for class_id, name in id2label.items()}
label2id

{'background': 0,
 'Load': 1,
 'Sidewalk': 2,
 'Construction': 3,
 'Fence': 4,
 'Pole': 5,
 'Traffic Light': 6,
 'Traffic Sign': 7,
 'Nature': 8,
 'Sky': 9,
 'Person': 10,
 'Rider': 11,
 'Car': 12,
 'Background': 13}

In [4]:
from transformers import Mask2FormerForUniversalSegmentation
from transformers import AutoImageProcessor
# Local checkpoint directory to load. Other checkpoints used during
# development: 'last_epoch3', 'best_model', 'last_epoch_g'.
model_checkpoint = 'best_model_base3'

# Keyword arguments shared by both model loads: install the notebook's label
# tables and tolerate weight-shape mismatches with the checkpoint.
_pretrained_kwargs = dict(
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

model = Mask2FormerForUniversalSegmentation.from_pretrained(
    model_checkpoint, **_pretrained_kwargs
)
# NOTE(review): model2 is loaded from the same checkpoint as `model`, so the two
# networks hold identical weights. Point this at a different checkpoint if the
# second model is meant to contribute different predictions.
model2 = Mask2FormerForUniversalSegmentation.from_pretrained(
    model_checkpoint, **_pretrained_kwargs
)

# Image processor taken from the public Cityscapes checkpoint; do_resize=False
# asks it not to resize the inputs.
preprocessor = AutoImageProcessor.from_pretrained(
    "facebook/mask2former-swin-large-cityscapes-semantic",
    do_resize=False,
)

In [5]:
# preprocessor.size = {
#     "height": 784,
#     "width": 784
# }

In [6]:
# Run-length encoding helper
def rle_encode(mask):
    """Run-length encode ``mask`` into a space-separated string.

    The mask is flattened with ``mask.flatten()`` and scanned for positions
    where consecutive values differ. For a 0/1 mask the result alternates
    ``start length start length ...`` with 1-based start positions. A mask with
    no non-zero values yields an empty string.
    """
    flat = mask.flatten()
    # Zero-pad both ends so runs touching the first or last pixel still
    # produce a value change on each side.
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions at which the value changes.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Every second boundary is a run end; convert it to a run length.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

In [7]:
import matplotlib.pyplot as plt
# Per-channel mean and standard deviation, scaled from the 0-255 range to 0-1.
# Neither constant is referenced by any other code visible in this notebook.
# NOTE(review): the values look like the usual ImageNet statistics - confirm.
ADE_MEAN = np.array([123.675, 116.280, 103.530]) / 255
ADE_STD = np.array([58.395, 57.120, 57.375]) / 255
class CustomDataset(Dataset):
    """Dataset that reads images (and optionally masks) from disk with OpenCV.

    Args:
        image_path: Paths of the input images.
        mask_path: Paths of the grayscale masks, index-aligned with
            ``image_path``. Never read when ``infer`` is True.
        transform: Optional callable invoked as ``transform(image=...)`` or
            ``transform(image=..., mask=...)`` and returning a mapping with an
            ``'image'`` (and ``'mask'``) entry.
        infer: When True, items are ``(image, ori_image)``; otherwise they are
            ``(image, mask, ori_image, ori_mask)``.
    """

    def __init__(self, image_path:list,mask_path:list, transform=None, infer=False):
        self.transform = transform
        self.infer = infer
        self.img_path = image_path
        self.mask_path = mask_path

    def __len__(self):
        """Return the number of images."""
        return len(self.img_path)

    @staticmethod
    def _is_empty(array):
        """Return True when ``array`` (torch tensor or array-like) has no elements."""
        numel = getattr(array, "numel", None)
        if callable(numel):
            # torch tensors: ``size()`` returns a shape, so count elements instead.
            return numel() == 0
        return np.size(array) == 0

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, ori_image)`` in inference mode, otherwise
            ``(image, mask, ori_image, ori_mask)``. ``ori_image`` is the RGB
            image as read from disk and ``ori_mask`` the grayscale mask as read
            from disk. ``image`` / ``mask`` are the transformed versions, or
            the originals when no transform is set.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image or mask file.
        """
        img_path = self.img_path[idx]
        bgr_image = cv2.imread(f"{img_path}")
        if bgr_image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        ori_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        # Without a transform, hand back the RGB image so both returned images
        # use the same channel order.
        image = ori_image

        if self.infer:
            if self.transform:
                image = self.transform(image=ori_image)['image']
            return image, ori_image

        mask_path = self.mask_path[idx]
        ori_mask = cv2.imread(f"{mask_path}", cv2.IMREAD_GRAYSCALE)
        if ori_mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        # Without a transform the mask is returned as read from disk.
        mask = ori_mask
        if self.transform:
            augmented = self.transform(image=ori_image, mask=ori_mask)
            image = augmented['image']
            mask = augmented['mask']
        if self._is_empty(mask) or self._is_empty(image):
            # Best-effort warning only; the sample is still returned.
            print(f"ERROR: empty image or mask for {img_path}")
        return image, mask, ori_image, ori_mask

In [8]:
def collate_fn(batch):
    """Collate dataset samples into one batch via the module-level ``preprocessor``.

    Each sample is expected to start with ``(image, original_image, ...)``, as
    ``CustomDataset`` returns in inference mode. The images are passed to
    ``preprocessor.preprocess`` and the untouched originals are attached to the
    result under ``"original_images"``.
    """
    # Transpose the list of samples into per-field tuples.
    columns = tuple(zip(*batch))
    model_inputs, raw_images = columns[0], columns[1]

    encoded = preprocessor.preprocess(
        model_inputs,
        ignore_index=255,
        return_tensors="pt",
    )
    encoded["original_images"] = raw_images
    return encoded


In [9]:
# Inference-time pipeline: the only step is ToTensorV2; no resizing or
# augmentation is configured here.
# NOTE(review): normalization is presumably left to the Hugging Face image
# processor used in collate_fn - confirm.
sub_transform = A.Compose([
    ToTensorV2(),
])

In [10]:
# Gather the test images in sorted order so predictions are produced in a
# deterministic file order.
sub_glob = sorted(glob('./test_image/*.png'))
sub_dataset = CustomDataset(sub_glob, [], transform=sub_transform, infer=True)
print(len(sub_dataset))
# One image per batch, order preserved, batches assembled by collate_fn.
sub_dataloader = DataLoader(
    sub_dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_fn,
)
len(sub_dataloader)

1898


1898

In [12]:
# (width, height) passed to PIL's resize for every prediction map.
SUBMISSION_SIZE = (960, 540)
# Class ids for which the second model's prediction replaces the first model's.
OVERRIDE_CLASS_IDS = (5, 4, 10, 13, 0)
# Model class ids that are encoded, in order: twelve entries per image (ids 1..12).
# NOTE(review): the submission rows are named class_0..class_11, so model id k
# presumably corresponds to submission class k-1 - confirm.
SUBMISSION_CLASS_IDS = range(1, 13)


def _to_submission_size(seg_map):
    """Convert a predicted label tensor to a uint8 array resized to SUBMISSION_SIZE."""
    label_image = Image.fromarray(seg_map.cpu().numpy().astype(np.uint8))
    # Nearest-neighbour keeps the values valid class ids (no interpolation).
    return np.array(label_image.resize(SUBMISSION_SIZE, Image.NEAREST))


def _merge_predictions(primary, secondary):
    """Return ``primary`` with pixels overwritten where ``secondary`` predicts an override class."""
    merged = primary.copy()
    override = np.isin(secondary, OVERRIDE_CLASS_IDS)
    merged[override] = secondary[override]
    return merged


def _encode_classes(label_map):
    """Return one entry per submission class: the RLE string, or -1 when the class is absent."""
    rows = []
    for class_id in SUBMISSION_CLASS_IDS:
        class_mask = (label_map == class_id).astype(np.uint8)
        rows.append(rle_encode(class_mask) if class_mask.any() else -1)
    return rows


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model2.to(device)
# Put both networks in evaluation mode before predicting.
model.eval()
model2.eval()

# Flat list of submission entries: 12 consecutive entries per test image.
result = []
for batch in tqdm(sub_dataloader):
    # Move the inputs to the device once and reuse them for both models.
    pixel_values = batch['pixel_values'].to(device)

    with torch.no_grad():
        outputs = model(pixel_values=pixel_values)
        outputs2 = model2(pixel_values=pixel_values)

        # Post-process each prediction at the (height, width) of its original image.
        original_images = batch['original_images']
        target_sizes = [(image.shape[0], image.shape[1]) for image in original_images]
        predicted_segmentation_maps = preprocessor.post_process_semantic_segmentation(
            outputs, target_sizes=target_sizes
        )
        predicted_segmentation_maps2 = preprocessor.post_process_semantic_segmentation(
            outputs2, target_sizes=target_sizes
        )

        for seg_map, seg_map2 in zip(predicted_segmentation_maps, predicted_segmentation_maps2):
            pred = _merge_predictions(
                _to_submission_size(seg_map),
                _to_submission_size(seg_map2),
            )
            result.extend(_encode_classes(pred))


  0%|          | 0/1898 [00:00<?, ?it/s]

<Figure size 640x480 with 0 Axes>

In [13]:
# Load the sample submission and set its mask_rle column to the predictions,
# row for row (the lengths must match).
submit = pd.read_csv('./sample_submission.csv').assign(mask_rle=result)
submit

Unnamed: 0,id,mask_rle
0,TEST_0000_class_0,210709 3 211668 5 212607 10 212627 14 212652 3...
1,TEST_0000_class_1,-1
2,TEST_0000_class_2,598 273 1557 275 2516 276 3476 276 4436 277 53...
3,TEST_0000_class_3,201964 6 202916 26 203876 45 204837 74 205798 ...
4,TEST_0000_class_4,-1
...,...,...
22771,TEST_1897_class_7,151289 5 152246 12 152270 13 153205 14 153226 ...
22772,TEST_1897_class_8,101 535 676 128 851 23 1061 534 1637 125 1811 ...
22773,TEST_1897_class_9,-1
22774,TEST_1897_class_10,-1


In [14]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
submit.to_csv('./baseline_submit.csv', index=False)