## data slicing

In [1]:
import os
# Restrict this process to a single GPU; this is set here, before torch is imported in the next cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '1' # Check nvidia-smi for an idle GPU and select its index here!

In [2]:
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

#import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt
from IPython.display import clear_output 
import time
import os
import json
import random
from transformers import SegformerForSemanticSegmentation




# One seed shared by every random number generator used in this notebook.
random_seed = 42

# Seed torch (CPU, current GPU, all GPUs), NumPy and the stdlib RNG in that order.
for _seeder in (
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,  # only matters when several GPUs are visible
    np.random.seed,
    random.seed,
):
    _seeder(random_seed)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda


In [3]:
# RLE decoding helper
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary ``uint8`` array.

    Args:
        mask_rle: Space-separated ``"start length start length ..."`` string
            with 1-based starts into the flattened (row-major) mask. Any of
            ``-1``, ``"-1"``, ``""``, ``None`` or NaN means "no foreground".
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.ndarray`` of dtype ``uint8`` and shape ``shape`` holding 0/1.

    Raises:
        ValueError: If the string has an odd number of tokens, i.e. a start
            without a matching length.
    """
    # A CSV round-trip changes how "no mask" is spelled: a column mixing -1 with
    # RLE strings is read back as the string "-1", and an empty field as NaN.
    # Recognise every spelling explicitly instead of relying on .split().
    if mask_rle is None:
        return np.zeros(shape, dtype=np.uint8)
    if isinstance(mask_rle, str):
        if mask_rle.strip() in ('', '-1'):
            return np.zeros(shape, dtype=np.uint8)
    elif mask_rle == -1 or mask_rle != mask_rle:  # x != x is only true for NaN
        return np.zeros(shape, dtype=np.uint8)

    s = mask_rle.split()
    if len(s) % 2:
        # Without this check NumPy would broadcast the shorter array and
        # silently decode a wrong mask.
        raise ValueError(f"RLE string must contain start/length pairs, got {len(s)} tokens")

    starts = np.asarray(s[0::2], dtype=int) - 1  # RLE starts are 1-based
    lengths = np.asarray(s[1::2], dtype=int)
    ends = starts + lengths

    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding helper
def rle_encode(mask):
    """Run-length encode a binary mask.

    The mask is flattened in row-major order and returned as a space-separated
    ``"start length start length ..."`` string with 1-based starts. A mask with
    no foreground pixels yields the empty string.
    """
    flat = mask.flatten()
    # Pad with a zero on both sides so runs touching either end still produce
    # a rising and a falling edge.
    padded = np.concatenate([[0], flat, [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots are run starts and odd slots are run ends; convert ends to lengths.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

In [4]:
import bisect

def is_number_in_list(lst, target):
    """Return True when ``target`` occurs in ``lst``, using binary search.

    ``lst`` must already be sorted in ascending order, because the lookup uses
    ``bisect.bisect_left``.
    """
    pos = bisect.bisect_left(lst, target)
    # bisect_left returns len(lst) when target is larger than every element.
    within_bounds = pos < len(lst)
    return bool(within_bounds and lst[pos] == target)

In [5]:
class split_stride:
    """Cut every training image into overlapping square patches and index them in a CSV.

    Constructing an instance does all the work: it reads the source CSV, skips
    the rows listed in ``./train_trash.json`` and ``./val_trash.json``, writes
    one PNG per patch into ``image_dir`` and finally writes a CSV with the
    columns ``img_id``, ``img_path`` and ``mask_rle`` to ``output_csv``.

    Args:
        csv_file: Source CSV. Column 1 is read as the image path and column 2
            as the RLE mask (presumably the competition's train.csv layout;
            verify against the file).
        image_dir: Directory that receives the patch PNGs. It is created when
            missing.
        stride: Step in pixels between neighbouring patch origins.
        output_csv: Where the patch index CSV is written.

    Attributes:
        data: After construction, the patch index DataFrame.
        img_id, img_path, masks: Parallel per-patch lists backing ``data``.
        strong_filter_list: Sorted source-row indices that are skipped.
    """

    def __init__(self, csv_file, image_dir='../split_data_stride_160', stride=160,
                 output_csv='../data/jhs_stride_160.csv'):
        self.csv_file = csv_file
        self.stride = stride
        self.size = 224  # patch edge length in pixels
        self.image_dir = image_dir
        self.output_csv = output_csv

        # Both JSON files carry a "must" and a "quarter" list of row indices to drop.
        trash_sum_list = []
        for json_path in ("./train_trash.json", "./val_trash.json"):
            with open(json_path, "r") as js:
                json_file = json.load(js)
            trash_sum_list += json_file["must"] + json_file["quarter"]

        # Sorted because is_number_in_list() does a binary search.
        self.strong_filter_list = sorted(trash_sum_list)
        self.data = pd.read_csv(self.csv_file)
        # NOTE(review): the 800 looks like (1024 - 224), i.e. it assumes 1024px
        # source images; this value is not used elsewhere in the class.
        self.splitlen = len(self.data) * (int(800 / stride + 1) ** 2)
        self.img_id = []
        self.img_path = []
        self.masks = []

        self.save_split_images()

        # Patches without any foreground encode to '' and are stored as -1.
        self.masks = [-1 if rle == '' else rle for rle in self.masks]

        # CSV
        self.data = pd.DataFrame()
        self.data['img_id'] = self.img_id
        self.data['img_path'] = self.img_path
        self.data['mask_rle'] = self.masks
        self.data.to_csv(self.output_csv, index=False)
        print("CSV 작성 완료!")

    def save_split_images(self):
        """Write every patch PNG and collect its id, path and RLE mask.

        Raises:
            FileNotFoundError: If a source image cannot be read.
            OSError: If a patch cannot be written.
        """
        # cv2.imwrite only returns False when the target directory is missing,
        # so make sure it exists up front.
        os.makedirs(self.image_dir, exist_ok=True)
        # The CSV stores paths as './<dir>/<file>'; the datasets in this
        # notebook swap the leading '.' for '..'. This assumes image_dir is a
        # sibling of the working directory, as the default is.
        csv_dir = os.path.basename(os.path.normpath(self.image_dir))

        count = 0
        for i in tqdm(range(len(self.data))):
            if is_number_in_list(self.strong_filter_list, i):
                continue
            image_path = "../data" + self.data.iloc[i, 1][1:]
            image = cv2.imread(image_path)
            if image is None:
                raise FileNotFoundError(f"could not read image: {image_path}")
            img_height, img_width = image.shape[0], image.shape[1]
            mask = rle_decode(self.data.iloc[i, 2], (img_height, img_width))

            # Only full-size patches are kept; a window that would run past the
            # image edge ends the row or column.
            for top in range(0, img_height, self.stride):
                if top + self.size > img_height:
                    break
                for left in range(0, img_width, self.stride):
                    if left + self.size > img_width:
                        break
                    bottom = top + self.size
                    right = left + self.size

                    img_patch = image[top:bottom, left:right]
                    mask_patch = mask[top:bottom, left:right]

                    text = str(count).zfill(8)
                    img_id = f'TRAIN_{text}'
                    file_name = f'TRAIN_{text}.png'

                    out_path = os.path.join(self.image_dir, file_name)
                    if not cv2.imwrite(out_path, img_patch):
                        raise OSError(f"could not write patch: {out_path}")

                    self.img_id.append(img_id)
                    self.img_path.append(f'./{csv_dir}/{file_name}')
                    self.masks.append(rle_encode(mask_patch))

                    count += 1
# Run the slicing job on the original training index. Construction itself writes the patch PNGs and the patch CSV.
# NOTE(review): the name `csv` would shadow the standard-library module if that were ever imported here.
csv = split_stride(csv_file='../data/train.csv')

In [6]:
# Preview the first rows of the generated patch index. Without the call
# parentheses the cell only displays the bound method's repr.
csv.data.head()

## custom cells

In [8]:
# Single foreground class. The inverse mapping is derived from id2label so the
# two dictionaries cannot drift apart.
id2label = {1: 'building'}
label2id = {name: idx for idx, name in id2label.items()}

# Load the pretrained checkpoint. ignore_mismatched_sizes lets the
# classification layer be re-created for the label set above.
pretrained_model_name = "nvidia/mit-b3"
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    ignore_mismatched_sizes=True,
    label2id=label2id,
    id2label=id2label,
)

Some weights of the model checkpoint at nvidia/mit-b3 were not used when initializing SegformerForSemanticSegmentation: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b3 and are newly initialized: ['decode_head.batch_norm.running_var', 'decode_head.linear_c.0.proj.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.weight', 'decode_head.linear_c.0.proj.weight', 'decode_

In [9]:
class SatelliteDataset(Dataset):
    """Inference-only dataset that yields ``{"pixel_values": image}`` items.

    Args:
        csv_file: CSV whose column 1 holds the image path written as
            ``./<relative path>``. The leading ``.`` is replaced by ``../data``.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``['image']``. When None the RGB array read
            from disk is returned unchanged.
        infer: Must be True for items to be fetched; this class does not load
            masks.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

        print("full dataset size : ",len(self.data))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it under the ``pixel_values`` key.

        Raises:
            AssertionError: If the dataset was not built with ``infer=True``.
            FileNotFoundError: If the image cannot be read.
        """
        # Raised explicitly (same exception type as the former `assert False`)
        # so the guard still fires when Python runs with -O.
        if not self.infer:
            raise AssertionError("SatelliteDataset class must be used as test dataset obj")

        img_path = "../data"+self.data.iloc[idx, 1][1:]
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # `is not None` rather than truthiness: a transform object that defines
        # __len__ could otherwise be skipped when it is empty.
        if self.transform is not None:
            image = self.transform(image=image)['image']

        # Always build the item; previously it was only defined when a
        # transform was supplied.
        return {"pixel_values": image}

In [10]:
class TV_SatelliteDataset(Dataset):
    """Train/validation dataset over the patch index CSV.

    The first 80% of the CSV rows form the training split and the remaining
    20% the validation split. The split is by row order, not shuffled.

    Args:
        csv_file: Patch index CSV. Column 1 is the image path written as
            ``./<relative path>`` and column 2 the RLE mask.
        transform: Callable invoked as ``transform(image=..., mask=...)``. It
            must return a mapping whose ``'mask'`` entry supports
            ``.type(torch.LongTensor)``, i.e. a tensor. Required in practice,
            because ``__getitem__`` calls it unconditionally.
        is_train: Selects the training split when true, otherwise validation.
        stride: Stored on the instance but not used by this class.
    """

    def __init__(self, csv_file="../data/jhs_stride_160.csv", transform=None, is_train = True, stride=200):
        self.is_train = is_train
        self.transform = transform
        self.stride = stride
        self.size = 224  # patch edge length; masks are decoded to (size, size)

        full_index = pd.read_csv(csv_file)
        # Split point derived from the actual row count rather than a
        # hard-coded total, so any CSV length gives a true 80/20 split.
        self.cutter = int(len(full_index) * 0.8)  # tv cutter

        if self.is_train:
            self.data = full_index[:self.cutter]
        else:
            self.data = full_index[self.cutter:]

        print("Full dataset size:", len(self.data))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{"pixel_values": image, "labels": mask}`` for patch ``idx``.

        Raises:
            FileNotFoundError: If the patch image cannot be read.
        """
        img_path = ".."+self.data.iloc[idx, 1][1:]
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        mask = rle_decode(self.data.iloc[idx, 2], (self.size, self.size))

        # Image and mask go through the transform together so geometric
        # augmentations stay aligned.
        augmented = self.transform(image=image, mask=mask)
        return {"pixel_values":augmented['image'],"labels":augmented['mask'].type(torch.LongTensor)}
        

        
        


In [11]:
def _with_tensor_tail(*ops):
    """Compose ``ops`` followed by the shared Normalize -> ToTensorV2 tail."""
    return A.Compose([*ops, A.Normalize(), ToTensorV2()])


# Photometric jitter (one of CLAHE / brightness-contrast / Gaussian noise) plus a shear.
# A.Resize(512, 512) and A.RandomCrop(224, 224) were tried earlier and are left out.
aug1 = _with_tensor_tail(
    A.OneOf([
        A.CLAHE(p = 0.1),
        A.RandomBrightnessContrast(contrast_limit=0.1, brightness_by_max=False),
        A.GaussNoise(var_limit=(0.0, 25.0), p = 0.1)
    ]),
    A.Affine(shear=(-10, 10)),
)

# One geometric augmentation per pipeline.
aug2 = _with_tensor_tail(A.VerticalFlip())
aug3 = _with_tensor_tail(A.HorizontalFlip())
aug4 = _with_tensor_tail(A.Rotate())

# No augmentation: normalisation and tensor conversion only (also used for val/test).
transform = _with_tensor_tail()

# One training view per augmentation pipeline, built in the same order as before.
aug1_dataset, aug2_dataset, aug3_dataset, aug4_dataset = (
    TV_SatelliteDataset(transform=pipeline, is_train=True)
    for pipeline in (aug1, aug2, aug3, aug4)
)

# The final training set is the un-augmented view followed by the four
# augmented views of the same rows.
train_ds = TV_SatelliteDataset(transform=transform, is_train=True)
train_ds = ConcatDataset([train_ds, aug1_dataset, aug2_dataset, aug3_dataset, aug4_dataset])

val_ds = TV_SatelliteDataset(transform=transform, is_train=False)

test_ds = SatelliteDataset(csv_file='../data/test.csv', transform=transform, infer=True)

Full dataset size: 193536
Full dataset size: 193536
Full dataset size: 193536
Full dataset size: 193536
Full dataset size: 193536
Full dataset size: 48384
full dataset size :  60640


## huggingface cells

In [28]:
from transformers import TrainingArguments

# Core hyper-parameters.
epochs, lr, batch_size = 4, 6e-5, 64

# Repository name used when the run is pushed to the Hub.
hub_model_id = "segformer-b3"

training_args = TrainingArguments(
    output_dir="segformer-b3",
    seed=random_seed,
    # optimisation
    num_train_epochs=epochs,
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # evaluate and checkpoint on the same 3000-step cadence, keeping 3 checkpoints
    evaluation_strategy="steps",
    eval_steps=3000,
    eval_accumulation_steps=5,
    save_strategy="steps",
    save_steps=3000,
    save_total_limit=3,
    load_best_model_at_end=True,
    # the training loss is logged at every step
    logging_steps=1,
    # upload once, when training finishes
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="end",
)


In [30]:
from transformers import Trainer

# No compute_metrics callback is supplied, so evaluation reports the
# validation loss only.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=val_ds,
    train_dataset=train_ds,
)


Cloning https://huggingface.co/leadawon/segformer-b3 into local empty directory.


In [31]:
# Start fine-tuning from scratch. To continue an interrupted run from a saved
# checkpoint instead, swap in the commented-out call below.
#trainer.train(resume_from_checkpoint = True)
trainer.train()



Step,Training Loss,Validation Loss
3000,0.0461,0.045847
6000,0.0372,0.042512
9000,0.0414,0.040026
12000,0.032,0.039431
15000,0.0374,0.039138
18000,0.032,0.039382
21000,0.0333,0.038783
24000,0.0287,0.03828
27000,0.0337,0.037817
30000,0.0286,0.038446


KeyboardInterrupt: 

In [32]:
# Batches of 16 in CSV row order (shuffle=False), so predictions line up with the submission rows.
test_dataloader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=4)

In [33]:
# Reload the weights saved at step 27000, the evaluation step with the lowest
# validation loss (0.037817) in the training log above. This replaces the
# in-memory model left over from the interrupted training run.
model = SegformerForSemanticSegmentation.from_pretrained(
    "./segformer-b3/checkpoint-27000",
    id2label=id2label,
    label2id=label2id
)

In [34]:

# Move the reloaded model onto the selected device before inference.
model.to(device)

SegformerForSemanticSegmentation(
  (segformer): SegformerModel(
    (encoder): SegformerEncoder(
      (patch_embeddings): ModuleList(
        (0): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (1): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        )
        (2): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((320,), eps=1e-05, elementwise_affine=True)
        )
        (3): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)

In [None]:
# Probability above which a pixel is labelled as building.
MASK_THRESHOLD = 0.40

with torch.no_grad():
    model.eval()
    result = []
    for images in tqdm(test_dataloader):
        images = images["pixel_values"].float().to(device)

        outputs = model(images)
        logits = outputs.logits

        # Resize the logits to the spatial size of the input batch; the size is
        # taken from the batch rather than hard-coded.
        upsampled_logits = nn.functional.interpolate(
                logits,
                size=images.shape[-2:], # (height, width)
                mode='bilinear',
                align_corners=False
                )

        # Turn the logits into probabilities with a sigmoid, then binarise them
        # at MASK_THRESHOLD.
        masks = torch.sigmoid(upsampled_logits).cpu().numpy()
        masks = (masks > MASK_THRESHOLD).astype(np.uint8)
        for i in range(len(images)):
            # Channel 0 is the only channel that gets encoded.
            mask_rle = rle_encode(masks[i][0])
            if mask_rle == '': # no building pixels predicted at all -> -1
                result.append(-1)
            else:
                result.append(mask_rle)

 46%|█████████████████▊                     | 1736/3790 [02:52<03:58,  8.60it/s]

In [None]:
# Start from the sample submission and fill its mask_rle column with the
# predictions collected by the inference loop.
submit = pd.read_csv('../data/sample_submission.csv').assign(mask_rle=result)

In [None]:
# Write the submission file without the DataFrame index column.
submit.to_csv('../submit/b3_040_27000steps.csv', index=False)