## Data slicing: SegFormer fine-tuning and inference on sliced satellite tiles

In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # check nvidia-smi for an idle GPU and select it here!

In [2]:
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

#import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt
from IPython.display import clear_output 
import time
import os
import json
import random


# Seed every RNG the notebook touches (PyTorch CPU/GPU, NumPy, stdlib) with one value.
random_seed = 42
for _seed_fn in (
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,  # covers the multi-GPU case
    np.random.seed,
    random.seed,
):
    _seed_fn(random_seed)

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [4]:
# RLE decoding
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a binary 2-D array.

    The encoding is a whitespace-separated sequence of ``start length`` pairs,
    where ``start`` is a 1-based index into the row-major flattened mask.

    Args:
        mask_rle: RLE string, or one of the "empty mask" markers: the number
            ``-1``, the string ``"-1"``, an empty/blank string, ``None`` or
            NaN (what pandas yields for a blank CSV cell).
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        ``np.uint8`` array of ``shape`` with 1 inside the runs and 0 elsewhere.

    Raises:
        ValueError: if the string holds an odd number of tokens, i.e. a start
            without a matching length.
    """
    is_nan = isinstance(mask_rle, float) and np.isnan(mask_rle)
    if mask_rle is None or is_nan or mask_rle == -1:
        return np.zeros(shape, dtype=np.uint8)

    tokens = mask_rle.split()
    if not tokens or tokens == ['-1']:
        return np.zeros(shape, dtype=np.uint8)
    if len(tokens) % 2:
        # Without this check NumPy broadcasting would silently pair one length
        # with several starts and paint wrong runs.
        raise ValueError(
            f"Malformed RLE: expected start/length pairs, got {len(tokens)} tokens"
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # RLE starts are 1-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding
def rle_encode(mask):
    """Run-length encode a binary mask as a ``"start length start length ..."`` string.

    The mask is flattened in row-major order and starts are 1-based. A mask
    with no foreground pixels encodes to the empty string.
    """
    # Zero-pad both ends so runs touching the borders still produce a boundary.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    # 1-based positions where the value changes: run starts alternate with run ends.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each run end into a run length by subtracting the matching start.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))
class TV_SatelliteDataset(Dataset):
    """Train/validation split over a CSV of pre-sliced satellite tiles.

    Rows are accessed by position: the column at position 1 (zero-based) is
    read as the tile image path and the column at position 2 as its
    RLE-encoded mask. The first 90 % of the rows form the training split and
    the remaining rows the validation split.

    Args:
        csv_file: path of the CSV listing the tiles.
        transform: callable invoked as ``transform(image=..., mask=...)`` and
            returning a mapping with ``'image'`` and a tensor ``'mask'``
            (e.g. an albumentations pipeline ending in ``ToTensorV2``).
            Required for item access.
        is_train: ``True`` selects the training rows, ``False`` the
            validation rows.
        stride: stored on the instance; not otherwise used by this class.
    """

    def __init__(self, csv_file="../data/jhs_stride_160.csv", transform=None, is_train = True, stride=200):
        self.is_train = is_train
        self.transform = transform
        self.stride = stride
        self.size = 224  # tile edge length in pixels; also the decoded mask shape

        table = pd.read_csv(csv_file)
        # Train/validation cut derived from the actual row count, so a CSV
        # other than the 241920-row default is still split 90/10.
        self.cutter = int(len(table) * 0.9)
        self.data = table[:self.cutter] if self.is_train else table[self.cutter:]

        print("Full dataset size:", len(self.data))

    def __len__(self):
        """Number of tiles in the selected split."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load tile ``idx`` and return ``{"pixel_values", "labels"}`` for the model.

        Raises:
            TypeError: if the dataset was built without a ``transform``.
            FileNotFoundError: if the tile image cannot be read.
        """
        if self.transform is None:
            raise TypeError(
                "TV_SatelliteDataset needs a `transform` callable to produce tensors"
            )

        # Stored paths are rewritten relative to the parent directory:
        # the first character is dropped and ".." is prepended.
        image_path = ".." + self.data.iloc[idx, 1][1:]
        image_bgr = cv2.imread(image_path)
        if image_bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        augmented = self.transform(
            image=cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB),
            mask=rle_decode(self.data.iloc[idx, 2], (self.size, self.size)),
        )

        return {"pixel_values":augmented['image'],"labels":augmented['mask'].type(torch.LongTensor)}

        
        
def _tile_pipeline(*augmentations):
    """Resize to 224x224, apply the given augmentations, then normalize and convert to tensors."""
    return A.Compose([A.Resize(224, 224), *augmentations, A.Normalize(), ToTensorV2()])


# One pipeline per augmentation, plus the augmentation-free baseline.
aug1 = _tile_pipeline(A.Rotate())
aug2 = _tile_pipeline(A.VerticalFlip())
aug3 = _tile_pipeline(A.HorizontalFlip())
transform = _tile_pipeline()

# Every training view reads the same training rows; only the transform differs.
aug1_dataset = TV_SatelliteDataset(transform=aug1, is_train=True)
aug2_dataset = TV_SatelliteDataset(transform=aug2, is_train=True)
aug3_dataset = TV_SatelliteDataset(transform=aug3, is_train=True)

# Training set = baseline view followed by the three augmented views.
train_ds = ConcatDataset([
    TV_SatelliteDataset(transform=transform, is_train=True),
    aug1_dataset,
    aug2_dataset,
    aug3_dataset,
])
# Validation uses the baseline pipeline only.
val_ds = TV_SatelliteDataset(transform=transform, is_train=False)



Full dataset size: 217728
Full dataset size: 217728
Full dataset size: 217728
Full dataset size: 217728
Full dataset size: 24192


In [3]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub; the TrainingArguments in this
# notebook set push_to_hub=True, which uploads to the Hub.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# Single foreground class; label2id is the inverse mapping of id2label.
id2label = {1: 'building'}
label2id = {label: class_id for class_id, label in id2label.items()}

In [None]:
from transformers import SegformerForSemanticSegmentation

# Load the SegFormer weights to fine-tune from; the path looks like a local
# checkpoint directory rather than a Hub model id.
pretrained_model_name = "./best_model/segformer-b4-27000steps/checkpoint-27000" 
model = SegformerForSemanticSegmentation.from_pretrained(
    pretrained_model_name,
    id2label=id2label,
    label2id=label2id,
    #ignore_mismatched_sizes=True
)


In [7]:
from transformers import TrainingArguments

# Hyperparameters.
epochs = 2
lr = 6e-5
batch_size = 32

# Used both as the local output directory and as the Hub repository name.
# NOTE(review): the name says "48batch" while batch_size is 32 - confirm.
hub_model_id = "b4_48batch_27000steps_040"

training_args = TrainingArguments(
    output_dir=hub_model_id,
    seed=random_seed,
    # Optimisation.
    learning_rate=lr,
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    # Evaluate and checkpoint on the same 3000-step cadence.
    evaluation_strategy="steps",
    eval_steps=3000,
    eval_accumulation_steps=5,
    save_strategy="steps",
    save_steps=3000,
    save_total_limit=30,
    load_best_model_at_end=True,
    logging_steps=1,
    # Hub upload.
    push_to_hub=True,
    hub_model_id=hub_model_id,
    hub_strategy="end",
)


In [8]:
from transformers import Trainer

# Wire the model, arguments and the train/validation datasets together.
# compute_metrics is commented out, so no custom metric function is passed.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    #compute_metrics=compute_metrics,
)


Cloning https://huggingface.co/leadawon/b5_jhsstride_040 into local empty directory.


In [10]:
# Upload the trainer's model to the Hugging Face Hub.
trainer.push_to_hub()

Several commits (2) will be pushed upstream.
The progress bars may be unreliable.


Upload file pytorch_model.bin:   0%|          | 1.00/323M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [17]:
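# Training call is left commented out; the log below was presumably produced by an earlier run of this cell.
#trainer.train(resume_from_checkpoint = True)
## 30000 steps, batch size 32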



Step,Training Loss,Validation Loss
12000,0.0362,0.040822
15000,0.037,0.040068
18000,0.035,0.038748
21000,0.0368,0.038025
24000,0.0362,0.038506
27000,0.037,0.037737
30000,0.0475,0.037172
33000,0.0391,0.037538
36000,0.0283,0.03743
39000,0.0333,0.037633


TrainOutput(global_step=54432, training_loss=0.02640183386249169, metrics={'train_runtime': 59198.559, 'train_samples_per_second': 29.423, 'train_steps_per_second': 0.919, 'total_flos': 1.3308004929620253e+20, 'train_loss': 0.02640183386249169, 'epoch': 2.0})

In [33]:
# NOTE(review): `test_ds` is not defined in any cell visible in this notebook;
# it must be created before this cell will run on a fresh kernel.
# shuffle=False keeps the prediction order equal to the dataset order.
test_dataloader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=4)

In [None]:
# Pickle the whole model object (not just its state_dict) to disk.
torch.save(model, '../best_model/huggingface_model_0.pth')

In [34]:
# Reload the model used for inference from a saved checkpoint.
# NOTE(review): this path differs from the output directory given to
# TrainingArguments in this notebook - confirm it is the intended checkpoint.
model = SegformerForSemanticSegmentation.from_pretrained(
    "./segformer-b0-finetuned-segments-sidewalk-outputs/checkpoint-30000",
    id2label=id2label,
    label2id=label2id
)

In [None]:
#model = torch.load('../best_model/huggingface_model_0.pth')
# Move the model to the selected device before running inference.
model.to(device)

In [36]:
# Predict a mask for every test image and collect one RLE string per image
# (or -1 when no building pixel is predicted), in dataloader order.
model.eval()
result = []
with torch.no_grad():
    for batch in tqdm(test_dataloader):
        pixel_values = batch["pixel_values"].float().to(device)
        logits = model(pixel_values).logits

        # Bring the logits to the 224x224 resolution before thresholding.
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=(224, 224),  # (height, width)
            mode='bilinear',
            align_corners=False,
        )

        # Per-pixel probability, binarised at a threshold of 0.40.
        probabilities = torch.sigmoid(upsampled_logits).cpu().numpy()
        masks = (probabilities > 0.40).astype(np.uint8)

        for sample_mask in masks:
            # Channel 0 is the channel that gets encoded.
            mask_rle = rle_encode(sample_mask[0])
            # An empty encoding means no predicted building pixels: store -1.
            result.append(-1 if mask_rle == '' else mask_rle)

100%|██████████| 3790/3790 [05:23<00:00, 11.71it/s]


In [37]:
# Fill the sample submission's mask column with the predictions, in dataloader order.
submit = pd.read_csv('../data/sample_submission.csv').assign(mask_rle=result)

In [38]:
# Write the submission file without the DataFrame index column.
submit.to_csv('../submit/b5_jhsstride_040.csv', index=False)