In [7]:
import os
import random
import time
import json
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from pycocotools.coco import COCO
import cv2
import torchvision
import torchvision.transforms as transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2

import matplotlib.pyplot as plt
from natsort import natsorted
from torch.cuda.amp import GradScaler, autocast

from utils import *
from dataloader import *
from loss import *
from evaluate import *
from scheduler import *

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f"This notebook use {device}")

This notebook use cuda:0


In [2]:
# Global run configuration.
SEED = 77
BATCH_SIZE = 8

# Seed every random number generator used by the notebook
# (Python, NumPy, PyTorch CPU and the current CUDA device).
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(SEED)

# Disable cuDNN autotuning and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## Dataset

In [4]:
# Annotation files for each split, all located under the same data directory.
dataset_path = 'input/data'
train_path = f'{dataset_path}/train.json'
val_path = f'{dataset_path}/val.json'
test_path = f'{dataset_path}/test.json'

# Both pipelines are currently identical: resize to 256x256, then convert to a tensor.
# They are kept as separate objects so train-only augmentations can be added later.
train_transform = A.Compose([A.Resize(256, 256), ToTensorV2()])
test_transform = A.Compose([A.Resize(256, 256), ToTensorV2()])

# One dataset per split; the train split uses the train transform,
# validation and test use the test transform.
train_dataset = CustomDataLoader(data_dir=train_path, mode='train', transform=train_transform)
val_dataset = CustomDataLoader(data_dir=val_path, mode='val', transform=test_transform)
test_dataset = CustomDataLoader(data_dir=test_path, mode='test', transform=test_transform)

# Only the training loader shuffles and drops the last incomplete batch;
# validation and test keep dataset order and every sample.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

loading annotations into memory...
Done (t=3.75s)
creating index...
index created!
loading annotations into memory...
Done (t=0.74s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


## Model

In [5]:
import torch.nn as nn
import segmentation_models_pytorch as smp

# DeepLabV3+ with an ImageNet-pretrained ResNeXt50 (32x4d) encoder and 12 output classes.
# Previously tried alternatives: torchvision's deeplabv3_resnet50 with its final
# classifier / aux_classifier convolutions replaced by 12-channel 1x1 convs, and an
# 'efficientnet-b4' encoder for this same smp model.
model = smp.DeepLabV3Plus(
    encoder_name='resnext50_32x4d',
    encoder_weights='imagenet',
    classes=12,
)
model = model.to(device)

calculate_parameter(model)

--------------------------------------------------
Total number of parameters: [26,152,284]
--------------------------------------------------
Total number of Conv layer : 67


## Train

In [8]:
# Train the segmentation model with mixed precision, validate after every epoch and
# save a checkpoint whenever the validation mIoU beats the best value seen so far.
save_model_name = 'resnext50_batch8_resize_iouCE'

# hyper parameters
EPOCHS = 60
LR_start = 2e-6
LR_max = 1e-4
accumulation_step = 1   # number of batches whose gradients are accumulated per optimizer step
print_every = 1         # currently not used by the loop below
best_val_mIoU = 0.40    # checkpoints are only written once validation mIoU exceeds this

# loss / optimizer / LR schedule
criterion = IoU_CE_Loss(iou_rate=0.4, weight=None)
optimizer = torch.optim.Adam(model.parameters(), lr=LR_start)
scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=20, eta_max=LR_max, T_up=2, gamma=0.5)

# A single gradient scaler for the whole run (it was previously constructed twice).
scaler = GradScaler()

print("Start training..")
for epoch in range(1, EPOCHS + 1):
    # Explicitly (re-)enter training mode every epoch. validation() presumably switches
    # the model to eval mode -- TODO confirm in evaluate.py; if it does, omitting this
    # call would train every epoch after the first with BatchNorm/Dropout in eval mode.
    model.train()
    # Start each epoch from clean gradients, e.g. after an interrupted earlier run of this cell.
    optimizer.zero_grad()

    avg_loss = 0
    batch_count = len(train_loader)

    for step, (images, masks) in enumerate(train_loader):
        start = time.time()
        images, masks = images.to(device), masks.long().to(device)

        with autocast():
            output = model(images)
            loss = criterion(output, masks)

        # Divide by the accumulation length so the accumulated gradient is an average
        # over the window rather than a sum (a no-op when accumulation_step == 1).
        scaler.scale(loss / accumulation_step).backward()

        # Step at the end of every accumulation window, and also on the last batch so a
        # trailing partial window is applied instead of leaking into the next epoch.
        is_window_end = (step + 1) % accumulation_step == 0
        is_last_batch = (step + 1) == batch_count
        if is_window_end or is_last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Running mean of the (unscaled) per-batch loss over the epoch.
        avg_loss += loss.item() / batch_count
        print(f"\rEpoch:{epoch:3d}  step:{step:3d}/{batch_count-1}  time:{time.time() - start:.3f}  LR:{scheduler.get_lr()[0]:.6f}", end='')

    # The learning-rate schedule advances once per epoch.
    scheduler.step()
    val_loss, val_mIoU = validation(model, val_loader, criterion, device)
    print(f"   loss: {avg_loss:.3f}  val_loss: {val_loss:.3f}  val_mIoU:{val_mIoU:.3f}")
    if best_val_mIoU < val_mIoU:
        save_model(model, saved_dir="model", file_name=save_model_name + f'_epoch{epoch}_score{val_mIoU:.3f}.pt')
        best_val_mIoU = val_mIoU
print("Finish training")

Start training..
Epoch:  1  step:326/326  time:0.472  LR:0.000002   loss: 1.724  val_loss: 1.571  val_mIoU:0.104
Epoch:  2  step:326/326  time:0.470  LR:0.000051   loss: 0.930  val_loss: 0.639  val_mIoU:0.311
Epoch:  3  step:326/326  time:0.471  LR:0.000100   loss: 0.584  val_loss: 0.531  val_mIoU:0.350
Epoch:  4  step:326/326  time:0.470  LR:0.000099   loss: 0.451  val_loss: 0.469  val_mIoU:0.386
Epoch:  5  step:326/326  time:0.463  LR:0.000097   loss: 0.374  val_loss: 0.483  val_mIoU:0.373
Epoch:  6  step:326/326  time:0.466  LR:0.000093   loss: 0.329  val_loss: 0.493  val_mIoU:0.378
Epoch:  7  step:326/326  time:0.468  LR:0.000089   loss: 0.298  val_loss: 0.442  val_mIoU:0.423
Epoch:  8  step:326/326  time:0.467  LR:0.000082   loss: 0.261  val_loss: 0.437  val_mIoU:0.412
Epoch:  9  step:326/326  time:0.470  LR:0.000076   loss: 0.239  val_loss: 0.457  val_mIoU:0.401
Epoch: 10  step:326/326  time:0.470  LR:0.000068   loss: 0.217  val_loss: 0.436  val_mIoU:0.419
Epoch: 11  step:326/326

KeyboardInterrupt: 

## Inference

In [9]:
# Checkpoint to restore before inference. The name is hard-coded and must match a file
# previously written to the "model" directory by the training cell.
checkpoint_file = "resnext50_batch8_resize_iouCE_epoch11_score0.431.pt"
load_model(model, device, saved_dir="model", file_name=checkpoint_file)

load success


In [10]:
# Run the model over the test set and collect one flattened class-index mask per image.
# Result: preds_array with shape (num_test_images, size*size) and dtype int64.
size = 256
# Not used by the loop below; it belonged to a disabled post-inference mask-resize step
# and is retained only so the name stays defined.
transform = A.Compose([A.Resize(size, size)])
model.eval()

batch_preds = []
with torch.no_grad():
    for step, imgs in enumerate(test_loader):

        # Forward pass. The reshape below requires the predicted masks to be
        # size x size, so no further resizing is done here.
        outs = model(imgs.to(device))

        # Arg-max over dim 1 (the class axis for a (batch, classes, H, W) output -- TODO
        # confirm). The output is deliberately NOT squeezed: squeeze() would drop the
        # batch axis for a batch of one and make dim=1 point at the wrong axis.
        oms = torch.argmax(outs, dim=1).cpu().numpy()

        # Flatten each mask to one row. np.int64 replaces the removed np.long alias and
        # the platform-dependent int.
        batch_preds.append(oms.reshape(oms.shape[0], size * size).astype(np.int64))

        print(f"\rstep:{step+1:3d}/{len(test_loader)}", end='')
print("    End prediction.")

# Concatenate once at the end instead of growing the array with vstack on every batch.
if batch_preds:
    preds_array = np.concatenate(batch_preds, axis=0)
else:
    preds_array = np.empty((0, size * size), dtype=np.int64)

step:105/105    End prediction.


In [11]:
# Build the submission file: one space-separated string of class indices per test image.
submission = pd.read_csv('submission/sample_submission.csv')

total_rows = len(preds_array)
preds = []
for row_number, flat_mask in enumerate(preds_array, start=1):
    preds.append(' '.join(map(str, flat_mask)))
    print(f"\rTo string.. {row_number:3d}/{total_rows}", end='')

submission['PredictionString'] = preds
submission.to_csv('submission/sm9_DL3P+resnext50_epoch11_batch8_resize_iouCE.csv', index=False)

To string.. 837/837

## Submit

In [12]:
# Upload the CSV written by the previous cell, together with a short run description.
file_name = "sm9_DL3P+resnext50_epoch11_batch8_resize_iouCE.csv"
description = (
    "DL3P+resnext50_epoch11_batch8_resize_iouCE "
    "loss: 0.204  val_loss: 0.428  val_mIoU:0.431"
)

submit(f"submission/{file_name}", description, key='정훈님')

http://ec2-13-124-161-225.ap-northeast-2.compute.amazonaws.com:8000/api/v1/competition/28/presigned_url/?hyperparameters=%7B%22training%22%3A%7B%7D%2C%22inference%22%3A%7B%7D%7D&description=DL3P%2Bresnext50_epoch11_batch8_resize_iouCE+loss%3A+0.204++val_loss%3A+0.428++val_mIoU%3A0.431
{"url":"https://prod-aistages-private.s3.amazonaws.com/","fields":{"key":"app/Competitions/000028/Users/00000085/Submissions/0004/output.csv","x-amz-algorithm":"AWS4-HMAC-SHA256","x-amz-credential":"AKIA45LU4MHUJ7WLDQVO/20210428/ap-northeast-2/s3/aws4_request","x-amz-date":"20210428T142613Z","policy":"eyJleHBpcmF0aW9uIjogIjIwMjEtMDQtMjhUMTU6MjY6MTNaIiwgImNvbmRpdGlvbnMiOiBbeyJidWNrZXQiOiAicHJvZC1haXN0YWdlcy1wcml2YXRlIn0sIHsia2V5IjogImFwcC9Db21wZXRpdGlvbnMvMDAwMDI4L1VzZXJzLzAwMDAwMDg1L1N1Ym1pc3Npb25zLzAwMDQvb3V0cHV0LmNzdiJ9LCB7IngtYW16LWFsZ29yaXRobSI6ICJBV1M0LUhNQUMtU0hBMjU2In0sIHsieC1hbXotY3JlZGVudGlhbCI6ICJBS0lBNDVMVTRNSFVKN1dMRFFWTy8yMDIxMDQyOC9hcC1ub3J0aGVhc3QtMi9zMy9hd3M0X3JlcXVlc3QifSwgeyJ4LWFtei1kYXR