In [1]:
import os
import random
import time
import json
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from pycocotools.coco import COCO
import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

import matplotlib.pyplot as plt
from torch.cuda.amp import GradScaler, autocast
from sklearn.model_selection import StratifiedKFold
import segmentation_models_pytorch as smp

from utils import *
from dataloader import *
from loss import *
from evaluate import *
from scheduler import *

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print (f"This notebook use {device}")

This notebook use cuda:0


In [2]:
SEED = 77
BATCH_SIZE = 8
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(SEED)
random.seed(SEED)

## Train Function

In [6]:
def fold_train(model, train_loader, val_loader, EPOCHS=21, save_model_name='fold_default'):
    
    # hyper parameters
    LR_start = 2e-6
    LR_max = 1e-4
    accumulation_step = 1
    print_every = 1
    best_val_mIoU = 0.42
    best_val_mIoU2 = 0.44
    best_val_mIoU3 = 0.52

    criterion = IoU_CE_Loss(iou_rate=0.4, weight=None)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR_start)
    scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=20, eta_max=LR_max, T_up=2, gamma=0.5)

    for epoch in range(EPOCHS):
        epoch+=1
        avg_loss = 0
        batch_count = len(train_loader)

        for step, (images, masks) in enumerate(train_loader):
            start = time.time()
            imgs, masks = images.to(device), masks.long().to(device)

            output = model(imgs)
            loss = criterion(output, masks)
            loss.backward()

            if (step+1)%accumulation_step==0:
                optimizer.step()
                optimizer.zero_grad()

            avg_loss += loss.item() / batch_count
            print(f"\rEpoch:{epoch:3d}  step:{step:3d}/{batch_count-1}  time:{time.time() - start:.3f}  LR:{scheduler.get_lr()[0]:.6f}", end='')

        scheduler.step()
        val_loss, val_mIoU, val_mIoU2, val_mIoU3 = validation3(model, val_loader, criterion, device)
        print(f"   loss:{avg_loss:.3f}  val_loss:{val_loss:.3f}  val_mIoU:{val_mIoU:.3f}  val_mIoU2:{val_mIoU2:.3f}  val_mIooU3:{val_mIoU3:.3f}  {epoch}")
        if best_val_mIoU < val_mIoU:
            save_model(model, saved_dir="model", file_name=save_model_name + f'_epoch{epoch}_miou1.pt')
            best_val_mIoU = val_mIoU
        elif best_val_mIoU2 < val_mIoU2:
            save_model(model, saved_dir="model", file_name=save_model_name + f'_epoch{epoch}_miou2.pt')
            best_val_mIoU2 = val_mIoU2
        elif best_val_mIoU3 < val_mIoU3:
            save_model(model, saved_dir="model", file_name=save_model_name + f'_epoch{epoch}_miou3.pt')
            best_val_mIoU3 = val_mIoU3

## data load & transform

In [7]:
kfold_dataset = np.load('input/data/img_all.npy', allow_pickle=True)
anns_cnt = np.load('input/data/anns_cnt.npy')

train_transform = A.Compose([
    A.Resize(256, 256),
    A.RandomRotate90(),
    A.HorizontalFlip(p=0.5),
    ToTensorV2()
])

test_transform = A.Compose([
    A.Resize(256, 256),
    ToTensorV2()
])

## 5-Fold train

In [8]:
skf = StratifiedKFold(n_splits=5)
for k, (train_idx, valid_idx) in enumerate(skf.split(kfold_dataset, anns_cnt)):
    
    ## DataLoader ##
    train_dataset = KFoldDataset(dataset=kfold_dataset[train_idx], mode='train', transform=train_transform)
    val_dataset = KFoldDataset(dataset=kfold_dataset[valid_idx], mode='val', transform=train_transform)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1, drop_last=True)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)
    
    ## model ##
    model = smp.DeepLabV3Plus(
        encoder_name='resnext50_32x4d',
        encoder_weights='swsl',
        classes=12
    ).to(device)
    
    ## train ##
    print("-"*50 + f" Fold{k+1} Start training " + "-"*50)
    fold_train(model, train_loader, val_loader, EPOCHS=21, save_model_name=f'[fold{k+1}]rxt50_resize_rotateFlip')
    print("-"*50 + f" Fold{k+1} Finish training " + "-"*50)

loading annotations into memory...
Done (t=4.14s)
creating index...
index created!
loading annotations into memory...
Done (t=5.12s)
creating index...
index created!
--------------------------------------------------Fold1 Start training--------------------------------------------------
Epoch:  1  step:326/326  time:0.276  LR:0.000002   loss:1.733  val_loss:1.553  val_mIoU:0.114  val_mIoU2:0.125  val_mIooU3:0.089  1
Epoch:  2  step:326/326  time:0.275  LR:0.000051   loss:0.883  val_loss:0.678  val_mIoU:0.318  val_mIoU2:0.279  val_mIooU3:0.418  2
Epoch:  3  step:326/326  time:0.278  LR:0.000100   loss:0.579  val_loss:0.616  val_mIoU:0.308  val_mIoU2:0.264  val_mIooU3:0.415  3
Epoch:  4  step:326/326  time:0.278  LR:0.000099   loss:0.493  val_loss:0.599  val_mIoU:0.314  val_mIoU2:0.311  val_mIooU3:0.420  4
Epoch:  5  step:326/326  time:0.275  LR:0.000097   loss:0.443  val_loss:0.553  val_mIoU:0.341  val_mIoU2:0.360  val_mIooU3:0.449  5
Epoch:  6  step:326/326  time:0.280  LR:0.000093   lo

In [9]:
save_model(model, saved_dir="model", file_name='[fold5]rxt50_resize_rotateFlip_epoch21.pt')