In [1]:
import sys

# Put the parent directory on the module search path so that the imports in
# the next cell can resolve packages that live there (presumably the project
# packages such as `model`, `utils`, `data_loader` -- confirm repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry each time.
if '../' not in sys.path:
    sys.path.append('../')

In [2]:
import os, glob, random, cv2
import numpy as np
import wandb
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import albumentations as A
import segmentation_models_pytorch as smp
import model.metric as module_metric

# NOTE: the relative order of the wildcard imports below is kept as-is on
# purpose -- later imports may shadow names brought in by earlier ones.
from data_loader.dataloader import get_dataloader
from utils.data import get_datasize
from utils.visual import *
from albumentations.pytorch import transforms
from model.loss import *
from train import *
from pathlib import Path


In [3]:
# Seed every random number generator this notebook can touch so that a
# "Restart Kernel -> Run All" reproduces the same run.
SEED = 42

# Python's built-in RNG: it is imported by this notebook and may be used by
# data-augmentation / shuffling code, so it must be seeded as well.
random.seed(SEED)
# NumPy's legacy global RNG.
np.random.seed(SEED)
# PyTorch CPU generator.
torch.manual_seed(SEED)
# All CUDA devices (not just the current one); safe to call without a GPU.
torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and disable its auto-tuner, which can
# otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [4]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Optimisation hyper-parameters: learning rate, mini-batch size, epoch count.
lr, batch_size, num_epoch = 1e-3, 16, 200

# Damage category; it selects the dataset sub-folder used below.
damage = 'dent'

# Training / validation image folders for the chosen damage category.
val_dir = f'./dataset/{damage}/valid/'
train_dir = f'./dataset/{damage}/train/'

In [5]:
# Settings shared by the training and validation pipelines.
_INPUT_SIZE = 512
_NORM_MEAN = (0.485, 0.456, 0.406)  # per-channel mean (the usual ImageNet values)
_NORM_STD = (0.229, 0.224, 0.225)   # per-channel std (the usual ImageNet values)


def _normalize_and_tensorize():
    """Return fresh Normalize + ToTensorV2 steps that close both pipelines."""
    return [
        A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
        transforms.ToTensorV2(transpose_mask=True),
    ]


# Training: resize, then light augmentation (flip, brightness/contrast jitter,
# small rotation with reflected borders), then normalise and convert to tensor.
transform_train = A.Compose(
    [
        A.Resize(_INPUT_SIZE, _INPUT_SIZE),
        A.HorizontalFlip(),
        A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
        A.Rotate((-10, 10), p=0.5, border_mode=cv2.BORDER_REFLECT),
        *_normalize_and_tensorize(),
    ]
)

# Validation: no augmentation, only resize + normalise + tensor conversion.
transform_val = A.Compose(
    [
        A.Resize(_INPUT_SIZE, _INPUT_SIZE),
        *_normalize_and_tensorize(),
    ]
)

In [6]:
# Build the training and validation loaders in one step. The trailing boolean
# differs between the two (True for train, False for validation) -- presumably
# a shuffle flag; confirm against `get_dataloader`.
train_dataloader, val_dataloader = (
    get_dataloader(train_dir, transform_train, batch_size, True),
    get_dataloader(val_dir, transform_val, batch_size, False),
)

In [7]:
# U-Net with an EfficientNet-B0 encoder initialised from ImageNet weights.
# Three input channels, one output class, and no activation on the output.
# The model is moved to the selected device right after construction.
model = smp.Unet(
    encoder_name='efficientnet-b0',
    encoder_weights='imagenet',
    in_channels=3,
    classes=1,
    activation=None,
).to(device)

In [8]:
# Loss function, placed on the same device as the model.
criterion = DiceLoss().to(device)

# SGD with momentum over all model parameters.
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=0.9)

# Cosine annealing with warm restarts: first cycle of 20 epochs, each later
# cycle twice as long, learning rate annealed down to 1e-5.
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer,
    T_0=20,
    T_mult=2,
    eta_min=1e-5,
)

# Resolve the metric callables by name from the project's metric module.
metric_names = ('IOUscore', 'PixelAccuracy')
metrics = [getattr(module_metric, name) for name in metric_names]

In [9]:
# Run configuration recorded alongside the experiment: the numeric
# hyper-parameters plus the class names of the loss / optimizer / scheduler
# and the metric function names keyed by their 1-based position ("1", "2", ...).
train_config = {
    'Batch size': batch_size,
    'Learning Rate': lr,
    'Epochs': num_epoch,
    'Loss fn': type(criterion).__name__,
    'Optimizer': type(optimizer).__name__,
    'LR Scheduler': type(scheduler).__name__,
    'Metric': {
        str(position): metric_fn.__name__
        for position, metric_fn in enumerate(metrics, start=1)
    },
}


In [10]:
# Derive the checkpoint folder from the model's `name` attribute, which is
# split on '-': the second piece names the encoder family folder and the last
# piece prefixes the run folder, together with the damage category.
name_parts = vars(model)['name'].split('-')
encoder_family = name_parts[1].capitalize()
encoder_variant = name_parts[-1].capitalize()
save_dir = f"./saved/U-Net_{encoder_family}/{encoder_variant}_{damage}_ver0/"

# Bundle model, loss, metrics, optimizer, loaders and LR scheduler into the
# project's Trainer.
trainer = Trainer(
    model, criterion, metrics, optimizer, device, num_epoch, save_dir,
    data_loader=train_dataloader,
    valid_data_loader=val_dataloader,
    lr_scheduler=scheduler,
)

In [11]:
# Start a Weights & Biases run; project and run names are taken from the
# second and third '/'-separated pieces of the trainer's directory.
# NOTE(review): if `trainer.dir` equals the "./saved/<arch>/<run>/" path built
# for the Trainer, these indices select "saved" and "<arch>" rather than
# "<arch>" and "<run>" -- confirm the intended naming.
dir_parts = trainer.dir.split('/')
wandb.init(project=dir_parts[1], name=dir_parts[2], config=train_config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mimlim[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [12]:
# Run the training loop of the Trainer configured above. Judging by the cell
# output, it prints one line per epoch with train/validation loss, pixel
# accuracy, IoU and the epoch's training time.
trainer.train()


Epoch : 0 | Train Loss : 0.98410 | Train P.A : 40.61% | Train IOU : 0.00894 | Val Loss : 0.98228 | Val P.A : 36.56% | Val IOU : 0.00942 | Training Time : 86.16sec

Epoch : 1 | Train Loss : 0.98370 | Train P.A : 40.09% | Train IOU : 0.00960 | Val Loss : 0.98176 | Val P.A : 37.82% | Val IOU : 0.01064 | Training Time : 86.53sec

Epoch : 2 | Train Loss : 0.98290 | Train P.A : 40.24% | Train IOU : 0.01027 | Val Loss : 0.98123 | Val P.A : 40.34% | Val IOU : 0.01130 | Training Time : 86.49sec

Epoch : 3 | Train Loss : 0.98261 | Train P.A : 41.41% | Train IOU : 0.01059 | Val Loss : 0.98076 | Val P.A : 41.45% | Val IOU : 0.01178 | Training Time : 86.25sec

Epoch : 4 | Train Loss : 0.98236 | Train P.A : 42.17% | Train IOU : 0.01071 | Val Loss : 0.98024 | Val P.A : 42.07% | Val IOU : 0.01212 | Training Time : 85.25sec

Epoch : 5 | Train Loss : 0.98167 | Train P.A : 43.11% | Train IOU : 0.01108 | Val Loss : 0.97963 | Val P.A : 43.20% | Val IOU : 0.01240 | Training Time : 85.78sec

Epoch : 6 | Tra

[34m[1mwandb[0m: Network error resolved after 0:00:11.423151, resuming normal operation.


Train Loss : 0.97842 | Train P.A : 53.30% | Train IOU : 0.01238 | Val Loss : 0.97506 | Val P.A : 53.29% | Val IOU : 0.01428 | Training Time : 86.77sec

Epoch : 16 | Train Loss : 0.97737 | Train P.A : 54.50% | Train IOU : 0.01326 | Val Loss : 0.97463 | Val P.A : 53.84% | Val IOU : 0.01448 | Training Time : 85.60sec

Epoch : 17 | Train Loss : 0.97724 | Train P.A : 55.49% | Train IOU : 0.01315 | Val Loss : 0.97420 | Val P.A : 55.50% | Val IOU : 0.01483 | Training Time : 84.85sec

Epoch : 18 | Train Loss : 0.97669 | Train P.A : 57.02% | Train IOU : 0.01366 | Val Loss : 0.97336 | Val P.A : 57.57% | Val IOU : 0.01552 | Training Time : 80.42sec

Epoch : 19 | Train Loss : 0.97640 | Train P.A : 58.79% | Train IOU : 0.01375 | Val Loss : 0.97281 | Val P.A : 59.09% | Val IOU : 0.01590 | Training Time : 83.38sec

Epoch : 20 | Train Loss : 0.97618 | Train P.A : 60.26% | Train IOU : 0.01397 | Val Loss : 0.97219 | Val P.A : 60.45% | Val IOU : 0.01633 | Training Time : 79.02sec

Epoch : 21 | Train Loss

VBox(children=(Label(value='548.900 MB of 548.900 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
Train IOU,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▄▄▅▅▆▆▆▆▇▇▇▇▇▇█▇████
Train Loss,█████████████████▇▇▇▅▄▄▄▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁
Train P.A,▁▁▁▂▂▃▃▃▄▄▅▅▅▆▆▇▇▇██████████████████████
Val IOU,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▄▆▇▇▇▇▇█▇▇▇▇▇██▇█▆▇██
Val Loss,██████████████▇▇▇▇▆▅▃▂▂▂▂▂▁▁▁▁▂▂▁▁▂▁▂▁▁▁
Val P.A,▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▇▇███████████████████████

0,1
Train IOU,0.48031
Train Loss,0.37556
Train P.A,99.52418
Val IOU,0.23728
Val Loss,0.6498
Val P.A,99.10379
