In [1]:
import os
from pathlib import Path
from glob import glob
from tqdm import tqdm
import numpy as np
import pandas as pd
from time import time
import datetime

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader, TensorDataset

from yolo import YoloBody
from nets.yolo_training import (YOLOLoss, get_lr_scheduler, set_optimizer_lr,
                                weights_init)
from utils.dataloader import YoloDataset, yolo_dataset_collate
from utils.utils import get_anchors, get_classes, show_config

# Seed applied to the random number generators below.
RANDOM_STATE = 11
# Directory the best-model checkpoints are written to.
PATH_MODEL_DATA = "./model_data/"
# Directory the training-history CSV is written to.
PATH_LOGS = "./logs/"

# Seed NumPy and PyTorch so that weight initialisation and shuffling start
# from the same state on every Restart & Run All.
# NOTE(review): Python's built-in `random` module is not imported in this cell
# and is therefore left unseeded — confirm whether the dataset code uses it.
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

In [2]:
# --- Model definition ------------------------------------------------------
# `pretrained` is forwarded to YoloBody; `model_path` is the checkpoint whose
# matching tensors are copied into the model (empty string skips loading).
pretrained = False
model_path = 'model_data/yolo4_weights.pth'
input_shape = [416, 416]
anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

# --- Class / anchor metadata -----------------------------------------------
classes_path = 'model_data/voc_classes.txt'
anchors_path = 'model_data/yolo_anchors.txt'
class_names, num_classes = get_classes(classes_path)
anchors, num_anchors = get_anchors(anchors_path)

# --- Detection annotation files --------------------------------------------
train_annotation_path = '2007_train.txt'
val_annotation_path = '2007_val.txt'

# --- Runtime ---------------------------------------------------------------
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_workers = 4

# --- Training schedule -----------------------------------------------------
epochs = 70
batch_size = 32
learning_rate = 1e-4

In [3]:
# ---------------------------------------------------------------------------
# Learning-rate bounds
#   Init_lr   maximum learning rate of the model
#   Min_lr    minimum learning rate; defaults to 0.01 x the maximum
# ---------------------------------------------------------------------------
Init_lr = 1e-2
Min_lr = Init_lr * 0.01

# ---------------------------------------------------------------------------
# Optimizer
#   optimizer_type   which optimizer to use; options are adam and sgd
#                    (suggested Init_lr=1e-3 with Adam, Init_lr=1e-2 with SGD)
#   momentum         momentum parameter used inside the optimizer
#   weight_decay     weight decay, helps prevent over-fitting
#                    (adam mishandles weight_decay; set it to 0 when using adam)
# ---------------------------------------------------------------------------
optimizer_type = "sgd"
momentum = 0.937
weight_decay = 5e-4

# ---------------------------------------------------------------------------
# Label smoothing: usually 0.01 or lower, e.g. 0.01 or 0.005.
# ---------------------------------------------------------------------------
label_smoothing = 0

# ---------------------------------------------------------------------------
# Focal loss
#   focal_loss    whether to use Focal Loss to balance positive/negative samples
#   focal_alpha   Focal Loss balance parameter for positive/negative samples
#   focal_gamma   Focal Loss balance parameter for easy/hard samples
# ---------------------------------------------------------------------------
focal_loss = False
focal_alpha = 0.25
focal_gamma = 2

# ---------------------------------------------------------------------------
# IoU loss variant: ciou or siou.
# ---------------------------------------------------------------------------
iou_type = 'ciou'


### Build Model

In [4]:
# Build the network; apply the project's weight initialisation only when no
# pretrained backbone was requested.
model = YoloBody(anchors_mask, num_classes, pretrained=pretrained)
if not pretrained:
    weights_init(model)
if model_path != '':
    print('Load weights {}.'.format(model_path))

    # Partial load: copy only the checkpoint tensors whose key exists in the
    # model and whose shape matches; every other key is reported as not loaded.
    model_dict      = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location = device)
    load_key, no_load_key, temp_dict = [], [], {}
    for k, v in pretrained_dict.items():
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v):
            temp_dict[k] = v
            load_key.append(k)
        else:
            no_load_key.append(k)
    model_dict.update(temp_dict)
    model.load_state_dict(model_dict)

    print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key))
    print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key))
    # The message below says: "Friendly reminder: it is normal for the head not
    # to be loaded; it is an error if the backbone is not loaded."
    print("\n\033[1;33;44m温馨提示，head部分没有载入是正常现象，Backbone部分没有载入是错误的。\033[0m")

# Move the model to the target device BEFORE creating the optimizers, as the
# PyTorch optimizer documentation recommends, so the optimizers are built from
# the parameters that actually live on `device`.
model_train = model.train().to(device)

yolo_loss = YOLOLoss(anchors, num_classes, input_shape, True, anchors_mask, label_smoothing, focal_loss, focal_alpha, focal_gamma, iou_type)
seg_loss = nn.CrossEntropyLoss()
# Both optimizers are built over the full parameter set of the same model; one
# is stepped after the detection loss, the other after the segmentation loss.
obj_optimizer = optim.Adam(model.parameters(), lr=learning_rate)
seg_optimizer = optim.Adam(model.parameters(), lr=learning_rate)

initialize network with normal type
Load weights model_data/yolo4_weights.pth.

Successful Load Key: ['backbone.conv1.conv.weight', 'backbone.conv1.bn.weight', 'backbone.conv1.bn.bias', 'backbone.conv1.bn.running_mean', 'backbone.conv1.bn.running_var', 'backbone.conv1.bn.num_batches_tracked', 'backbone.stages.0.downsample_conv.conv.weight', 'backbone.stages.0.downsample_conv.bn.weight', 'backbone.stages.0.downsample_conv.bn.bias', 'backbone.stages.0.downsample_conv.bn.running_mean', 'backbone.stages.0.downsample_conv.bn.running_var', 'backbone.stages.0.downsample_conv.bn.num_batches_tracked', ' ……
Successful Load Key Num: 642

Fail To Load Key: ['yolo_head3.1.weight', 'yolo_head3.1.bias', 'yolo_head2.1.weight', 'yolo_head2.1.bias', 'yolo_head1.1.weight', 'yolo_head1.1.bias'] ……
Fail To Load Key num: 6

[1;33;44m温馨提示，head部分没有载入是正常现象，Backbone部分没有载入是错误的。[0m


### Load data

#### VOC_2007

In [5]:
# Read the annotation files; each line of a file becomes one dataset entry.
with open(train_annotation_path, encoding='utf-8') as f:
    train_lines = list(f)
with open(val_annotation_path, encoding='utf-8') as f:
    val_lines = list(f)
num_train, num_val = len(train_lines), len(val_lines)

# Detection datasets: only the `train` flag differs between the two splits.
train_yolo_dataset = YoloDataset(train_lines, input_shape, num_classes,
                                 epoch_length=epochs, train=True)
val_yolo_dataset = YoloDataset(val_lines, input_shape, num_classes,
                               epoch_length=epochs, train=False)

# Both loaders share exactly the same options (including shuffling and
# dropping the final incomplete batch).
yolo_loader_options = dict(
    shuffle=True,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=True,
    collate_fn=yolo_dataset_collate,
)
train_yolo_loader = DataLoader(train_yolo_dataset, **yolo_loader_options)
val_yolo_loader = DataLoader(val_yolo_dataset, **yolo_loader_options)

In [6]:
# Number of samples reported by the training detection dataset.
len(train_yolo_dataset)

6973

#### ADE20K

In [7]:
# np.load on an .npz archive returns an NpzFile that keeps the file open and
# reads each array on access. Pull every split out inside a `with` block so
# the file handle is closed deterministically instead of relying on `del`.
with np.load('../ADE20K_DL_course/ADE20K_DL_seg.npz') as np_ADE20K:
    train_ade_X, train_ade_y = np_ADE20K['train_X'], np_ADE20K['train_y']
    val_ade_X, val_ade_y = np_ADE20K['val_X'], np_ADE20K['val_y']
    test_ade_X, test_ade_y = np_ADE20K['test_X'], np_ADE20K['test_y']
# Drop the (now closed) archive object from the notebook namespace.
del np_ADE20K

In [8]:
# Wrap every split in a torch.Tensor and move the image axis order from
# (N, H, W, C) to (N, C, H, W) with permute([0, 3, 1, 2]); labels keep their
# layout.
# NOTE(review): the names are rebound in place, so running this cell a second
# time without re-running the load cell would permute the images again.
train_ade_X = torch.Tensor(train_ade_X).permute([0, 3, 1, 2])
train_ade_y = torch.Tensor(train_ade_y)

val_ade_X = torch.Tensor(val_ade_X).permute([0, 3, 1, 2])
val_ade_y = torch.Tensor(val_ade_y)

test_ade_X = torch.Tensor(test_ade_X).permute([0, 3, 1, 2])
test_ade_y = torch.Tensor(test_ade_y)

In [9]:
# Pair each split's images with its labels.
train_ade_dataset, val_ade_dataset, test_ade_dataset = (
    TensorDataset(images, labels)
    for images, labels in (
        (train_ade_X, train_ade_y),
        (val_ade_X, val_ade_y),
        (test_ade_X, test_ade_y),
    )
)

# Segmentation batch size is fixed. The disabled alternative below would
# derive it so both loaders yield the same number of batches per epoch:
# ade_batch_size = len(train_ade_dataset) // (len(train_yolo_dataset) // batch_size)
ade_batch_size = 16

# Train and validation loaders use identical options.
ade_loader_options = dict(
    shuffle=True,
    batch_size=ade_batch_size,
    num_workers=num_workers,
    drop_last=True,
)
train_ade_loader = DataLoader(train_ade_dataset, **ade_loader_options)
val_ade_loader = DataLoader(val_ade_dataset, **ade_loader_options)

In [10]:
# Sanity check: shape of the training image tensor and the largest value
# found in the training labels.
train_ade_X.shape, train_ade_y.max()

(torch.Size([1400, 3, 416, 416]), tensor(149.))

In [11]:
# Number of batches per pass for each loader, in the order:
# train detection, train segmentation, val detection, val segmentation.
len(train_yolo_loader), len(train_ade_loader), len(val_yolo_loader), len(val_ade_loader)

(217, 87, 31, 12)

### Training

In [12]:
def _cycle_batches(loader):
    """Yield batches from `loader` forever, restarting it whenever it runs out."""
    while True:
        produced_any = False
        for batch in loader:
            produced_any = True
            yield batch
        if not produced_any:
            # Without this guard an empty loader would spin forever.
            raise ValueError("cannot cycle over a loader that yields no batches")


def _detection_loss(model, obj_loss, images, targets):
    """Forward a detection batch and sum `obj_loss` over the first three model outputs."""
    images = images.to(device)
    targets = [target.to(device) for target in targets]
    head_outputs = model(images)[:3]

    loss = 0
    for level, head_output in enumerate(head_outputs):
        loss += obj_loss(level, head_output, targets)
    return loss


def _segmentation_loss(model, seg_loss, images, labels):
    """Forward a segmentation batch and apply `seg_loss` to the fourth model output."""
    images, labels = images.to(device), labels.to(device)
    seg_output = model(images)[3]
    # Labels are cast to long right before the loss call.
    return seg_loss(seg_output, labels.to(torch.long))


def fit_one_epoch(model, obj_optimizer, seg_optimizer, obj_loss, seg_loss,
                  train_yolo_loader, val_yolo_loader, train_ade_loader, val_ade_loader,
                  e, epochs, batch_size):
    """Train for one epoch, alternating detection and segmentation steps, then validate.

    For every detection batch one detection step is taken with `obj_optimizer`
    and one segmentation step with `seg_optimizer`. The segmentation loader is
    restarted whenever it is exhausted, so the detection loader alone decides
    how many steps the epoch has. Validation follows the same pairing without
    gradient tracking or optimizer updates.

    Tensors are moved to the notebook-level `device`.

    Args:
        model: Module whose output is indexable; entries 0-2 are passed to
            `obj_loss` and entry 3 to `seg_loss`.
        obj_optimizer: Optimizer stepped after the detection loss.
        seg_optimizer: Optimizer stepped after the segmentation loss.
        obj_loss: Callable invoked as `obj_loss(level, output, targets)`.
        seg_loss: Callable invoked as `seg_loss(output, labels_as_long)`.
        train_yolo_loader: Detection training loader yielding `(images, targets)`.
        val_yolo_loader: Detection validation loader.
        train_ade_loader: Segmentation training loader yielding `(images, labels)`.
        val_ade_loader: Segmentation validation loader.
        e: Zero-based epoch index (shown and returned as `e + 1`).
        epochs: Total number of epochs, used only in the progress-bar text.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        dict with keys "epoch", "train_obj_loss", "train_seg_loss",
        "val_obj_loss" and "val_seg_loss". Each loss is the per-step mean
        (divided by the number of detection batches) rounded to 4 decimals.

    Raises:
        ValueError: If a segmentation loader yields no batches at all.
    """
    num_train_steps = len(train_yolo_loader)
    num_val_steps = len(val_yolo_loader)

    # training
    model.train()
    loss_obj_total, loss_seg_total = 0, 0
    train_ade_batches = _cycle_batches(train_ade_loader)
    # The context manager closes the progress bar even if a step raises.
    with tqdm(desc=f'Epoch {e+1} / {epochs}: Train', total=num_train_steps) as pbar:
        for batch_i, (X_yolo, y_yolo) in enumerate(train_yolo_loader):
            # object detection
            obj_optimizer.zero_grad()
            loss_obj = _detection_loss(model, obj_loss, X_yolo, y_yolo)
            loss_obj.backward()
            obj_optimizer.step()
            loss_obj_total += loss_obj.item()

            # semantic segmentation
            X_ade, y_ade = next(train_ade_batches)
            seg_optimizer.zero_grad()
            loss_seg = _segmentation_loss(model, seg_loss, X_ade, y_ade)
            loss_seg.backward()
            seg_optimizer.step()
            loss_seg_total += loss_seg.item()

            pbar.update(1)
            pbar.set_postfix(train_obj_loss=loss_obj_total/(batch_i + 1),
                             train_seg_loss=loss_seg_total/(batch_i + 1))

    # val
    model.eval()
    val_loss_obj_total, val_loss_seg_total = 0, 0
    val_ade_batches = _cycle_batches(val_ade_loader)
    # No gradients are computed here, so no zero_grad() calls are needed.
    with torch.no_grad(), \
            tqdm(desc=f'Epoch {e+1} / {epochs}: Val', total=num_val_steps) as pbar:
        for batch_i, (X_yolo, y_yolo) in enumerate(val_yolo_loader):
            # object detection
            loss_obj = _detection_loss(model, obj_loss, X_yolo, y_yolo)
            val_loss_obj_total += loss_obj.item()

            # semantic segmentation
            X_ade, y_ade = next(val_ade_batches)
            loss_seg = _segmentation_loss(model, seg_loss, X_ade, y_ade)
            val_loss_seg_total += loss_seg.item()

            pbar.update(1)
            pbar.set_postfix(val_obj_loss=val_loss_obj_total/(batch_i + 1),
                             val_seg_loss=val_loss_seg_total/(batch_i + 1))

    return {
        "epoch": e+1,
        "train_obj_loss": round(loss_obj_total / num_train_steps, 4),
        "train_seg_loss": round(loss_seg_total / num_train_steps, 4),
        "val_obj_loss": round(val_loss_obj_total / num_val_steps, 4),
        "val_seg_loss": round(val_loss_seg_total / num_val_steps, 4),
    }
    

# fit_one_epoch(model, obj_optimizer, seg_optimizer, yolo_loss, seg_loss,
#               train_yolo_loader, val_yolo_loader, train_ade_loader, val_ade_loader,
#               0, epochs, batch_size)

In [13]:
def save_his(dict_all, start_datetime, log_dir=None):
    """Write the training history to `training_history_<start_datetime>.csv`.

    Args:
        dict_all: Mapping of column name to a list of per-epoch values; it is
            passed straight to `pd.DataFrame`.
        start_datetime: String embedded in the file name.
        log_dir: Directory to write into. Defaults to the notebook-level
            `PATH_LOGS` when omitted.

    The target directory is created if it does not exist yet, and an existing
    file with the same name is overwritten.
    """
    target_dir = Path(PATH_LOGS if log_dir is None else log_dir)
    # to_csv does not create missing parent directories on its own.
    target_dir.mkdir(parents=True, exist_ok=True)
    history = pd.DataFrame(dict_all)
    history.to_csv(target_dir / f'training_history_{start_datetime}.csv', index=False)

In [14]:
# Per-epoch history; each key collects one value per finished epoch.
dict_all = {
    "epoch": [],
    "train_obj_loss": [], "train_seg_loss": [],
    "val_obj_loss": [], "val_seg_loss": []
}
# Timestamp (second resolution) used to name this run's history file.
start_datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')

# Make sure the history file has somewhere to go before training starts.
Path(PATH_LOGS).mkdir(parents=True, exist_ok=True)

# Best validation losses seen so far. These must live OUTSIDE the epoch loop:
# resetting them every epoch would make every epoch look like a new best and
# the "best" checkpoints would simply hold the most recent weights.
best_obj_loss, best_seg_loss = float('inf'), float('inf')

for e in range(epochs):
    dict_his = fit_one_epoch(
        model, obj_optimizer, seg_optimizer, yolo_loss, seg_loss,
        train_yolo_loader, val_yolo_loader, train_ade_loader, val_ade_loader,
        e, epochs, batch_size
    )
    for k, v in dict_his.items():
        dict_all[k].append(v)
    # Rewrite the CSV after every epoch so progress survives an interruption.
    save_his(dict_all, start_datetime)

    # Keep one checkpoint per task, updated only on a strict improvement.
    if dict_his['val_obj_loss'] < best_obj_loss:
        best_obj_loss = dict_his['val_obj_loss']
        torch.save(model.state_dict(), Path(PATH_MODEL_DATA) / "BestObjModel.pth")

    if dict_his['val_seg_loss'] < best_seg_loss:
        best_seg_loss = dict_his['val_seg_loss']
        torch.save(model.state_dict(), Path(PATH_MODEL_DATA) / "BestSegModel.pth")

Epoch 1 / 70: Train: 100%|██████████| 217/217 [02:49<00:00,  1.28it/s, train_obj_loss=7.16, train_seg_loss=3.14]
Epoch 1 / 70: Val: 100%|██████████| 31/31 [00:17<00:00,  1.81it/s, val_obj_loss=3.77, val_seg_loss=3.04]
Epoch 2 / 70: Train: 100%|██████████| 217/217 [02:49<00:00,  1.28it/s, train_obj_loss=1.49, train_seg_loss=2.1] 
Epoch 2 / 70: Val: 100%|██████████| 31/31 [00:18<00:00,  1.68it/s, val_obj_loss=1.91, val_seg_loss=3.15]
Epoch 3 / 70: Train: 100%|██████████| 217/217 [02:46<00:00,  1.31it/s, train_obj_loss=0.715, train_seg_loss=1.66]
Epoch 3 / 70: Val: 100%|██████████| 31/31 [00:17<00:00,  1.82it/s, val_obj_loss=1.29, val_seg_loss=3]   
Epoch 4 / 70: Train: 100%|██████████| 217/217 [02:49<00:00,  1.28it/s, train_obj_loss=0.45, train_seg_loss=1.36] 
Epoch 4 / 70: Val: 100%|██████████| 31/31 [00:18<00:00,  1.71it/s, val_obj_loss=1.06, val_seg_loss=3.05]
Epoch 5 / 70: Train: 100%|██████████| 217/217 [02:47<00:00,  1.29it/s, train_obj_loss=0.324, train_seg_loss=1.18]
Epoch 5 / 70

In [None]:
# Display the training history accumulated by the training loop.
dict_all

{'epoch': [30],
 'train_obj_loss': [0.0544],
 'train_seg_loss': [0.6453],
 'val_obj_loss': [0.4132],
 'val_seg_loss': [3.1825]}

### Test Segmentation