In [1]:
import os
import time
import copy
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from nets.yolo4 import YoloBody
from nets.yolo_training import YOLOLoss, Generator

In [2]:
#---------------------------------------------------#
#   获得类和先验框
#---------------------------------------------------#
def get_classes(classes_path):
    '''loads the classes'''
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names


def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape([-1,3,2])[::-1,:,:]


#---------------------------------------------------#
#   训练一个epoch
#---------------------------------------------------#
def fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen,genval, Epoch, cuda, optimizer, lr_scheduler):
    total_loss = 0
    val_loss = 0
    print('\n' + '-' * 10 + 'Train one epoch.' + '-' * 10)
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
    print('Start Training.')
    net.train()
    for iteration in range(epoch_size):
        start_time = time.time()
        images, targets = next(gen)
        with torch.no_grad():
            if cuda:
                images = Variable(torch.from_numpy(images).type(torch.FloatTensor)).cuda()
                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
            else:
                images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
        optimizer.zero_grad()
#         with torch.no_grad():
#             outputs = net(images)
        outputs = net(images)
        losses = []
        for i in range(3):
            loss_item = yolo_losses[i](outputs[i], targets)
            losses.append(loss_item[0])
        loss = sum(losses)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        total_loss += loss
        waste_time = time.time() - start_time
        if iteration == 0 or (iteration+1) % 10 == 0:
            print('step:' + str(iteration+1) + '/' + str(epoch_size) + ' || Total Loss: %.4f || %.4fs/step' % (total_loss/(iteration+1), waste_time))
    print('Finish Training.')
    '''        
    print('Start Validation.')
    net.eval()
    for iteration in range(epoch_size_val):
        images_val, targets_val = next(genval)

        with torch.no_grad():
            if cuda:
                images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor)).cuda()
                targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
            else:
                images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor))
                targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
            optimizer.zero_grad()
            outputs = net(images_val)
            losses = []
            for i in range(3):
                loss_item = yolo_losses[i](outputs[i], targets_val)
                losses.append(loss_item[0])
            loss = sum(losses)
            val_loss += loss
    print('Finish Validation')
    '''
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1), val_loss/(epoch_size_val+1)))
    
    return total_loss/(epoch_size+1), val_loss/(epoch_size_val+1)

In [3]:
#-------------------------------#
#   输入的shape大小
#   显存比较小可以使用416x416
#   显存比较大可以使用608x608
#-------------------------------#
# input_shape = (416,416)
input_shape = (608, 608)

#-------------------------------#
#   tricks的使用设置
#-------------------------------#
Cosine_lr = True
mosaic = True
# 用于设定是否使用cuda
Cuda = True
smoooth_label = 0.03

#-------------------------------#
#   获得训练集和验证集的annotations
#   
#-------------------------------#
train_annotation_path = 'model_data/mask_train.txt'
val_annotation_path = 'model_data/mask_val.txt'

#-------------------------------#
#   获得先验框和类
#-------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/mask_classes.txt'   
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)

In [4]:
# 创建模型
model = YoloBody(len(anchors[0]), num_classes)
#model_path = "model_data/yolov4_coco_pretrained_weights.pth"
#model_path = "model_data/yolov4_maskdetect_weights0.pth"
model_path = "model_data/yolov4_maskdetect_weights1.pth"
# 加快模型训练的效率
print('Loading pretrained model weights.')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path)
pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

if Cuda:
    net = torch.nn.DataParallel(model)
    cudnn.benchmark = True
    net = net.cuda()
else:
    net = torch.nn.DataParallel(model)

# 建立loss函数
yolo_losses = []
for i in range(3):
    yolo_losses.append(YOLOLoss(np.reshape(anchors, [-1,2]), num_classes, \
                                (input_shape[1], input_shape[0]), smoooth_label, Cuda))
# read train lines and val lines
with open(train_annotation_path) as f:
    train_lines = f.readlines()
with open(val_annotation_path) as f:
    val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)

Loading pretrained model weights.
Finished!


In [5]:
#------------------------------------#
#   先冻结backbone训练
#------------------------------------#
lr = 1e-3
Batch_size = 4
Init_Epoch = 0
Freeze_Epoch = 25
        
optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
                        
epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
for param in model.backbone.parameters():
    param.requires_grad = False

best_loss = 99999999.0
best_model_weights = copy.deepcopy(net.state_dict())
for epoch in range(Init_Epoch, Freeze_Epoch):
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Freeze_Epoch, Cuda, optimizer, lr_scheduler)
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    #with open('val_loss.csv', mode='a+') as val_loss_file:
    #    val_loss_file.write(str(val_loss.item()) + '\n')
torch.save(best_model_weights, 'model_data/yolov4_maskdetect_weights1.pth')


----------Train one epoch.----------
Epoch:1/25
Start Training.


  tmp_targets = np.array(targets)


step:1/321 || Total Loss: 13812.1582 || 10.9782s/step
step:10/321 || Total Loss: 8798.8232 || 2.1696s/step
step:20/321 || Total Loss: 6131.0508 || 2.2817s/step
step:30/321 || Total Loss: 4634.3394 || 3.1193s/step
step:40/321 || Total Loss: 3715.6829 || 3.0423s/step
step:50/321 || Total Loss: 3103.4028 || 1.9164s/step
step:60/321 || Total Loss: 2670.1426 || 2.9272s/step
step:70/321 || Total Loss: 2347.0720 || 2.8882s/step
step:80/321 || Total Loss: 2096.9109 || 3.3705s/step
step:90/321 || Total Loss: 1896.5676 || 3.7778s/step
step:100/321 || Total Loss: 1733.1326 || 3.5376s/step
step:110/321 || Total Loss: 1596.3851 || 2.7440s/step
step:120/321 || Total Loss: 1480.7101 || 2.8561s/step
step:130/321 || Total Loss: 1381.4700 || 3.8309s/step
step:140/321 || Total Loss: 1295.2689 || 3.1614s/step
step:150/321 || Total Loss: 1219.4409 || 3.7638s/step
step:160/321 || Total Loss: 1152.4675 || 3.0433s/step
step:170/321 || Total Loss: 1092.5598 || 2.4738s/step
step:180/321 || Total Loss: 1038.9232

step:170/321 || Total Loss: 17.0081 || 3.6827s/step
step:180/321 || Total Loss: 16.9212 || 3.4005s/step
step:190/321 || Total Loss: 16.7686 || 2.8821s/step
step:200/321 || Total Loss: 16.7258 || 3.0493s/step
step:210/321 || Total Loss: 16.7310 || 2.8361s/step
step:220/321 || Total Loss: 16.6782 || 3.8909s/step
step:230/321 || Total Loss: 16.5930 || 3.0903s/step
step:240/321 || Total Loss: 16.6162 || 3.0262s/step
step:250/321 || Total Loss: 16.6641 || 2.8221s/step
step:260/321 || Total Loss: 16.5904 || 2.8691s/step
step:270/321 || Total Loss: 16.6944 || 2.8541s/step
step:280/321 || Total Loss: 16.7056 || 3.0473s/step
step:290/321 || Total Loss: 16.6438 || 3.1273s/step
step:300/321 || Total Loss: 16.7014 || 3.6257s/step
step:310/321 || Total Loss: 16.7036 || 3.4306s/step
step:320/321 || Total Loss: 16.7213 || 1.6392s/step
Finish Training.
Total Loss: 16.6459 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:6/25
Start Training.
step:1/321 || Total Loss: 19.9619 || 2.8391s/

step:1/321 || Total Loss: 6.9868 || 2.0916s/step
step:10/321 || Total Loss: 12.6926 || 3.1554s/step
step:20/321 || Total Loss: 12.6039 || 2.5279s/step
step:30/321 || Total Loss: 12.5139 || 3.1173s/step
step:40/321 || Total Loss: 13.1212 || 2.5799s/step
step:50/321 || Total Loss: 13.4530 || 1.8314s/step
step:60/321 || Total Loss: 13.3646 || 2.7631s/step
step:70/321 || Total Loss: 12.9111 || 1.7613s/step
step:80/321 || Total Loss: 12.8448 || 2.1076s/step
step:90/321 || Total Loss: 12.4900 || 3.3495s/step
step:100/321 || Total Loss: 12.2096 || 2.5129s/step
step:110/321 || Total Loss: 12.5372 || 2.8081s/step
step:120/321 || Total Loss: 12.2448 || 3.0072s/step
step:130/321 || Total Loss: 12.0679 || 2.4899s/step
step:140/321 || Total Loss: 12.5698 || 2.3548s/step
step:150/321 || Total Loss: 12.6427 || 3.3805s/step
step:160/321 || Total Loss: 12.7797 || 2.8882s/step
step:170/321 || Total Loss: 12.9119 || 3.3125s/step
step:180/321 || Total Loss: 12.8076 || 3.0483s/step
step:190/321 || Total Lo

step:180/321 || Total Loss: 11.6684 || 2.6570s/step
step:190/321 || Total Loss: 11.6140 || 2.5689s/step
step:200/321 || Total Loss: 11.5725 || 3.1073s/step
step:210/321 || Total Loss: 11.4829 || 2.0535s/step
step:220/321 || Total Loss: 11.4130 || 1.7763s/step
step:230/321 || Total Loss: 11.4115 || 3.2164s/step
step:240/321 || Total Loss: 11.4936 || 4.0210s/step
step:250/321 || Total Loss: 11.4927 || 1.8684s/step
step:260/321 || Total Loss: 11.4938 || 2.8651s/step
step:270/321 || Total Loss: 11.4866 || 2.9412s/step
step:280/321 || Total Loss: 11.5956 || 3.1744s/step
step:290/321 || Total Loss: 11.6914 || 3.7338s/step
step:300/321 || Total Loss: 11.6639 || 2.8581s/step
step:310/321 || Total Loss: 11.5746 || 2.2557s/step
step:320/321 || Total Loss: 11.5030 || 2.6510s/step
Finish Training.
Total Loss: 11.4501 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:15/25
Start Training.
step:1/321 || Total Loss: 15.6057 || 2.9942s/step
step:10/321 || Total Loss: 10.6920 || 2.6630s/

step:1/321 || Total Loss: 6.1416 || 3.3495s/step
step:10/321 || Total Loss: 10.7718 || 2.2777s/step
step:20/321 || Total Loss: 12.8412 || 2.7901s/step
step:30/321 || Total Loss: 12.1919 || 2.5479s/step
step:40/321 || Total Loss: 12.1332 || 2.4708s/step
step:50/321 || Total Loss: 11.2104 || 2.5009s/step
step:60/321 || Total Loss: 11.0187 || 2.8611s/step
step:70/321 || Total Loss: 10.7646 || 3.2044s/step
step:80/321 || Total Loss: 10.9724 || 1.7703s/step
step:90/321 || Total Loss: 11.2066 || 3.0773s/step
step:100/321 || Total Loss: 10.9300 || 3.3665s/step
step:110/321 || Total Loss: 11.0364 || 2.7601s/step
step:120/321 || Total Loss: 10.7742 || 2.5659s/step
step:130/321 || Total Loss: 10.5598 || 3.3765s/step
step:140/321 || Total Loss: 10.8116 || 3.0152s/step
step:150/321 || Total Loss: 10.8561 || 2.5709s/step
step:160/321 || Total Loss: 10.8326 || 2.5559s/step
step:170/321 || Total Loss: 10.7897 || 3.1213s/step
step:180/321 || Total Loss: 10.7765 || 2.9152s/step
step:190/321 || Total Lo

step:180/321 || Total Loss: 10.0650 || 2.2967s/step
step:190/321 || Total Loss: 10.0411 || 1.9915s/step
step:200/321 || Total Loss: 10.0814 || 2.8051s/step
step:210/321 || Total Loss: 10.0092 || 3.0653s/step
step:220/321 || Total Loss: 10.0525 || 3.1054s/step
step:230/321 || Total Loss: 10.1275 || 1.8073s/step
step:240/321 || Total Loss: 10.3863 || 3.6637s/step
step:250/321 || Total Loss: 10.3808 || 3.3635s/step
step:260/321 || Total Loss: 10.3272 || 2.6049s/step
step:270/321 || Total Loss: 10.3594 || 3.7628s/step
step:280/321 || Total Loss: 10.3221 || 2.7991s/step
step:290/321 || Total Loss: 10.3647 || 3.7959s/step
step:300/321 || Total Loss: 10.3333 || 2.7721s/step
step:310/321 || Total Loss: 10.2669 || 1.8384s/step
step:320/321 || Total Loss: 10.1698 || 3.7348s/step
Finish Training.
Total Loss: 10.1615 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:24/25
Start Training.
step:1/321 || Total Loss: 29.0811 || 4.0800s/step
step:10/321 || Total Loss: 11.8689 || 2.6750s/

In [6]:
#------------------------------------#
#   解冻backbone后训练
#------------------------------------#
lr = 1e-4
Batch_size = 4
Freeze_Epoch = 25
Unfreeze_Epoch = 50

optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
                        
epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
for param in model.backbone.parameters():
    param.requires_grad = False

for epoch in range(Freeze_Epoch, Unfreeze_Epoch):
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Unfreeze_Epoch, Cuda, optimizer, lr_scheduler)
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    #with open('val_loss.csv', mode='a+') as val_loss_file:
    #    val_loss_file.write(str(val_loss.item() + '\n')
torch.save(best_model_weights, 'model_data/yolov4_maskdetect_weights1.pth')


----------Train one epoch.----------
Epoch:26/50
Start Training.
step:1/321 || Total Loss: 4.5009 || 3.0973s/step
step:10/321 || Total Loss: 8.8686 || 2.1456s/step
step:20/321 || Total Loss: 7.8487 || 2.9942s/step
step:30/321 || Total Loss: 8.6234 || 3.3725s/step
step:40/321 || Total Loss: 9.2192 || 2.4058s/step
step:50/321 || Total Loss: 9.0070 || 2.8531s/step
step:60/321 || Total Loss: 8.9702 || 3.1994s/step
step:70/321 || Total Loss: 8.8885 || 3.1143s/step
step:80/321 || Total Loss: 8.9379 || 1.8133s/step
step:90/321 || Total Loss: 8.8064 || 2.0115s/step
step:100/321 || Total Loss: 9.0023 || 2.4338s/step
step:110/321 || Total Loss: 9.0037 || 2.5659s/step
step:120/321 || Total Loss: 8.9751 || 3.7698s/step
step:130/321 || Total Loss: 9.0149 || 3.6988s/step
step:140/321 || Total Loss: 8.9246 || 3.2784s/step
step:150/321 || Total Loss: 9.0433 || 2.1286s/step
step:160/321 || Total Loss: 8.9522 || 2.6520s/step
step:170/321 || Total Loss: 8.9516 || 3.4266s/step
step:180/321 || Total Loss:

step:190/321 || Total Loss: 7.4267 || 2.6129s/step
step:200/321 || Total Loss: 7.3420 || 3.3465s/step
step:210/321 || Total Loss: 7.2832 || 3.9349s/step
step:220/321 || Total Loss: 7.2857 || 1.8924s/step
step:230/321 || Total Loss: 7.3081 || 3.2434s/step
step:240/321 || Total Loss: 7.3087 || 2.9832s/step
step:250/321 || Total Loss: 7.3722 || 2.5309s/step
step:260/321 || Total Loss: 7.2809 || 2.7971s/step
step:270/321 || Total Loss: 7.2006 || 3.7248s/step
step:280/321 || Total Loss: 7.1491 || 3.1103s/step
step:290/321 || Total Loss: 7.1558 || 3.1413s/step
step:300/321 || Total Loss: 7.1591 || 2.5859s/step
step:310/321 || Total Loss: 7.2538 || 2.6590s/step
step:320/321 || Total Loss: 7.2301 || 2.2677s/step
Finish Training.
Total Loss: 7.2004 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:31/50
Start Training.
step:1/321 || Total Loss: 4.9189 || 2.4138s/step
step:10/321 || Total Loss: 6.0950 || 3.1714s/step
step:20/321 || Total Loss: 7.3387 || 3.4646s/step
step:30/321 ||

step:40/321 || Total Loss: 7.6673 || 2.6029s/step
step:50/321 || Total Loss: 7.6937 || 1.7473s/step
step:60/321 || Total Loss: 7.5916 || 3.7438s/step
step:70/321 || Total Loss: 7.2869 || 3.9359s/step
step:80/321 || Total Loss: 7.3217 || 3.0293s/step
step:90/321 || Total Loss: 7.2205 || 3.2364s/step
step:100/321 || Total Loss: 7.1230 || 3.4595s/step
step:110/321 || Total Loss: 7.1247 || 3.1544s/step
step:120/321 || Total Loss: 6.9635 || 2.3427s/step
step:130/321 || Total Loss: 6.8479 || 4.0961s/step
step:140/321 || Total Loss: 6.8882 || 2.2337s/step
step:150/321 || Total Loss: 6.8574 || 3.9269s/step
step:160/321 || Total Loss: 7.0396 || 1.9654s/step
step:170/321 || Total Loss: 7.0179 || 3.9840s/step
step:180/321 || Total Loss: 6.9958 || 3.1233s/step
step:190/321 || Total Loss: 6.9924 || 2.2737s/step
step:200/321 || Total Loss: 6.8955 || 2.9102s/step
step:210/321 || Total Loss: 6.9054 || 2.7461s/step
step:220/321 || Total Loss: 6.8798 || 2.8621s/step
step:230/321 || Total Loss: 6.8800 ||

step:240/321 || Total Loss: 6.2168 || 3.1654s/step
step:250/321 || Total Loss: 6.1849 || 3.1623s/step
step:260/321 || Total Loss: 6.2098 || 2.3427s/step
step:270/321 || Total Loss: 6.2159 || 1.8033s/step
step:280/321 || Total Loss: 6.2362 || 3.8119s/step
step:290/321 || Total Loss: 6.2860 || 2.3407s/step
step:300/321 || Total Loss: 6.3632 || 4.2842s/step
step:310/321 || Total Loss: 6.3597 || 2.9252s/step
step:320/321 || Total Loss: 6.3937 || 3.9469s/step
Finish Training.
Total Loss: 6.3595 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:40/50
Start Training.
step:1/321 || Total Loss: 5.1807 || 2.1636s/step
step:10/321 || Total Loss: 7.8947 || 3.1353s/step
step:20/321 || Total Loss: 8.2547 || 2.2207s/step
step:30/321 || Total Loss: 8.1060 || 2.3848s/step
step:40/321 || Total Loss: 7.7455 || 2.6870s/step
step:50/321 || Total Loss: 7.5476 || 2.7881s/step
step:60/321 || Total Loss: 7.3930 || 1.7903s/step
step:70/321 || Total Loss: 7.4475 || 3.0193s/step
step:80/321 || Tota

step:90/321 || Total Loss: 6.4870 || 3.4566s/step
step:100/321 || Total Loss: 6.3631 || 2.4628s/step
step:110/321 || Total Loss: 6.4074 || 2.4859s/step
step:120/321 || Total Loss: 6.3339 || 2.5989s/step
step:130/321 || Total Loss: 6.1607 || 2.2397s/step
step:140/321 || Total Loss: 6.2586 || 3.3495s/step
step:150/321 || Total Loss: 6.4093 || 3.1473s/step
step:160/321 || Total Loss: 6.4046 || 3.2524s/step
step:170/321 || Total Loss: 6.4505 || 3.0503s/step
step:180/321 || Total Loss: 6.4608 || 2.1956s/step
step:190/321 || Total Loss: 6.5327 || 3.8069s/step
step:200/321 || Total Loss: 6.4939 || 2.1155s/step
step:210/321 || Total Loss: 6.5578 || 2.6440s/step
step:220/321 || Total Loss: 6.5412 || 2.5309s/step
step:230/321 || Total Loss: 6.5914 || 2.3498s/step
step:240/321 || Total Loss: 6.5268 || 4.3432s/step
step:250/321 || Total Loss: 6.4915 || 2.0385s/step
step:260/321 || Total Loss: 6.5197 || 2.0015s/step
step:270/321 || Total Loss: 6.6013 || 2.7350s/step
step:280/321 || Total Loss: 6.54

step:290/321 || Total Loss: 6.5614 || 1.6772s/step
step:300/321 || Total Loss: 6.4993 || 3.3946s/step
step:310/321 || Total Loss: 6.5938 || 3.0022s/step
step:320/321 || Total Loss: 6.5770 || 3.8669s/step
Finish Training.
Total Loss: 6.5652 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:49/50
Start Training.
step:1/321 || Total Loss: 5.0984 || 2.4478s/step
step:10/321 || Total Loss: 6.3576 || 1.7943s/step
step:20/321 || Total Loss: 5.6605 || 3.4115s/step
step:30/321 || Total Loss: 5.3596 || 2.3748s/step
step:40/321 || Total Loss: 5.5424 || 3.2915s/step
step:50/321 || Total Loss: 5.8131 || 2.7541s/step
step:60/321 || Total Loss: 6.1768 || 2.0485s/step
step:70/321 || Total Loss: 5.9118 || 3.3095s/step
step:80/321 || Total Loss: 5.7943 || 3.6597s/step
step:90/321 || Total Loss: 5.8772 || 2.5299s/step
step:100/321 || Total Loss: 5.7452 || 3.3345s/step
step:110/321 || Total Loss: 5.7255 || 3.3505s/step
step:120/321 || Total Loss: 5.6569 || 3.0613s/step
step:130/321 || Total

In [5]:
#------------------------------------#
#   解冻backbone后训练
#------------------------------------#
lr = 1e-4
Batch_size = 2
Freeze_Epoch = 25
Unfreeze_Epoch = 50

optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
                        
epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
for param in model.backbone.parameters():
    param.requires_grad = False
    
best_loss = 55.1667
best_model_weights = copy.deepcopy(net.state_dict())

for epoch in range(Freeze_Epoch, Unfreeze_Epoch):
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Unfreeze_Epoch, Cuda, optimizer, lr_scheduler)
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    #with open('val_loss.csv', mode='a+') as val_loss_file:
    #    val_loss_file.write(str(val_loss.item() + '\n')
torch.save(best_model_weights, 'model_data/yolov4_coco_pretrained_weights.pth1')


----------Train one epoch.----------
Epoch:26/50
Start Training.


  tmp_targets = np.array(targets)


step:1/40 || Total Loss: 98.5296 || 2.5925s/step


RuntimeError: CUDA out of memory. Tried to allocate 44.00 MiB (GPU 0; 4.00 GiB total capacity; 1.21 GiB already allocated; 32.80 MiB free; 1.21 GiB reserved in total by PyTorch)