In [1]:
# 导入模块
print("模块导入")
import os
import time
import copy
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from nets.yolo4 import YoloBody,froze_params
from nets.yolo_training import YOLOLoss, Generator
print("模块导入完成！")

模块导入
模块导入完成！


In [2]:
#---------------------------------------------------#
#   获得类和先验框
#---------------------------------------------------#
print("定义函数")
def get_classes(classes_path):
    # 读取类别文件
    with open(classes_path) as f:
        class_names = f.readlines()
    # 获取分类并返回
    class_names = [c.strip() for c in class_names]
    return class_names


def get_anchors(anchors_path):
    # 读取先验框文件
    with open(anchors_path) as f:
        anchors = f.readline()
    # 获取先验框并返回
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape([-1,3,2])[::-1,:,:]


#---------------------------------------------------#
#   训练一个epoch
#---------------------------------------------------#
def fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen,genval, Epoch, cuda, optimizer, lr_scheduler):
    # 初始训练loss和验证loss
    total_loss = 0
    val_loss = 0
    print('\n' + '-' * 10 + 'Train one epoch.' + '-' * 10)
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
    net.train()
    for iteration in range(epoch_size):
        print("\n")
        start_time = time.time()
        # 读取数据
        images, targets = next(gen)
        # 将数据格式进行转化
        with torch.no_grad():
            if cuda:
                images = Variable(torch.from_numpy(images).type(torch.FloatTensor)).cuda()
                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
            else:
                images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
        # 梯度清零
        optimizer.zero_grad()
        # 前向传播
        outputs = net(images)
        # 记录loss信息
        losses = []
        for i in range(3):
            loss_item = yolo_losses[i](outputs[i], targets)
            losses.append(loss_item[0])
        # loss求和
        loss = sum(losses)
        # loss反向传播
        loss.backward()
        # 模型更新
        optimizer.step()
        # 学习率调度器更新
        lr_scheduler.step()
        # 打印loss信息
        total_loss += loss
        waste_time = time.time() - start_time
        print('step:' + str(iteration+1) + '/' + str(epoch_size) + ' || Total Loss: %.4f || %.4fs/step' % (total_loss/(iteration+1), waste_time))
    return total_loss/(epoch_size+1), val_loss/(epoch_size_val+1)
print("函数定义完成！")

定义函数
函数定义完成！


In [3]:
#-------------------------------#
#   输入的shape大小
#   显存比较小可以使用416x416
#   显存比较大可以使用608x608
#-------------------------------#
print("参数定义")
input_shape = (416,416)
# input_shape = (608, 608)

#-------------------------------#
#   tricks的使用设置
#-------------------------------#
Cosine_lr = True
mosaic = True
# 用于设定是否使用cuda
Cuda = False
smoooth_label = 0.03

#-------------------------------#
#   获得训练集和验证集的annotations
#   
#-------------------------------#
train_annotation_path = 'model_data/mask_train_new.txt'
val_annotation_path = 'model_data/mask_val_new.txt'

#-------------------------------#
#   获得先验框和类
#-------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/mask_classes_new.txt'   
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)
print("参数定义完成！")

参数定义
参数定义完成！


In [4]:
print("创建模型和数据迭代器")
# 创建模型
model = YoloBody(len(anchors[0]), num_classes)
# 根据是否使用cuda部署模型
if Cuda:
    cudnn.benchmark = True
    net = net.cuda()
else:
    net = model
# 打印模型结构
print(net)
# 建立loss函数
yolo_losses = []
for i in range(3):
    yolo_losses.append(YOLOLoss(np.reshape(anchors, [-1,2]), num_classes, \
                                (input_shape[1], input_shape[0]), smoooth_label, Cuda))
# 读取训练和验证数据文件
with open(train_annotation_path) as f:
    train_lines = f.readlines()
with open(val_annotation_path) as f:
    val_lines = f.readlines()
# 计算训练集和验证集大小
num_train = len(train_lines)
num_val = len(val_lines)
print(f"训练集大小：{num_train},验证集大小：{num_val}")
print("创建完成！")

创建模型和数据迭代器
YoloBody(
  (backbone): CSPDarkNet(
    (conv1): BasicConv(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): Mish()
    )
    (stages): ModuleList(
      (0): Resblock_body(
        (downsample_conv): BasicConv(
          (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation): Mish()
        )
        (split_conv0): BasicConv(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation): Mish()
        )
        (split_conv1): BasicConv(
          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn): BatchNorm

In [5]:
#------------------------------------#
#   先冻结backbone训练
#------------------------------------#
print("开始训练")
# 定义学习率
lr = 1e-3
# 定义批次大小
Batch_size = 1
# 定义开始epoch
Init_Epoch = 0
# 定义结束的epoch
Freeze_Epoch = 1
# 自定义一个epoch中的迭代数
iteration_step = 2
# 定义优化器
optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)

# 是否使用余弦调度器
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
# 定义数据生成器
gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
# 定义一个训练epoch的迭代次数                    
epoch_size = min(int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size),iteration_step)
# 定义一个测试epoch的迭代次数 
epoch_size_val = min(num_val//Batch_size,iteration_step)
# 冻结骨干网络
for n, p in net.named_parameters():
    if n.split(".")[0] in froze_params:
        p.requires_grad = False
# 获取并打印模型参数数量
n_parameters = sum(p.numel() for p in net.parameters() if p.requires_grad)
print('训练参数量:', n_parameters)
# 定义最好loss
best_loss = 99999999.0
# 定义最好的权重模型参数
best_model_weights = copy.deepcopy(net.state_dict())
# 开始训练
for epoch in range(Init_Epoch, Freeze_Epoch):
    # 训练一个epoch
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Freeze_Epoch, Cuda, optimizer, lr_scheduler)
    # 判断当前loss是否超过最好loss，是则保存模型
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
# 保存模型
torch.save(best_model_weights, 'model_data/yolov4_maskdetect_weights0.pth')
print("训练完成！")

开始训练
number of params: 303134

----------Train one epoch.----------
Epoch:1/1
Start Training.
0


  allow_unreachable=True)  # allow_unreachable flag


step:1/2 || Total Loss: 7889.3081 || 88.8030s/step
1


KeyboardInterrupt: 

In [6]:
#------------------------------------#
#   解冻backbone后训练
#------------------------------------#
print("继续训练")
# 读取上次训练的模型权重或者预训练模型
resume = True
if resume:
    model_path = "model_data/yolov4_maskdetect_weights0.pth"
else:
    model_path = "model_data/yolo_indoor.pth"
# 读取模型权重加快模型训练的效率
print('Loading pretrained model weights.')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path)
pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')
net = model
# 定义学习率
lr = 1e-4
# 定义批次大小
Batch_size = 1
# 定义解冻后开始的epoch
Freeze_Epoch = 0
# 定义结束训练epoch
Unfreeze_Epoch = 1
# 定义优化器
optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
# 是否使用余弦调度器
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
# 定义训练数据生成器
gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
# 定义测试数据生成器
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
# 定义一个训练epoch的迭代次数                       
epoch_size = min(int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size),iteration_step)
# 定义一个测试epoch的迭代次数
epoch_size_val = min(num_val//Batch_size,iteration_step)
# 开始训练
for epoch in range(Freeze_Epoch, Unfreeze_Epoch):
    # 训练一个epoch
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Unfreeze_Epoch, Cuda, optimizer, lr_scheduler)
    # 判断当前loss是否超过最好loss，是则保存模型
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
# 保存模型到本地
torch.save(best_model_weights, 'model_data/yolov4_maskdetect_weights1.pth')
print("训练完成")

继续训练
Loading pretrained model weights.
Finished!

----------Train one epoch.----------
Epoch:1/1
Start Training.
0
step:1/2 || Total Loss: 6965.2368 || 117.4981s/step
1
step:2/2 || Total Loss: 6953.8408 || 122.8974s/step
Finish Training.
训练完成
