In [None]:
# 用于colab加载google drive文件
# from google.colab import drive
# drive.mount('/content/drive')
# import os
# os.chdir("/content/drive/MyDrive/arrow_run/yolov3_arrow_run")

# 1 数据准备

## 1.1 画出包围盒，同时生成txt文件
这一步是为了方便标注箭头：我们采用的流程是先标注包围盒，再标注箭头。
所以要根据标注文件先生成带有包围盒的图片，再进行箭头标注；同时生成包围盒对应的txt文件，可以作为原始训练的数据输入。

In [None]:
# 导入库文件
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import cv2
import numpy as np

In [None]:
# Create the output directories used by the data-preparation steps.
# exist_ok=True keeps the cell idempotent and avoids the check-then-create
# race of `if not os.path.exists(...): os.makedirs(...)`.
for out_dir in (
    'data/ImgWithBB/',   # images with bounding boxes drawn
    'data/ImgWithBBA/',  # images with bounding boxes and arrows drawn
    'data/labels_b/',    # label files with bounding boxes only
    'data/labels_a/',    # label files with bounding boxes and arrows
    'data/ImageSets',    # train/val/test split lists
):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
wd = getcwd()  # current working directory
data_path = wd+"/data/"
classes = []  # class names; the list index is the class id written to the label files

# Read the class names. The first two characters of every line are dropped by
# the [2:] slice (presumably an "<id> " prefix — TODO confirm the file format).
# The context manager closes the file even if reading fails.
with open(data_path+'class_list.txt') as class_list_file:
    for line in class_list_file:
        # Strip only the line terminator: the previous `i[2:-1]` slice also cut
        # the last character of a final line that has no trailing newline.
        classes.append(line.rstrip('\r\n')[2:])

In [None]:
# 函数定义
def convert_box(size, box):
    """Convert a pixel bounding box to normalized (x_center, y_center, width, height).

    Args:
        size: (image_width, image_height) in pixels.
        box: (xmin, xmax, ymin, ymax) in pixels.

    Returns:
        Tuple (x, y, w, h) scaled by the image size, or False when any of the
        four values falls outside [0, 1].
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    # Box centre and extent in pixels, scaled to the image size.
    x = ((xmin + xmax) / 2.0) * scale_x
    y = ((ymin + ymax) / 2.0) * scale_y
    w = (xmax - xmin) * scale_x
    h = (ymax - ymin) * scale_y

    # Reject boxes whose normalized values leave the unit range.
    if any(v < 0 or v > 1 for v in (x, y, w, h)):
        return False
    return (x, y, w, h)

def draw_bb(image_id):
    """Draw the annotated boxes of one image and write its box-only label file.

    Reads ``JPEGImages/<image_id>.jpg`` and ``data/Annotations/<image_id>.xml``,
    writes ``labels_b/<image_id>.txt`` (one ``cls_id x y w h`` line per kept
    object) and ``ImgWithBB/<image_id>_BB.jpg`` (the image with the boxes and
    an ``<index>_<class>`` caption per box).

    Args:
        image_id: file stem shared by the image and its annotation.

    Returns:
        (noneedlbox, errorlabel): each is ``image_id`` when the image contains,
        respectively, a skipped object (class not in ``classes`` or marked
        difficult) or a box rejected by ``convert_box``; otherwise the string "no".

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img_path = data_path+'JPEGImages/%s.jpg'%(image_id)
    out_path = data_path+'ImgWithBB/%s_BB.jpg' % (image_id)
    out_txt_path = data_path+'labels_b/%s.txt' % (image_id)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None; fail here
        # with a clear message instead of deep inside a drawing call.
        raise FileNotFoundError('cannot read image: %s' % img_path)

    # Parse the annotation first so its handle is released immediately and no
    # empty label file is left behind when the XML is missing or malformed.
    with open('data/Annotations/%s.xml' % (image_id)) as xml_file:
        root = ET.parse(xml_file).getroot()
    # Image width and height as recorded in the annotation.
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # "no" means nothing was skipped / nothing was rejected.
    noneedlbox = "no"
    errorlabel = "no"

    with open(out_txt_path,'w') as out_txt:
        for i,obj in enumerate(root.iter('object')):
            difficult = obj.find('difficult').text
            cls_ = obj.find('name').text

            if cls_ not in classes or int(difficult) == 1:
                noneedlbox = image_id
                continue

            cls_id = classes.index(cls_)
            xmlbox = obj.find('bndbox')
            # Pixel coordinates in (xmin, xmax, ymin, ymax) order.
            b = [int(xmlbox.find('xmin').text), int(xmlbox.find('xmax').text),
                 int(xmlbox.find('ymin').text), int(xmlbox.find('ymax').text)]

            bb = convert_box((w, h), b)
            if bb is False:
                errorlabel = image_id
                continue
            out_txt.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

            caption = '%d_%s'%(i,cls_)
            # getTextSize(text, fontFace, fontScale, thickness) -> ((w, h), baseline)
            t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            # Put the caption just above the top-left corner; if that would
            # leave the image, move it below the box instead. Plain Python
            # ints are used because OpenCV point arguments must be integers.
            text_y = int(b[2] - t_size[1]/2)
            if text_y < t_size[1]:
                text_y = text_y + 2*t_size[1] + b[3] - b[2]
            cv2.putText(img, caption, (b[0], text_y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1)
            cv2.rectangle(img, (b[0],b[2]), (b[1],b[3]), (0,255,0), 2)

    cv2.imwrite(out_path, img)

    return noneedlbox,errorlabel

In [None]:
label_path = 'data/Annotations'
# Only annotation files are processed; any other directory entry (hidden
# files, checkpoint folders, ...) would otherwise be turned into a bogus id.
total_label = [f for f in os.listdir(label_path) if f.lower().endswith('.xml')]
n_list = []  # ids of images containing a skipped object
e_list = []  # ids of images containing a box rejected by convert_box
for i in total_label:
    noneedlabel,errorlabel = draw_bb(os.path.splitext(i)[0])
    if noneedlabel != 'no':
        n_list.append(noneedlabel)
    if errorlabel != "no":
        e_list.append(errorlabel)

In [None]:
print("total:%d,noneedlabels:%d,errorlabel:%d"%(len(total_label),len(n_list),len(e_list)))

In [None]:
# Record the ids of the images that contain a skipped object (the "bag" list).
# The context manager closes the file even if a write fails.
with open('data/bag_list.txt','w') as n_file:
    for i in n_list:
        n_file.write(i+'\n')

## 1.2 读取箭头txt，与xml一起生成labels_a两个文件夹的txt，同时生成带箭头包围盒的图片
bag不画，不写入txt，只是记录下来

In [None]:
# -*- coding:utf-8 -*
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import cv2
from os import listdir, getcwd
import numpy as np
wd = getcwd()
data_path = wd+"/data/"

classes = []  # class names; the list index is the class id written to the label files
# The file used to be left open; the context manager closes it.
with open(data_path+'class_list.txt') as class_list_file:
    for line in class_list_file:
        # Drop the 2-character prefix and only the line terminator, so that a
        # final line without a trailing newline keeps its last character.
        classes.append(line.rstrip('\r\n')[2:])

def convert_box_with_arrow(size, box, arrow):
    """Normalize a pixel box and express its arrow relative to the box.

    Args:
        size: (image_width, image_height) in pixels.
        box: (xmin, xmax, ymin, ymax) in pixels.
        arrow: two points ((x1, y1), (x2, y2)) in pixels.

    Returns:
        Tuple (x, y, w, h, ax1, ay1, ax2, ay2): the box centre and size scaled
        by the image size, followed by the two arrow points measured from the
        box's top-left corner and scaled by the box size. Returns False when
        the box has no area or when any of the eight values falls outside [0, 1].
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    # Box centre in pixels.
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    # Box width and height in pixels.
    w = box[1] - box[0]
    h = box[3] - box[2]
    # A box without area cannot carry box-relative arrow coordinates; reject
    # it instead of raising ZeroDivisionError in the divisions below.
    if w <= 0 or h <= 0:
        return False
    # Arrow points relative to the box's top-left corner, scaled to the box size.
    ax1 = (arrow[0][0] - box[0]) / w
    ax2 = (arrow[1][0] - box[0]) / w
    ay1 = (arrow[0][1] - box[2]) / h
    ay2 = (arrow[1][1] - box[2]) / h
    # Scale the box itself to the image size.
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh

    for i in [x,y,w,h,ax1,ax2,ay1,ay2]:
        if i < 0 or i > 1:
            return False
    return (x, y, w, h, ax1, ay1, ax2, ay2)

def _box_inside_image(size, box):
    """Return True when the pixel box (xmin, xmax, ymin, ymax) normalizes into [0, 1]."""
    dw = 1. / size[0]
    dh = 1. / size[1]
    values = (
        (box[0] + box[1]) / 2.0 * dw,
        (box[2] + box[3]) / 2.0 * dh,
        (box[1] - box[0]) * dw,
        (box[3] - box[2]) * dh,
    )
    return all(0 <= v <= 1 for v in values)


def draw_arrow(image_id):
    """Write the box+arrow label file of one image and draw boxes and arrows on it.

    Reads ``JPEGImages/<image_id>.jpg``, ``data/Annotations/<image_id>.xml``
    and ``data/Arrow/<image_id>_BB.txt``; writes ``data/labels_a/<image_id>.txt``
    (one ``cls_id x y w h ax1 ay1 ax2 ay2`` line per kept object) and
    ``data/ImgWithBBA/<image_id>_BBA.jpg``.

    The arrow file's first line is skipped; every following line holds one
    point as two numbers that are multiplied by the image width and height.
    Two consecutive points form one arrow, and arrows are matched to the kept
    objects in order.

    Args:
        image_id: file stem shared by the image, annotation and arrow file.

    Returns:
        (noneedlbox, errorlabel): each is ``image_id`` when the image contains,
        respectively, a skipped object (class not in ``classes`` or marked
        difficult) or an object whose box/arrow could not be converted (or has
        no arrow left in the arrow file); otherwise the string "no".

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    img_path = data_path+'JPEGImages/%s.jpg'%(image_id)
    out_path = 'data/ImgWithBBA/%s_BBA.jpg' % (image_id)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None.
        raise FileNotFoundError('cannot read image: %s' % img_path)

    # Parse the annotation; the handle is released as soon as parsing is done.
    with open('data/Annotations/%s.xml' % (image_id)) as xml_file:
        root = ET.parse(xml_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # Read the arrow points and convert them to pixel coordinates.
    arrow_points = []
    with open('data/Arrow/%s_BB.txt' % (image_id)) as arrow_file:
        for i,line in enumerate(arrow_file):
            if i == 0:
                continue  # the first line is not a point
            # split() instead of line[:-1].split(' '): a last line without a
            # trailing newline no longer loses its final digit, and blank
            # lines or repeated spaces no longer break the parse.
            point = line.split()
            if not point:
                continue
            arrow_points.append([int(w*float(point[0])),int(h*float(point[1]))])

    # "no" means nothing was skipped / nothing was rejected.
    noneedlbox = "no"
    errorlabel = "no"

    a_i = 0  # index of the next unused arrow (two points per arrow)
    with open('data/labels_a/%s.txt' % (image_id), 'w') as out_file_a:
        for i,obj in enumerate(root.iter('object')):
            difficult = obj.find('difficult').text
            cls_ = obj.find('name').text

            if cls_ not in classes or int(difficult) == 1:
                noneedlbox = image_id
                continue

            cls_id = classes.index(cls_)
            xmlbox = obj.find('bndbox')
            # Pixel coordinates in (xmin, xmax, ymin, ymax) order.
            b = [int(xmlbox.find('xmin').text), int(xmlbox.find('xmax').text),
                 int(xmlbox.find('ymin').text), int(xmlbox.find('ymax').text)]

            if not _box_inside_image((w, h), b):
                # The box itself is invalid. No arrow is consumed, as before
                # (presumably no arrow was annotated for a box that draw_bb
                # does not draw — TODO confirm against the annotation workflow).
                errorlabel = image_id
                continue

            if 2*a_i + 1 >= len(arrow_points):
                # Fewer arrows than boxes: record the image as erroneous
                # instead of crashing with IndexError.
                errorlabel = image_id
                continue

            tail, head = arrow_points[2*a_i], arrow_points[2*a_i+1]
            # Consume the arrow now. Previously the index only advanced after
            # a successful conversion, so one rejected arrow was reused for
            # every following object of the image.
            a_i += 1

            bb_a = convert_box_with_arrow((w, h), b, (tail, head))
            if bb_a is False:
                errorlabel = image_id
                continue

            out_file_a.write(str(cls_id) + " " + " ".join([str(a) for a in bb_a]) + '\n')

            caption = '%d_%s'%(i,cls_)
            # getTextSize(text, fontFace, fontScale, thickness) -> ((w, h), baseline)
            t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            # Caption just above the top-left corner, or below the box when
            # that would leave the image. Plain Python ints are used because
            # OpenCV point arguments must be integers.
            text_y = int(b[2] - t_size[1]/2)
            if text_y < t_size[1]:
                text_y = text_y + 2*t_size[1] + b[3] - b[2]
            cv2.putText(img, caption, (b[0], text_y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1)
            # Box, arrow, and a dot on each arrow end point.
            cv2.rectangle(img, (b[0],b[2]), (b[1],b[3]), (0,255,0), 2)
            cv2.arrowedLine(img, (tail[0],tail[1]), (head[0],head[1]), (0,0,255),2,0,0,0.2)
            cv2.circle(img,(tail[0],tail[1]),1,(0,225,225),4)
            cv2.circle(img,(head[0],head[1]), 1, (0,225,225),4)

    cv2.imwrite(out_path, img)
    return noneedlbox,errorlabel

In [None]:
# Make sure the output directories exist (idempotent, no check-then-create
# race), then try draw_arrow on a single sample image.
for out_dir in ('data/ImgWithBBA/', 'data/labels/', 'data/labels_a/'):
    os.makedirs(out_dir, exist_ok=True)
n,e = draw_arrow('v01_026925')
print(n,e)

In [None]:
# Convert every annotation that has an arrow file into a labels_a txt file;
# objects of unwanted classes (e.g. bag) are dropped by draw_arrow.
arrowpath = 'data/Arrow'
arrow_suffix = '_BB.txt'
# Only "<image_id>_BB.txt" files are processed; slicing a fixed 7 characters
# off any other directory entry would produce a bogus image id.
total_arrow = [f for f in os.listdir(arrowpath) if f.endswith(arrow_suffix)]
n_list = []  # ids of images containing a skipped object
e_list = []  # ids of images containing a rejected box/arrow
for i in total_arrow:
    noneedlabel,errorlabel = draw_arrow(i[:-len(arrow_suffix)])
    if noneedlabel != 'no':
        n_list.append(noneedlabel)
    if errorlabel != "no":
        e_list.append(errorlabel)
print("total:%d,noneedlabels:%d,errorlabel:%d"%(len(total_arrow),len(n_list),len(e_list)))
# 2810 - 3 = 2807 (author's note on the counts)

In [None]:
# Record the ids of the images with a rejected box/arrow.
# The context manager closes the file even if a write fails.
with open('data/e_list.txt','w') as e_file:
    for i in e_list:
        e_file.write(i+'\n')

## 1.3 数据划分
在数据划分之前，先尝试能否根据labels把包围盒和箭头反画回图片上（坐标记得转成整数）。
根据需要设置划分比例

In [None]:
# Sanity check: redraw the boxes and arrows of one image from its labels_a file.
img = cv2.imread('data/JPEGImages/v01_002075.jpg')
if img is None:
    # cv2.imread reports an unreadable file by returning None.
    raise FileNotFoundError('cannot read image: data/JPEGImages/v01_002075.jpg')
imgh = img.shape[0]
imgw = img.shape[1]
print(imgw,imgh)
# Read the label lines up front so the file is closed (it used to stay open).
with open('data/labels_a/v01_002075.txt') as labela:
    label_lines = labela.readlines()
for (index,i) in enumerate(label_lines):
    pl = i.split()
    if not pl:
        continue  # ignore blank lines
    rl = [float(a) for a in pl] # cls_ x y w h ax1 ay1 ax2 ay2
    print(rl)
    cls_ = classes[int(rl[0])]
    print(cls_)
    # Box centre and size back in pixels.
    x = int(rl[1]*imgw)
    y = int(rl[2]*imgh)
    w = int(rl[3]*imgw)
    h = int(rl[4]*imgh)
    lefttopx = int(x - w/2)
    rightbottomx = int(x + w/2)
    lefttopy = int(y - h/2)
    rightbottomy = int(y + h/2)
    # Arrow points are stored relative to the box's top-left corner and size.
    ax1 = int(rl[5]*w + lefttopx)
    ay1 = int(rl[6]*h + lefttopy)
    ax2 = int(rl[7]*w + lefttopx)
    ay2 = int(rl[8]*h + lefttopy)
    caption = '%d_%s'%(index,cls_)
    # getTextSize(text, fontFace, fontScale, thickness) -> ((w, h), baseline)
    t_size = cv2.getTextSize(caption, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    # Caption just above the top-left corner, or below the box when that
    # would leave the image; plain Python ints are used for the OpenCV point.
    text_y = int(lefttopy - t_size[1]/2)
    if text_y < t_size[1]:
        text_y = text_y + 2*t_size[1] + h
    cv2.putText(img, caption, (lefttopx, text_y), cv2.FONT_HERSHEY_PLAIN, 1.0, (255, 255, 255), 1)
    # Box and arrow.
    cv2.rectangle(img, (lefttopx,lefttopy), (rightbottomx,rightbottomy), (0,255,0), 2)
    cv2.arrowedLine(img,(ax1,ay1), (ax2,ay2), (0,0,255),2,0,0,0.2)
# NOTE(review): cv2.imshow needs a GUI session; it will not work on a headless
# host such as Colab — confirm where this cell is meant to run.
cv2.imshow('img1', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Dataset split
import os
import random

testval_percent = 0.2 # fraction of the data used for validation + test
test_percent = 0.5 # fraction of that part used for test
filepath = 'data/labels_b'
txtsavepath = 'data/ImageSets'
split_seed = 0 # fixed seed so that re-running the cell reproduces the same split
# Sorted, and restricted to label files: os.listdir order is arbitrary, which
# would make the split differ between machines even with a fixed seed.
total = sorted(f for f in os.listdir(filepath) if f.endswith('.txt'))

num = len(total)  # number of label files
filelist = range(num)
tv = int(num * testval_percent)  # size of validation + test
tr = int(tv * test_percent)      # size of test
rng = random.Random(split_seed)  # local generator; the global random state is untouched
testval = rng.sample(filelist, tv)
test = rng.sample(testval, tr)
# Sets give O(1) membership tests in the loop below.
testval_set = set(testval)
test_set = set(test)

# Each output file lists file stems (no extension), one per line. The context
# managers close all four files even if a write fails.
with open(txtsavepath + '/testval.txt', 'w') as ftestval, \
        open(txtsavepath + '/test.txt', 'w') as ftest, \
        open(txtsavepath + '/train.txt', 'w') as ftrain, \
        open(txtsavepath + '/val.txt', 'w') as fval:
    for i in filelist:
        name = os.path.splitext(total[i])[0] + '\n'
        if i in testval_set:
            ftestval.write(name)
            if i in test_set:
                ftest.write(name)
            else:
                fval.write(name)
        else:
            ftrain.write(name)

In [None]:
# For each split, write the image paths to data/<split>.txt.
sets = ['train', 'test', 'val']
for image_set in sets:
    # The id file used to be opened without ever being closed.
    with open('data/ImageSets/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()
    with open('data/%s.txt' % (image_set), 'w') as list_file:
        for image_id in image_ids:
            list_file.write('data/JPEGImages/%s.jpg\n' % (image_id))

# 2 选出锚盒
因为我们需要根据自己的数据选择预设锚盒，运行下面代码后，将输出写入cfg文件

In [None]:
from utils.utils import *; 
k = kmean_anchors('data/train.txt',n=9,gen=10000)
print(k)

In [None]:
88,59, 103,84, 136,71, 135,102, 177,89, 153,132, 189,111, 215,141, 276,190

# 3 尝试完整的将yolov3.tiny跑起来
修改cfg文件

修改yolo.data yolo.names

将 utils/datasets.py 第295行的 labels 改成 labels_a；第336行把 label 初始化成9列；第364行把每一行的元素个数改成9个。


In [None]:
from utils.utils import *; 
k = kmean_anchors('data/train.txt',n=6,gen=5000)
print(k)

In [None]:
# Anchors noted by the author (presumably the kmean_anchors output of the cell
# above). Kept as a comment: the original line used full-width commas, which
# is a SyntaxError in a code cell.
# 95,61, 118,84, 172,90, 138,115, 183,127, 252,156
# Output of a YOLO layer: x, y, w, h, confidence, arrow coordinates
# (x1, y1, x2, y2) and 6 class probabilities,
# so the size of the convolution layer in front of it is
print(3 * (4 + 1 + 4 + 6))
# Then change the number of classes of the yolo layers to 6.

# 4 训练与测试

In [None]:
!python train.py --data data/yolo.data --cfg cfg/yolov3-tiny-my.cfg --device 0 --weights weights/yolov3-tiny.weights --epochs 30 --batch 16 --phased
# v01_054600 箭头标到了边界，右出结果大于1
# 忘了训练多少epoch，关系不大

In [None]:
!python train.py --data data/yolo.data --cfg cfg/yolov3-arrow6.cfg --device 0 --weights weights/yolov3.weights --epochs 200 --batch 16 --phased
# --phased
# v01_054600 箭头标到了边界，右出结果大于1
# 忘了训练多少epoch，关系不大

In [None]:
!python test.py --data data/yolo.data --cfg cfg/yolov3-arrow6.cfg --device 0 --weights weights/b6_150_last.pt

# 5 代码修改尝试

## 5.1 模型可视化

In [None]:
# 需要对loadlabel函数进行更改
    # Dataset
dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
                                augment=True,
                                hyp=hyp,  # augmentation hyperparameters
                                rect=opt.rect,  # rectangular training
                                cache_images=opt.cache_images,
                                single_cls=opt.single_cls)

# Dataloader
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
dataloader = torch.utils.data.DataLoader(dataset,
                                            batch_size=batch_size,
                                            num_workers=nw,
                                            shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
                                            pin_memory=True,
                                            collate_fn=dataset.collate_fn)
model.train()

# Update image weights (optional)
if dataset.image_weights:
    w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
    image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
    dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx

mloss = torch.zeros(4).to(device)  # mean losses
# print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
# 进度条构建
pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------

In [None]:
# 一些应该掌握的操作
print(torch.cat((a,b),1))

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.onnx
 
import netron
 
 
class ForwardNet(nn.Module):
    """Small demo network: a stem convolution, one residual block and a sigmoid head.

    Takes a batch of 3-channel images and returns a 1-channel map of the same
    spatial size (every 3x3 convolution uses padding=1).
    """

    def __init__(self):
        super().__init__()
        # Layers are created in the same order as before (block1, conv1,
        # output) so that seeded weight initialisation is unchanged.
        bottleneck_layers = [
            nn.Conv2d(64, 64, 3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, 3, padding=1, bias=False),
            nn.BatchNorm2d(64),
        ]
        self.block1 = nn.Sequential(*bottleneck_layers)

        self.conv1 = nn.Conv2d(3, 64, 3, padding=1, bias=False)

        head_layers = [
            nn.Conv2d(64, 1, 3, padding=1, bias=True),
            nn.Sigmoid(),
        ]
        self.output = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run stem -> residual block (with skip connection) -> sigmoid head."""
        features = self.conv1(x)
        # Residual connection: block output plus its own input, then ReLU.
        activated = F.relu(self.block1(features) + features)
        return self.output(activated)
 
 
# Build a random input batch and run one forward pass through the demo network.
input = torch.rand(1, 3, 416, 416)
model = ForwardNet()
output = model(input)
 
# Export the network to ONNX, passing the same tensor as the example input.
onnx_path = "netForwatch.onnx"
torch.onnx.export(model, input, onnx_path)
 
# Open the exported file with netron.
netron.start(onnx_path)

In [None]:
from models import *
import torch.onnx
 
import netron

# Hyperparameters
hyp = {'giou': 3.54,  # giou loss gain
       'cls': 37.4,  # cls loss gain
       'cls_pw': 1.0,  # cls BCELoss positive_weight
       'obj': 64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
       'obj_pw': 1.0,  # obj BCELoss positive_weight
       'iou_t': 0.20,  # iou training threshold
       'lr0': 0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
       'lrf': 0.0005,  # final learning rate (with cos scheduler)
       'momentum': 0.937,  # SGD momentum
       'weight_decay': 0.0005,  # optimizer weight decay
       'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
       'hsv_h': 0.0138,  # image HSV-Hue augmentation (fraction)
       'hsv_s': 0.678,  # image HSV-Saturation augmentation (fraction)
       'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
       'degrees': 1.98 * 0,  # image rotation (+/- deg)
       'translate': 0.05 * 0,  # image translation (+/- fraction)
       'scale': 0.05 * 0,  # image scale (+/- gain)
       'shear': 0.641 * 0}  # image shear (+/- deg)
nc = 7
hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
# model.train()
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
cfg = 'cfg/yolov3.cfg'
model = Darknet(cfg).to(device)

model.nc = nc  # attach number of classes to model
model.hyp = hyp  # attach hyperparameters to model
model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

model.train()
# input = torch.rand(1, 3, 416, 416).to(device)
# # model.eval()
# output = model(input)
# print(len(output))
# onnx_path = "netForwatch.onnx"
# torch.onnx.export(model, input, onnx_path, opset_version=11)
# netron.start(onnx_path)

In [None]:
print(output[...,0].shape)#12=7+4+1
print(output[1].shape)
print(output[2].shape)

In [None]:
# 深度36变成3*12，这一步看看怎么变，我们的目标是7+4+1+2+2=16
# 所以深度可能改成48就有可能实现
# 36是yolo前一层的输出
# 
input = torch.rand(1, 48, 13, 13)
print(input[...,0:2].shape)

In [None]:
# 接下来看数据加载的target怎么搞的
# 训练时的target就是文件写入的格式
# 也就是我们是往文件的格式靠近，那test做了一件什么事情呢，关键就在于那几个约束和原先的关系
from utils.datasets import *
from tqdm import tqdm
train_path = "data/val.txt"
# Hyperparameters
hyp = {'giou': 3.54,  # giou loss gain
       'cls': 37.4,  # cls loss gain
       'cls_pw': 1.0,  # cls BCELoss positive_weight
       'obj': 64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
       'obj_pw': 1.0,  # obj BCELoss positive_weight
       'iou_t': 0.20,  # iou training threshold
       'lr0': 0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
       'lrf': 0.0005,  # final learning rate (with cos scheduler)
       'momentum': 0.937,  # SGD momentum
       'weight_decay': 0.0005,  # optimizer weight decay
       'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
       'hsv_h': 0.0138,  # image HSV-Hue augmentation (fraction)
       'hsv_s': 0.678,  # image HSV-Saturation augmentation (fraction)
       'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
       'degrees': 1.98 * 0,  # image rotation (+/- deg)
       'translate': 0.05 * 0,  # image translation (+/- fraction)
       'scale': 0.05 * 0,  # image scale (+/- gain)
       'shear': 0.641 * 0}  # image shear (+/- deg)
nc = 7
hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
img_size = 416
batch_size = 16
rect = False
cache_images = False
sing = False
dataset = LoadImagesAndLabels(train_path, img_size, batch_size,
                                augment=True,
                                hyp=hyp,  # augmentation hyperparameters
                                rect=rect,  # rectangular training
                                cache_images=cache_images,
                                single_cls=sing)
dataloader = torch.utils.data.DataLoader(dataset,
                                            batch_size=batch_size,
                                            num_workers=4,
                                            shuffle=not rect,  # Shuffle=True unless rectangular training is used
                                            pin_memory=True,
                                            collate_fn=dataset.collate_fn)

# total is the number of batches the loader yields; the batch size that was
# passed before is unrelated to the iteration count.
pbar = tqdm(enumerate(dataloader), total=len(dataloader))  # progress bar
for i, (imgs, targets, paths, _) in pbar:  # look at the first batch only
    print(targets.shape)
    print(type(targets))
    print(targets)
    break

## 5.2 误差计算

In [None]:
# 其实在误差计算里面已经有对这些进行处理了
# 目前看下来只要修改输入文件，以及对输入维度的一些判断
# cfgyolo前面的维度，其他模型输出目前还不用更改
# 误差计算
# 箭头坐标点，如果用比例表示，计算误差时，大小尺寸就一样了，实际上需要考虑到包围盒的大小，所以计算误差应该借助实际包围盒的大小

# 坐标点：没有约束的坐标点，约束到包围盒里面的坐标点，直接计算误差，结合包围盒实际大小计算误差
from utils.utils import *
import torch
from models import *
import torch.onnx
 
# import netron

# Hyperparameters
hyp = {'giou': 3.54,  # giou loss gain
       'cls': 37.4,  # cls loss gain
       'arrow':1.0, # 后面可以考虑加一下，对误差比例进行缩放
       'cls_pw': 1.0,  # cls BCELoss positive_weight
       'obj': 64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
       'obj_pw': 1.0,  # obj BCELoss positive_weight
       'iou_t': 0.20,  # iou training threshold
       'lr0': 0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
       'lrf': 0.0005,  # final learning rate (with cos scheduler)
       'momentum': 0.937,  # SGD momentum
       'weight_decay': 0.0005,  # optimizer weight decay
       'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
       'hsv_h': 0.0138,  # image HSV-Hue augmentation (fraction)
       'hsv_s': 0.678,  # image HSV-Saturation augmentation (fraction)
       'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
       'degrees': 1.98 * 0,  # image rotation (+/- deg)
       'translate': 0.05 * 0,  # image translation (+/- fraction)
       'scale': 0.05 * 0,  # image scale (+/- gain)
       'shear': 0.641 * 0}  # image shear (+/- deg)
nc = 7
hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
# model.train()
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
cfg = 'cfg/yolo-my.cfg'
model = Darknet(cfg).to(device)

model.nc = nc  # attach number of classes to model
model.hyp = hyp  # attach hyperparameters to model
model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

# Two hand-written targets with 10 values per row. Presumably
# (image index, class, x, y, w, h, arrow x1, y1, x2, y2) — TODO confirm
# against build_targets.
tags = torch.tensor([[ 0.00000,  2.00000,  0.19658,  0.20487,  0.26816,  0.15605,0.1,0.2,0.3,0.4],
[ 0.00000,  1.00000,  0.78653,  0.36487,  0.14542,  0.15631, 0.1,0.2,0.3,0.4]]).to(device)
inp = torch.rand(1, 3, 416, 416).to(device)
# model.eval()
model.train()
output = model(inp)
# filter = non_max_suppression(output[0], conf_thres=0.0001, iou_thres=0.6, multi_label=True)
# print(filter[0].shape)
# Bound as loss_items (was loss_item): a later cell prints `loss_items`,
# which raised NameError on a fresh run.
loss, loss_items = compute_loss(output, tags, model)

In [None]:
print(output[1].shape)
i = output[1][...,0]
print(i.shape)

In [None]:
import math as m
x = torch.Tensor([-1,2])
y = torch.Tensor([0,2])
a = torch.atan(x/y)
# m.atan(x/y)
# m.pi/4
a

In [None]:
import tensorflow as tf
import torch
import onnx
from onnx_tf.backend import prepare
import os
import numpy as np

# Hyperparameters
hyp = {'giou': 3.54,  # giou loss gain
       'cls': 37.4,  # cls loss gain
       'arrow':1.0, # 后面可以考虑加一下，对误差比例进行缩放
       'cls_pw': 1.0,  # cls BCELoss positive_weight
       'obj': 64.3,  # obj loss gain (*=img_size/320 if img_size != 320)
       'obj_pw': 1.0,  # obj BCELoss positive_weight
       'iou_t': 0.20,  # iou training threshold
       'lr0': 0.01,  # initial learning rate (SGD=5E-3, Adam=5E-4)
       'lrf': 0.0005,  # final learning rate (with cos scheduler)
       'momentum': 0.937,  # SGD momentum
       'weight_decay': 0.0005,  # optimizer weight decay
       'fl_gamma': 0.0,  # focal loss gamma (efficientDet default is gamma=1.5)
       'hsv_h': 0.0138,  # image HSV-Hue augmentation (fraction)
       'hsv_s': 0.678,  # image HSV-Saturation augmentation (fraction)
       'hsv_v': 0.36,  # image HSV-Value augmentation (fraction)
       'degrees': 1.98 * 0,  # image rotation (+/- deg)
       'translate': 0.05 * 0,  # image translation (+/- fraction)
       'scale': 0.05 * 0,  # image scale (+/- gain)
       'shear': 0.641 * 0}  # image shear (+/- deg)
nc = 7
hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
# model.train()
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
cfg = 'cfg/yolov3-tiny-my.cfg'
model = Darknet(cfg)

model.nc = nc  # attach number of classes to model
model.hyp = hyp  # attach hyperparameters to model
model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

"""
pytorch转onnx
"""
model.load_state_dict(torch.load('weights/best.pt'))
# # 输入placeholder
# dummy_input = torch.randint(0, 10000, (1, 20))
# dummy_output = model_pytorch(dummy_input)
# print(dummy_output.shape)

# # Export to ONNX format
# torch.onnx.export(model_pytorch, 
#                   dummy_input, 
#                   'model.onnx', 
#                   input_names=['inputs'], 
#                   output_names=['outputs'])

In [None]:
# torch.autograd.set_detect_anomaly(True)
loss.backward()

In [None]:
 [( tensor([0, 0, 0], device='cuda:0'), tensor([0, 0, 1]), tensor([2, 4, 2], device='cuda:0'), tensor([ 2, 10,  2], device='cuda:0')),
 # 如果这一层将三个锚盒与图片上的两个包围盒对比六次，然后发现有三个符合，
 # anchor是na*nt，锚盒数乘于目标数，然后那些筛选下来的锚盒，这是返回是哪个anchor，anchors返回的是对应的锚盒的大小
 # 然后就是对应包围盒的横坐标，纵坐标，这是根据该层网格大小确定的[是先纵坐标，再横坐标]
  (tensor([0, 0, 0, 0, 0, 0], device='cuda:0'), tensor([0, 0, 1, 1, 2, 2]), tensor([5, 9, 5, 9, 5, 9], device='cuda:0'), tensor([ 5, 20,  5, 20,  5, 20], device='cuda:0')),
  (tensor([], device='cuda:0', dtype=torch.int64), tensor([], dtype=torch.int64), tensor([], device='cuda:0', dtype=torch.int64), tensor([], device='cuda:0', dtype=torch.int64))]

In [None]:
out_targets = build_targets(output, tags, model)
print(len(out_targets))
print(out_targets[4])

In [None]:
# print(output[0].shape)

gain = torch.ones(6, device=device)
print(gain)
gain[2:] = torch.tensor([1,3,13,13,12])[[3, 2, 3, 2]]
print(gain)

In [None]:
import torch
at = torch.arange(3)
print(at)
at = at.view(3, 1) # 变成3行一列
print(at)
at = at.repeat(1, 2) # 列重复
print(at)

In [None]:
z = torch.zeros_like(at)
print(z)

In [None]:
print(loss)
print(loss_items)# 误差的量级应该尽量控制接近

In [None]:
# 因为预测了很多个，需要与这些都进行误差计算，
# 注意一下类别误差计算的维度怎么取，是否需要修改
# 然后维度设置成和置信度一样就好了
# 误差计算用了BCE逻辑回归，要看看，我看了似乎是二进制交叉熵？他说target只能是0和1？？
# 不知道置信度为什么会用逻辑回归？好奇怪，这不应该是用在分类问题吗？
# 我们还是用均方误差好一点
print(output[0].shape)
print(output[1].shape)
print(output[2].shape)

In [None]:
# The inputs of the loss are understood; next, look at what it does internally.
# utils is mainly responsible for computing the loss.
# Next: organise the reasoning, then continue the experiments and record the results.
# Open question: why is the target split into 3 and 6?
# Also: the model output is not 13*13*3 boxes.
# The problem is probably in build_targets.
import importlib
import utils.utils
# Reload the submodule that defines compute_loss: reloading only the `utils`
# package does not re-execute utils/utils.py, and the `imp` module is
# deprecated (removed in Python 3.12).
importlib.reload(utils.utils)
# `tags` is the target tensor defined in the loss cell above (`tag` was undefined).
loss, loss_items = utils.utils.compute_loss(output, tags, model)

In [None]:
import torch
# device has to be defined before it is used to allocate `gain`; the previous
# order only worked when `device` was left over from an earlier cell.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
gain = torch.ones(10, device=device)
# Entries 2..5 are filled with the value 13 picked from the list by index
# (presumably the grid size, used to scale x, y, w, h — TODO confirm).
gain[2:6] = torch.tensor([1,3,13,13,12])[[3, 2, 3, 2]]
tags = torch.tensor([[ 0.00000,  2.00000,  0.19658,  0.20487,  0.26816,  0.15605,0.1,0.2,0.3,0.4],[ 0.00000,  1.00000,  0.78653,  0.36487,  0.14542,  0.15631,0.1,0.2,0.3,0.4]]).to(device)

In [None]:
t = tags*gain # 因为坐标归一化了，所以乘于网格大小
print(t)

In [None]:
gxy = t[:, 2:4]  # grid xy
gwh = t[:, 4:6]  # grid wh
garrow = t[:,6:10]
print(gxy,gwh,garrow)

In [None]:
garrow[:,:2]*gwh

In [None]:
garrow[:,:2] = garrow[:,:2]*gwh+gxy-(gwh/2)
garrow[:,2:4] = garrow[:,2:4]*gwh+gxy-(gwh/2)

In [None]:
gwh/2

In [None]:
# x1
x1 = torch.tensor([[11,21,31],[21,31,41]],dtype=torch.int)
x1.shape # torch.Size([2, 3])
# x2
x2 = torch.tensor([[12,22,32],[22,32,42]],dtype=torch.int)
x2.shape  # torch.Size([2, 3])


In [None]:
inputs = (x1, x2)
torch.cat(inputs, dim=1).shape