[`check_anchors`](https://github.com/WongKinYiu/yolov7/blob/main/utils/autoanchor.py)

It is called in [`train_aux.py`](https://github.com/WongKinYiu/yolov7/blob/main/train_aux.py) at line 270:
```
    # Anchors
    if not opt.noautoanchor:
        check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
```

In [1]:
%matplotlib inline

import os
import re
import shlex
import sys
import yaml
import random

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# One seed shared by every random number generator used in this notebook
seed=0
# The three generators are independent, so the order in which they are seeded does not matter
for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_rng(seed)

## slower, more reproducible
#torch.backends.cudnn.benchmark, torch.backends.cudnn.deterministic = False, True
## faster, less reproducible
#torch.backends.cudnn.benchmark, torch.backends.cudnn.deterministic = True, False

<torch._C.Generator at 0x2182421ec10>

In [2]:

%load_ext autoreload
%autoreload 2

sys.path.append('../../../')
from video_processing.yolov7.parameter_parser import parser
from video_processing.yolov7.models.model import Model
from video_processing.yolov7.train.utils import setup_optimizer
from video_processing.yolov7.dataset.coco_dataset import LoadImagesAndLabels
from video_processing.yolov7.utils.general import one_cycle, check_image_size

In [3]:
# Root folders of the dataset and of the training results (machine specific; adjust before running)
data_dirpath='D:/data/coco'
result_dirpath='D:/results/yolov7'

# Command line that would be passed to the training script, written as one shell-like string
argument=f"""
--data-dirpath {data_dirpath}/coco --output-dirpath {result_dirpath} 
--worker 1 --device cpu --batch-size 2 --data coco.yaml --img 1280 1280 --cfg yolov7-w6.yaml
--weights ''  --name yolov7-w6 --hyp hyp.scratch.p6.yaml 
--n-training-data 100 --n-val-data 20 --correct-exif
"""
# shlex.split honours shell quoting, so `--weights ''` reaches the parser as an empty string;
# a plain str.split() would pass the two-character string "''" instead
args=parser.parse_args(shlex.split(argument))

In [4]:
# Use CUDA only when it is available and the CPU was not requested explicitly
use_cuda=torch.cuda.is_available() and args.device!='cpu'
device=torch.device('cuda' if use_cuda else 'cpu')
print(device, args.batch_size)

cpu 2


In [5]:
# hyperparameters
with open(args.hyp) as f:
    hyp=yaml.safe_load(f)
# data
# a literal suffix test: the previous regex "coco.yaml$" left the dot unescaped and so also matched e.g. "cocoXyaml"
args.is_coco=args.data.endswith('coco.yaml')
print('args.is_coco ', args.is_coco)
with open(args.data) as f:
    data_dict=yaml.safe_load(f)
# number of classes
nc=1 if args.single_cls else int(data_dict['nc'])
# class names: collapse to a single generic name when training in single-class mode on a multi-class dataset
names=['item'] if args.single_cls and len(data_dict['names'])!=1 else data_dict['names']
assert len(names)==nc, f'There are {len(names)} class names but {nc} classes'

args.is_coco  True


In [6]:
# Build the network; it is safer to move model to device first and then create optimizer
model=Model(
    args.cfg,
    ch=3,
    nc=nc,
    anchors=hyp.get('anchors'),
).to(device)
# Training split, limited to the requested number of images and loaded with augmentation enabled
train_dataset=LoadImagesAndLabels(
    data_dirpath=args.data_dirpath,
    image_paths=data_dict['train'],
    img_size=args.img_size[0],
    augment=True,
    hyp=hyp,
    n_data=args.n_training_data,
    correct_exif=args.correct_exif,
)

In IAxDetect nl: 4 na: 3
In IAxDetect anchors: torch.Size([4, 3, 2]) 4x3x2
In IAxDetect anchor_grid: torch.Size([4, 1, 3, 1, 1, 2]) 4x1x3x1x1x2
In dataset.coco_dataset.__init__ save cache to D:\data\coco\coco\labels\train2017.cache cache_path.is_file() True


In [121]:
from video_processing.yolov7.dataset.anchors import check_anchor_matching
# Run the packaged anchor check on the training data at the training image size
check_anchor_matching(
    dataset=train_dataset,
    model=model,
    thr=4.,
    imgsz=args.img_size[0],
)

In dataset.anchors.check_anchor_matching anchors/target=5.53, Best possible recall (BPR): 0.9963


In [113]:
from video_processing.yolov7.dataset.anchors import best_possible_recall_metric, kmean_anchors
from video_processing.yolov7.models.common import check_anchor_order

In [15]:
# Inputs of check_anchor_matching, bound at notebook level so its body can be replayed cell by cell below
dataset, thr, imgsz = train_dataset, 4., args.img_size[0]

In [None]:
def check_anchor_matching(dataset, model, thr=4., imgsz=640):
    '''
    Check that the anchors are not more than thr times bigger or smaller than the boxes in the training data.
    If they are, attempt to re-estimate them using genetic-evolved k-means and update the detection module.
    Args:
        dataset: contains normalized box information [normalized box center (x,y) and normalized width/height (w,h)]
            in `labels`. It must also contain the width/height of all images in `image_sizes` (used to denormalize boxes)
        model: contains the detection module (last entry of `model.model`) with anchors, anchor_grid, and stride
            information; these are the anchors that are checked and, if needed, updated
        thr (float): largest factor by which anchors are allowed to be bigger/smaller than boxes in training data
        imgsz (int): input image size used for training the model (not the original image size)
    Returns:
        None. When better anchors are found, `anchor_grid` and `anchors` of the detection module are replaced.
    '''
    # get detection module
    module=model.model[-1]
    # make the maximum size = imgsz while keeping the aspect ratio consistent
    shapes=imgsz*dataset.image_sizes/dataset.image_sizes.max(1, keepdims=True) # Nx2 where N is the number of images
    scale=np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # Nx1 augment scale
    # change normalized width,height to width/height in pixel unit using scaled_shape then stack along number of boxes to Nx2
    boxes_width_heights=torch.from_numpy(np.concatenate([s[None,:]*l[:,3:] for s, l in zip(shapes*scale, dataset.labels)], axis=0)).float()

    # nlx1xnax1x1x2 -> Mx2 width and height of each anchors
    anchors_width_heights=module.anchor_grid.clone().cpu().view(-1, 2)
    bpr,n_anchors=best_possible_recall_metric(boxes_wh=boxes_width_heights, anchors_wh=anchors_width_heights, threshold=thr)
    print(f'In dataset.anchors.check_anchor_matching anchors/target={n_anchors:.2f}, Best possible recall (BPR): {bpr:.4f}')

    # current anchors already fit the data well enough
    if bpr>0.98: return

    # recompute anchors
    n_anchors=module.anchor_grid.numel()//2 # divide by 2 because this contain both width and height
    try:
        new_anchor_width_heights=kmean_anchors(dataset=dataset, n_anchors=n_anchors, img_size=imgsz, thr=thr, n_generations=1000, verbose=False)
    except Exception as e:
        # best effort: without new anchors there is nothing to compare, so keep the current ones
        # (falling through would fail on the unbound new_anchor_width_heights)
        print(f'Error: {e}')
        return
    # order anchors by area so that, once reshaped per detection level, small anchors go with the first levels
    new_anchor_width_heights=np.asarray(new_anchor_width_heights)
    new_anchor_width_heights=new_anchor_width_heights[np.argsort(new_anchor_width_heights.prod(1))]
    new_bpr=best_possible_recall_metric(boxes_wh=boxes_width_heights, anchors_wh=new_anchor_width_heights, threshold=thr)[0]

    if new_bpr<bpr:
        print(f'In dataset.anchors.check_anchor_matching new anchors have lower bpr of {new_bpr} compared to {bpr}--used old one')
        return

    new_anchor_width_height_tensor=torch.from_numpy(new_anchor_width_heights).to(device=module.anchors.device, dtype=module.anchors.dtype)
    module.anchor_grid=new_anchor_width_height_tensor.clone().view_as(module.anchor_grid)
    # keep the anchors in pixel units while the level order is checked: if check_anchor_order reorders the levels,
    # dividing by the stride afterwards pairs every level with its own stride
    # NOTE(review): assumes check_anchor_order reorders module.anchors and module.anchor_grid together -- confirm
    module.anchors=new_anchor_width_height_tensor.clone().view_as(module.anchors)
    check_anchor_order(module)
    # normalize the anchors from image grid to feature cell grid
    module.anchors=module.anchors/module.stride.to(module.anchors.device).view(-1,1,1)
    print('In dataset.anchors.check_anchor_matching:: New anchor width/height has been estimated. Please update configuration file')
    

In [42]:
# get detection module
module=model.model[-1]
# make the maximum size = imgsz while keeping the aspect ratio consistent
longest_sides=dataset.image_sizes.max(1, keepdims=True) # Nx1 longest side of every image
shapes=imgsz*dataset.image_sizes/longest_sides # Nx2 where N is the number of images
scale=np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # Nx1 augment scale
# change normalized width,height to width/height in pixel unit using the scaled shape of each image
scaled_shapes=shapes*scale
boxes_wh_per_image=[image_shape[None,:]*image_labels[:,3:] for image_shape, image_labels in zip(scaled_shapes, dataset.labels)]
# then stack along number of boxes to Nx2
boxes_width_heights=torch.from_numpy(np.concatenate(boxes_wh_per_image, axis=0)).float()
print('boxes_width_heights ', boxes_width_heights.shape, boxes_width_heights.dtype)

boxes_width_heights  torch.Size([802, 2]) torch.float32


In [57]:
# Flatten the anchor grid (nlx1xnax1x1x2) into Mx2 rows holding the width and height of each anchor
anchors_width_heights=module.anchor_grid.clone().cpu().view(-1, 2)
print('anchors_width_heights ', anchors_width_heights.shape, anchors_width_heights.dtype)
# Score the current anchors against the training boxes
bpr,n_anchors=best_possible_recall_metric(
    boxes_wh=boxes_width_heights,
    anchors_wh=anchors_width_heights,
    threshold=thr,
)
print(f'anchors/target={n_anchors:.2f}, Best possible recall (BPR): {bpr:.4f}')

anchors_width_heights  torch.Size([12, 2]) torch.float32
anchors/target=5.68, Best possible recall (BPR): 0.9975


In [115]:
# if bpr<0.98: recompute anchors
n_anchors=module.anchor_grid.numel()//2 # divide by 2 because this contain both width and height
print('n_anchors ', n_anchors)
# Let a failure of kmean_anchors surface here: swallowing it would only defer the error to the next line
# (NameError), or silently reuse a stale new_anchor_width_heights left in the kernel by an earlier run
new_anchor_width_heights=kmean_anchors(dataset=dataset, n_anchors=n_anchors, img_size=imgsz, thr=thr, n_generations=1000, verbose=False)
print('new_anchor_width_heights ', new_anchor_width_heights.shape)
# score the re-estimated anchors against the same boxes as the current ones
new_bpr=best_possible_recall_metric(boxes_wh=boxes_width_heights, anchors_wh=new_anchor_width_heights, threshold=thr)[0]
print('new_bpr ', new_bpr, ' bpr ', bpr)

n_anchors  12
Running Kmeans for 12 on 802 boxes
threshold=0.25: 0.9925 best possible recall, 5.45 anchors passes the threshold
n_anchors=12, img_size=1280, metric_all=0.279/0.716 mean/best, past_threshold 0.473-mean: (75,148), (866,462), (466,297), (50,69), (25,31), (178,357), (317,580), (1195,741), (149,100), (272,183), (583,924), (117,226)
new_anchor_width_heights  (12, 2)
new_bpr  tensor(0.9963)  bpr  tensor(0.9975)


In [120]:
# if new_bpr>bpr:
new_anchor_width_height_tensor=torch.from_numpy(new_anchor_width_heights).to(device=module.anchors.device, dtype=module.anchors.dtype)
module.anchor_grid=new_anchor_width_height_tensor.clone().view_as(module.anchor_grid)
print(module.anchors.shape, module.stride.shape)
# normalize the anchors from image grid to feature cell grid
module.anchors=new_anchor_width_height_tensor.clone().view_as(module.anchors)/module.stride.to(module.anchors.device).view(-1,1,1)
check_anchor_order(module)
print('New anchor width/height has been estimated. Please update configuration file')

(torch.Size([4, 3, 2]), torch.Size([4]))

In [118]:
# Display the anchor grid currently stored in the detection module
module.anchor_grid

tensor([[[[[[  57.7727,  150.0484]]],


          [[[ 932.3475,  430.9550]]],


          [[[ 414.1675,  257.0540]]]]],




        [[[[[  36.0162,   57.3181]]],


          [[[  19.9298,   25.1142]]],


          [[[ 185.3887,  301.4194]]]]],




        [[[[[ 322.3356,  530.1092]]],


          [[[1140.8296,  818.9543]]],


          [[[ 108.1660,   87.4579]]]]],




        [[[[[ 196.4072,  152.4655]]],


          [[[ 539.0780,  816.8771]]],


          [[[  92.1634,  178.1084]]]]]])

In [None]:
# Bare name: running this cell only displays the imported function object
# NOTE(review): looks like a leftover inspection cell -- confirm whether it can be removed
check_anchor_order