[`check_anchors`](https://github.com/WongKinYiu/yolov7/blob/main/utils/autoanchor.py)

Call in [`train_aux.py`](https://github.com/WongKinYiu/yolov7/blob/main/train_aux.py) line 270
```
    # Anchors
    if not opt.noautoanchor:
        check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
```

In [1]:
%matplotlib inline

import os
import re
import sys
import yaml
import random

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

seed=0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

## slower, more reproducible
#cudnn.benchmark, cudnn.deterministic = False, True
## faster, less reproducible
#cudnn.benchmark, cudnn.deterministic = True, False

<torch._C.Generator at 0x2182421ec10>

In [2]:

%load_ext autoreload
%autoreload 2

sys.path.append('../../../')
from video_processing.yolov7.parameter_parser import parser
from video_processing.yolov7.models.model import Model
from video_processing.yolov7.train.utils import setup_optimizer
from video_processing.yolov7.dataset.coco_dataset import LoadImagesAndLabels
from video_processing.yolov7.utils.general import one_cycle, check_image_size

In [3]:
data_dirpath='D:/data/coco'
result_dirpath='D:/results/yolov7'

argument=f"""
--data-dirpath {data_dirpath}/coco --output-dirpath {result_dirpath} 
--worker 1 --device cpu --batch-size 2 --data coco.yaml --img 1280 1280 --cfg yolov7-w6.yaml
--weights ''  --name yolov7-w6 --hyp hyp.scratch.p6.yaml 
--n-training-data 100 --n-val-data 20 --correct-exif
"""
args=parser.parse_args(argument.split())

In [4]:
device=torch.device('cpu' if not torch.cuda.is_available() or args.device=='cpu' else 'cuda')
print(device, args.batch_size)

cpu 2


In [5]:
# hyperparameters
with open(args.hyp) as f: hyp=yaml.load(f, Loader=yaml.SafeLoader)
# data
args.is_coco=len(re.findall("coco.yaml$", args.data))>0
print('args.is_coco ', args.is_coco)
with open(args.data) as f: data_dict=yaml.load(f, Loader=yaml.SafeLoader)
# number of classes
nc=1 if args.single_cls else int(data_dict['nc']) 
names=['item'] if args.single_cls and len(data_dict['names'])!=1 else data_dict['names'] # class names
assert len(names)==nc, f'There are {len(names)} class names but {nc} classes' 

args.is_coco  True


In [6]:
model=Model(args.cfg, ch=3, nc=nc, anchors=hyp.get('anchors')).to(device)  # it is safer to move model to device first and then create optimizer
train_dataset=LoadImagesAndLabels(data_dirpath=args.data_dirpath, image_paths=data_dict['train'], img_size=args.img_size[0],
                            augment=True, hyp=hyp, n_data=args.n_training_data, correct_exif=args.correct_exif)

In IAxDetect nl: 4 na: 3
In IAxDetect anchors: torch.Size([4, 3, 2]) 4x3x2
In IAxDetect anchor_grid: torch.Size([4, 1, 3, 1, 1, 2]) 4x1x3x1x1x2
In dataset.coco_dataset.__init__ save cache to D:\data\coco\coco\labels\train2017.cache cache_path.is_file() True


In [56]:
from video_processing.yolov7.dataset.anchors import best_possible_recall_metric

In [15]:
dataset=train_dataset
thr=4.
imgsz=args.img_size[0]

In [42]:
# get detection module
module=model.model[-1]
# make the maximum size = imgsz while keeping the aspect ratio consistent
shapes=imgsz*dataset.image_sizes/dataset.image_sizes.max(1, keepdims=True) # Nx2 where N is the number of images
scale=np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # Nx1 augment scale
# change normalized width,height to width/height in pixel unit using scaled_shape
# then stack along number of boxes to Nx2
boxes_width_heights=torch.from_numpy(np.concatenate([s[None,:]*l[:,3:] for s, l in zip(shapes*scale, dataset.labels)], axis=0)).float()
print('boxes_width_heights ', boxes_width_heights.shape, boxes_width_heights.dtype)

boxes_width_heights  torch.Size([802, 2]) torch.float32


In [57]:
# nlx1xnax1x1x2 -> Mx2 width and height of each anchors
anchors_width_heights=module.anchor_grid.clone().cpu().view(-1, 2)
print('anchors_width_heights ', anchors_width_heights.shape, anchors_width_heights.dtype)
bpr,n_anchors=best_possible_recall_metric(boxes_wh=boxes_width_heights, anchors_wh=anchors_width_heights, threshold=thr)
print(f'anchors/target={n_anchors:.2f}, Best possible recall (BPR): {bpr:.4f}')

anchors_width_heights  torch.Size([12, 2]) torch.float32
anchors/target=5.68, Best possible recall (BPR): 0.9975


In [61]:
n_anchors=module.anchor_grid.numel()//2 # divide by 2 because this contain both width and height
print('n_anchors ', n_anchors)
#anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)

n_anchors  12


In [79]:
from scipy.cluster.vq import kmeans


def kmean_anchors(dataset, n_anchors, img_size,thr=4.0, gen=1000, verbose=False): 
    '''
    Create kmean-evolved anchors from training data
    Args:
        dataset: loaded dataset
        n_anchors (int): number of anchors
        img_size (int): training data size
        thr (float): ratio of width/height between anchors and ground-truth boxes
        gen (int): generations to evolve anchors using genetic algorithm
        verbose (bool): print result
        
    '''
    def metric(k, wh):
        '''
        Compute size matching score where 1 means best match and < 0 means worst match
        Args:
            k (Tensor): Kx2 where K is the number of boxes and 2 for width and height
            wh (Tensor): Mx2 where M is the number of reference boxes and 2 for width and height
        Returns:
            scores (Tensor): MxK size matching scores 
            best_scores (Tensor): M best matching scores for each box in k
        '''
        ratio=wh[:,None]/k[None] # MxKx2
        scores=torch.minimum(ratio, 1./ratio).min(dim=2).values # MxK matching scores
        return scores, scores.max(dim=1).values

    def print_results(k):
        '''
        Args:
            k (Tensor): Kx2 where K is the number of anchors and 2 is for width and height 
        '''
        scores, best_scores=metric(k, original_boxes_width_height)
        # best possible recall and the number of anchors with scores > input-score
        bpr, n_matched_achors=(best_scores>thr).float().mean(), (scores>thr).float().sum()
        print(f'threshold={thr:.2f}: {bpr:.4f} best possible recall, {n_matched_achors:.2f} anchors passes the threshold')
        print(f'n_anchors={n_anchors}, img_size={img_size}, metric_all={scores.mean():.3f}/{best_scores.mean():.3f} mean/best', end=', ')
        print(f'past_threshold {scores[scores>thr].mean():.3f}-mean: ', end='')
        for i, x in enumerate(k): print(f'({int(round(x[0].item()))},{int(round(x[1].item()))})', end=', ' if i<len(k)-1 else '\n' )
            
    score=1./thr
    # training data image size, keeping original aspect ratio between width and height 
    image_sizes=img_size*dataset.image_sizes/dataset.image_sizes.max(axis=1, keepdims=True) # Nx2 where N is the number of training data
    # convert normalized width and height stored as labels to width and height in pixel unit 
    original_boxes_width_height=np.concatenate([l[:, 3:5]*s[None,:] for l, s in zip(dataset.labels, image_sizes)], axis=0) # Mx2

    # filter
    small_boxes=(original_boxes_width_height<3.).any(axis=1).sum()
    if small_boxes>0: print(f'Warning: Extremely small objects found. {small_boxes} of {len(original_boxes_width_height)} labels are < 3 pixels in size ')
    filtered_boxes_width_height=original_boxes_width_height[(original_boxes_width_height>=2.).any(axis=1)] # we allow only boxes > 2 pixel width and height

    # Kmean
    print(f'Running Kmeans for {n_anchors} on {len(filtered_boxes_width_height)} boxes')
    std=filtered_boxes_width_height.std(axis=0) # 2 element sigmas for whitening of width and height
    k, dist=kmeans(filtered_boxes_width_height/std, n_anchors, iter=30) # n_anchorsx2 points and scalar mean distance
    assert len(k)==n_anchors, f'Error: scipy.cluster.vq.kmeans requested {n_anchors} points but returns only {len(k)}'
    k*=s[None,:] # n_anchors x 2
    filtered_boxes_width_height=torch.from_numpy(filtered_boxes_width_height).float()
    original_boxes_width_height=torch.from_numpy(original_boxes_width_height).float()
    print('filtered_boxes_width_height ', filtered_boxes_width_height.shape, ' original_boxes_width_height ',
         original_boxes_width_height.shape)
    
    # sort anchors by areas from small to large
    k=torch.from_numpy(k[np.argsort(k.prod(axis=1))]).float()
    print_results(k)

anchors = kmean_anchors(dataset=dataset, n_anchors=n_anchors, img_size=imgsz, thr=thr, gen=1000, verbose=True)

Running Kmeans for 12 on 802 boxes
filtered_boxes_width_height  torch.Size([802, 2])  original_boxes_width_height  torch.Size([802, 2])
threshold=4.00: 0.0000 best possible recall, 0.00 anchors passes the threshold
n_anchors=12, img_size=1280, metric_all=0.162/0.528 mean/best, past_threshold nan-mean: (122,103), (238,232), (356,515), (696,335), (587,766), (1332,601), (881,1197), (2286,1001), (1556,1952), (4255,1554), (2864,3108), (5868,2492)


In [72]:
n_anchors

12