In [1]:
%matplotlib inline

import os
import random
import re
import shlex
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import yaml

# Seed every random number generator this notebook uses, all with the same value.
seed = 0
for seed_fn in (random.seed, np.random.seed):
    seed_fn(seed)
# Deliberately the last expression of the cell: it returns the torch Generator,
# which the notebook echoes as this cell's output.
torch.manual_seed(seed)

# Optional cuDNN trade-off, left disabled.
# NOTE(review): `cudnn` is not imported in the visible cells (presumably
# torch.backends.cudnn) -- confirm before enabling either line.
## slower, more reproducible
#cudnn.benchmark, cudnn.deterministic = False, True
## faster, less reproducible
#cudnn.benchmark, cudnn.deterministic = True, False

<torch._C.Generator at 0x1f67fe60c30>

In [2]:

%load_ext autoreload
%autoreload 2

sys.path.append('../../../')
from video_processing.yolov7.parameter_parser import parser
from video_processing.yolov7.models.model import Model
from video_processing.yolov7.train.utils import setup_optimizer, labels_to_class_weights
from video_processing.yolov7.dataset.coco_dataset import LoadImagesAndLabels
from video_processing.yolov7.utils.general import one_cycle, check_image_size
from video_processing.yolov7.dataset.anchors import check_anchor_matching

In [3]:
# ---- Run configuration ----
# NOTE: machine-specific absolute paths; edit these two lines for another setup.
data_dirpath='D:/data/coco'
result_dirpath='D:/results/yolov7'

# Command-line style arguments handed to the project's argparse parser.
argument=f"""
--data-dirpath {data_dirpath}/coco --output-dirpath {result_dirpath}
--worker 1 --device cpu --batch-size 2 --data coco.yaml --img 1280 1280 --cfg yolov7-w6.yaml
--weights ''  --name yolov7-w6 --hyp hyp.scratch.p6.yaml
--n-training-data 100 --n-val-data 20 --correct-exif
"""
# shlex.split applies shell quoting rules, so `--weights ''` reaches argparse as
# an empty string. A plain str.split() would pass the two-character literal "''".
args=parser.parse_args(shlex.split(argument))

# Use CUDA only when it is available AND the run was not pinned to the CPU.
use_cuda=torch.cuda.is_available() and args.device!='cpu'
device=torch.device('cuda' if use_cuda else 'cpu')
print(device, args.batch_size)

# hyperparameters (safe_load is PyYAML's shorthand for load(..., Loader=SafeLoader))
with open(args.hyp) as f:
    hyp=yaml.safe_load(f)

cpu 2


In [4]:
# Build the network described by args.cfg.
nc = 80  # class count handed to Model
anchor_spec = hyp.get('anchors')  # None when the hyperparameter file has no 'anchors' key
model = Model(args.cfg, ch=3, nc=nc, anchors=anchor_spec)
# Move to the target device right away: it is safer to place the model on its
# device first and only then create an optimizer.
model = model.to(device)


In IAxDetect nl: 4 na: 3
In IAxDetect anchors: torch.Size([4, 3, 2]) 4x3x2
In IAxDetect anchor_grid: torch.Size([4, 1, 3, 1, 1, 2]) 4x1x3x1x1x2


In [5]:
# Dataset description file; its 'train' entry is passed to the dataset as image_paths.
with open(args.data) as f:
    data_dict = yaml.load(f, Loader=yaml.SafeLoader)

# Training dataset (augmentation on). No validation loader is built in this cell.
train_dataset = LoadImagesAndLabels(
    data_dirpath=args.data_dirpath,
    image_paths=data_dict['train'],
    img_size=args.img_size[0],
    augment=True,
    hyp=hyp,
    n_data=args.n_training_data,
    correct_exif=args.correct_exif,
)

# Batches are assembled by the dataset class's own collate function.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=args.batch_size,
    num_workers=1,
    pin_memory=True,
    collate_fn=LoadImagesAndLabels.collate_fn,
)

In dataset.coco_dataset.__init__ save cache to D:\data\coco\coco\labels\train2017.cache cache_path.is_file() True


In [6]:
from video_processing.yolov7.loss.utils import find_5_positive

In [7]:
class ComputeLossAuxOTA:
    def __init__(self, model, cls_pw, obj_pw, label_smoothing):
        '''
        Gather the loss criteria and the detection-head settings in one object.

        Args:
            model: network exposing parameters(), a `gr` attribute and a `model`
                sequence whose last element carries na, nc, nl, anchors and stride
            cls_pw (float/sequence): positive weight for class classification in BCEWithLogitsLoss
            obj_pw (float/sequence): positive weight for objectness in BCEWithLogitsLoss
            label_smoothing (float): label smoothing eps
        '''
        super().__init__()
        param_device = next(model.parameters()).device

        def _weighted_bce(pos_weight):
            # One BCE-with-logits criterion whose pos_weight lives on the model's device.
            weight = torch.tensor([pos_weight], device=param_device)
            return nn.BCEWithLogitsLoss(pos_weight=weight)

        self.BCEcls = _weighted_bce(cls_pw)
        self.BCEobj = _weighted_bce(obj_pw)

        # Smoothed positive / negative targets.
        # https://arxiv.org/pdf/1902.04103.pdf eqn 3
        # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
        half_eps = 0.5*label_smoothing
        self.positive_class = 1.0 - half_eps
        self.negative_class = half_eps

        detect_head = model.model[-1]
        # Three weights for heads with fewer than 4 layers, otherwise five.
        if detect_head.nl < 4:
            self.balance = [4., 1., 0.4]
        else:
            self.balance = [4., 1., .25, .06, .02]

        self.ssi = 0  # NOTE(review): purpose not visible in this notebook -- confirm
        self.gr = model.gr  # copied from the model; must be set there before construction
        # Mirror the head's layout attributes onto the loss object.
        for attr_name in ('na', 'nc', 'nl', 'anchors', 'stride'):
            setattr(self, attr_name, getattr(detect_head, attr_name))
            
# ComputeLossAuxOTA.__init__ reads model.gr, so it has to exist on the model first.
model.gr = 1.  # iou loss ratio (obj_loss =1 or iou)?
loss_module = ComputeLossAuxOTA(
    model,
    cls_pw=hyp['cls_pw'],
    obj_pw=hyp['obj_pw'],
    label_smoothing=args.label_smoothing,
)

In [8]:
# Pull a single batch from the training loader for inspection.
first_batch = next(iter(train_loader))
imgs, targets, paths = first_batch

# Image tensor: shape, dtype and overall value range.
print('imgs ', imgs.shape, imgs.dtype, imgs.min().item(), imgs.max().item())

# Target tensor: shape, dtype and per-column min / max, two decimals each.
column_min = ['{:.2f}'.format(v) for v in targets.min(0).values.tolist()]
column_max = ['{:.2f}'.format(v) for v in targets.max(0).values.tolist()]
print('targets ', targets.shape, targets.dtype, column_min, column_max)

imgs  torch.Size([2, 3, 1280, 1280]) torch.uint8 0 154
targets  torch.Size([4, 6]) torch.float32 ['0.00', '0.00', '0.50', '0.40', '0.06', '0.06'] ['1.00', '32.00', '0.67', '0.71', '1.00', '0.60']


In [9]:
# Scale the uint8 batch to floats in [0, 1] and run one forward pass.
# The dtype guard keeps this cell idempotent: the loader yields uint8 images, and
# once they are converted a re-run must not divide by 255 a second time.
if imgs.dtype == torch.uint8:
    imgs=imgs.to(device, non_blocking=True).float() / 255.0
pred = model(imgs)  # forward
# pred is a list of tensors; show container type, element type, count and shapes.
print(type(pred), type(pred[0]), len(pred), [p.shape for p in pred])
# Not run here -- the loss steps are walked through manually in later cells:
#loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs)  #

<class 'list'> <class 'torch.Tensor'> 8 [torch.Size([2, 3, 160, 160, 85]), torch.Size([2, 3, 80, 80, 85]), torch.Size([2, 3, 40, 40, 85]), torch.Size([2, 3, 20, 20, 85]), torch.Size([2, 3, 160, 160, 85]), torch.Size([2, 3, 80, 80, 85]), torch.Size([2, 3, 40, 40, 85]), torch.Size([2, 3, 20, 20, 85])]


In [10]:
# Number of prediction tensors, followed by the shape of each one.
pred_shapes = [head_out.shape for head_out in pred]
print(len(pred), pred_shapes)

8 [torch.Size([2, 3, 160, 160, 85]), torch.Size([2, 3, 80, 80, 85]), torch.Size([2, 3, 40, 40, 85]), torch.Size([2, 3, 20, 20, 85]), torch.Size([2, 3, 160, 160, 85]), torch.Size([2, 3, 80, 80, 85]), torch.Size([2, 3, 40, 40, 85]), torch.Size([2, 3, 20, 20, 85])]


In [11]:
# Shape of the image batch; index 2 of this shape is what the target-matching
# cell passes as image_size.
imgs.shape

torch.Size([2, 3, 1280, 1280])

In [12]:
import copy
from video_processing.yolov7.loss.utils import determine_matching_target_for_auxillary_heads, find_5_positive

# Step-by-step version of the target-assignment part of the loss call, run at
# notebook level instead of inside the class. Lines it mirrors:
# def __call__(self, predictions, targets, images):
# bs_aux, as_aux_, gjs_aux, gis_aux, targets_aux, anchors_aux = self.build_targets2(p[:self.nl], targets, imgs)
#
# Auxiliary head: find the targets in cell grid unit that match anchors
# (inside_grid_cell=1.), then resolve the matched targets per prediction layer.
indices4aux, anch4aux =find_5_positive(prediction=pred[:loss_module.nl], targets=targets, anchors=loss_module.anchors,
                                             matching_threshold=hyp['anchor_t'], inside_grid_cell=1.)
bs_aux, as_aux_, gjs_aux, gis_aux, targets_aux, anchors_aux \
=determine_matching_target_for_auxillary_heads(prediction=pred[:loss_module.nl], targets=targets, indices=indices4aux, anch=anch4aux,
                                               stride=loss_module.stride, image_size=imgs.shape[2], n_classes=loss_module.nc)

# Main head: same two calls with inside_grid_cell=.5.
# NOTE(review): this reuses the *_for_auxillary_heads helper and the same
# pred[:loss_module.nl] slice as the auxiliary branch -- confirm that is intended.
# CAUTION: the unpacking below rebinds the global `targets` (and creates `anchors`).
# The auxiliary calls above must therefore stay first, and re-running this cell
# without re-running the batch-loading cell feeds this cell's own output back in
# as `targets`.
indices4main, anch4main=find_5_positive(prediction=pred[:loss_module.nl], targets=targets, anchors=loss_module.anchors,
                                             matching_threshold=hyp['anchor_t'], inside_grid_cell=.5)
bs, as_,gjs, gis, targets, anchors \
=determine_matching_target_for_auxillary_heads(prediction=pred[:loss_module.nl], targets=targets, indices=indices4main, anch=anch4main,
                                               stride=loss_module.stride, image_size=imgs.shape[2], n_classes=loss_module.nc)

In [18]:
# NOTE(review): `matching_bs` is not assigned in any cell visible above, and the
# execution count jumps from 12 to 18, so this cell presumably relies on state
# from cells that are no longer shown. It would raise NameError after
# "Restart & Run All" -- TODO confirm which variable (e.g. `bs` or `bs_aux`) was meant.
matching_bs

[tensor([1]),
 tensor([1, 1, 1, 1, 1]),
 tensor([0, 1, 1, 1]),
 tensor([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])]