In [1]:
%matplotlib inline
import os

# cuBLAS only behaves deterministically on CUDA >= 10.2 when a fixed workspace size is
# configured (see the PyTorch reproducibility notes). Set it here, in the very first cell,
# so that it is already in the environment before any CUDA work starts.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

In [2]:
import os
import copy
import matplotlib.pyplot as plt

import torch
import random
import numpy as np

# Seed the PyTorch, Python stdlib and NumPy global RNGs with one shared value.
seed = 0
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(seed)

In [3]:
import os
import sys

# Make the directory three levels up importable; it has to contain the `computer_vision`
# package imported below. The path is resolved to an absolute one so later changes of the
# working directory cannot break imports, and it is only added once so re-running this
# cell does not pile up duplicate sys.path entries.
_package_root = os.path.abspath('../../..')
if _package_root not in sys.path:
    sys.path.append(_package_root)

%load_ext autoreload
%autoreload 2

from computer_vision.yolov11.modules.detector import DetectionModel
from computer_vision.yolov11.parameter_parser import parser
from computer_vision.yolov11.utils.check import check_imgsz
from computer_vision.yolov11.data.dataset import YOLODataset

In [4]:
# Dataset root and output directory for this run (machine-specific: edit for your setup).
data_dirpath=r'D:/data/ultralytics/coco128'
result_dirpath='D:/results/yolov11/training'

# Command-line tokens handed to the project's argument parser. They are kept as a list
# (instead of splitting one long string on whitespace) so that a path containing spaces
# stays a single argument. Validation reuses the training image/label directories.
argument=[
    '--root', data_dirpath,
    '--train-image-dirname', 'images/train2017', '--train-label-dirname', 'labels/train2017',
    '--val-image-dirname', 'images/train2017', '--val-label-dirname', 'labels/train2017',
    '--data-cfg', '../coco128.yaml', '--hyperparam', '../default.yaml', '--model-cfg', '../yolo11.yaml',
    '--batch-size', '2',
    '--output-dirpath', result_dirpath,
    '--checkpoint-dirpath', f'{result_dirpath}/checkpoints',
    '--save-period', '1',
]
args=parser.parse_args(argument)

# exist_ok=True replaces the check-then-create pattern and keeps the cell safe to re-run.
os.makedirs(args.checkpoint_dirpath, exist_ok=True)

In [5]:
from __future__ import annotations

from typing import Any
from pathlib import Path
from argparse import Namespace

import yaml

from computer_vision.yolov11.utils.torch_utils import init_seeds

class DetectionTrainer:
    """
    Configuration and bookkeeping state for a YOLO detection training run.

    The constructor only prepares state: it loads the hyperparameter and dataset
    configurations, merges the hyperparameters into ``args``, picks the device, calls
    ``init_seeds``, creates the output/checkpoint directories and checks that the dataset
    directories exist. No model, EMA, scheduler or dataloader is created here; the
    corresponding attributes start out as ``None``.
    """

    def __init__(self, args:Namespace, cfg:str | dict, inch:int=3):
        """
        Initialize a DetectionTrainer object for training YOLO
        Args:
            args (Namespace): Training parameters. The object is modified in place:
                hyperparameters missing from it are added, ``root`` is converted to a
                ``Path`` and ``worker`` is forced to 0 on cpu/mps devices.
            cfg (str | dict): Hyperparameters, either as a dict or as the path of a YAML file
            inch (int): The number of input channels
        Raises:
            TypeError: If ``cfg`` or ``args.data_cfg`` is neither a path nor a dict
            AssertionError: If a YAML path or one of the dataset directories does not exist
        """
        self.cfg=self._load_config(cfg, 'cfg') # hyperparameters
        # Loaded through the same helper as cfg so that self.data is always defined
        self.data=self._load_config(args.data_cfg, 'args.data_cfg') # dataset description

        self.inch=inch
        # Merge the hyperparameters into args; values already present on args are kept
        for k, v in self.cfg.items():
            if not hasattr(args, k): setattr(args, k, v)
        self.args=args
        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        init_seeds(seed=self.args.seed, deterministic=self.args.deterministic)

        # Directories
        self.save_dir=Path(self.args.output_dirpath)
        self.wdir=Path(self.args.checkpoint_dirpath) # weight directory
        # mkdir(exist_ok=True) avoids the race between checking for and creating the directory
        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.wdir.mkdir(parents=True, exist_ok=True)
        self.last, self.best=self.wdir/self.args.latest_checkpoint, self.wdir/self.args.best_checkpoint
        self.save_period=self.args.save_period

        self.batch_size=self.args.batch_size
        # in case users accidentally pass epochs=None
        self.epochs=self.args.epochs or 100
        self.start_epoch=0

        # setting worker=0 yields faster CPU training as time dominated by inference, not dataloading
        if self.device.type in {'cpu', 'mps'}: self.args.worker=0

        # check data
        self.args.root=Path(self.args.root)
        for dirname in [self.args.train_image_dirname, self.args.train_label_dirname,
                        self.args.val_image_dirname, self.args.val_label_dirname]:
            assert (self.args.root/dirname).is_dir(), f'{self.args.root/dirname} does not exist'

        self.ema=None

        # Optimization utils init
        self.lf=None
        self.scheduler=None

        # Epoch level metrics
        self.best_fitness=None
        self.fitness=None
        self.loss=None
        self.tloss=None
        self.loss_names=["Loss"]
        self.cvs=self.save_dir/"result.csv"
        # `cvs` looks like a transposition of `csv`; the old name is kept for existing
        # users and the intended spelling is offered as an alias of the same path.
        self.csv=self.cvs
        self.plot_idx=[0,1,2]
        # NOTE(review): the original comment reads "single GPU training"; confirm that 0
        # (rather than 1) is the value the rest of the training code expects.
        self.world_size=0 # single GPU training

    @staticmethod
    def _load_config(source, name:str):
        """
        Return a configuration given either directly as a dict or as the path of a YAML file
        Args:
            source (str | Path | dict): The configuration or the file to read it from
            name (str): Argument name used in the error message
        Returns:
            ``source`` itself when it is a dict, otherwise the parsed content of the YAML file
        Raises:
            TypeError: If ``source`` is neither a path nor a dict
            AssertionError: If the file does not exist
        """
        if isinstance(source, dict): return source
        if not isinstance(source, (str, Path)): raise TypeError(f'{name} must be dict/str but got {type(source)}')
        path=Path(source)
        assert path.is_file(), f'{path} does not exist'
        # Explicit utf8 so parsing does not depend on the platform's default encoding
        with open(path, encoding="utf8") as f: return yaml.safe_load(f)
        

In [6]:
# Build the trainer from the parsed arguments; the hyperparameter YAML path given on the
# command line doubles as the trainer's cfg.
trainer = DetectionTrainer(
    args,
    cfg=args.hyperparam,
    inch=3,
)

In [None]:
# Outline of the trainer method being prototyped: _do_train(self)
#    first step: self._setup_train()  -- worked through in the cell below

In [17]:
# def _setup_train(self):

# Instantiate the detector from the model config and attach the class names of the dataset.
trainer.model = DetectionModel(cfg=trainer.args.model_cfg, ch=trainer.inch)
trainer.model.names = trainer.data["names"]

# Parameters whose name contains one of these substrings are frozen; every other
# floating-point parameter that is currently frozen gets re-enabled.
always_freeze_names = ['.dfl']
for param_name, param in trainer.model.named_parameters():
    must_freeze = any(tag in param_name for tag in always_freeze_names)
    if must_freeze:
        print(f'Freezing layer {param_name}')
        param.requires_grad = False
        continue
    # only floating point can require gradients
    if param.dtype.is_floating_point and not param.requires_grad:
        print(f'Unfreeze layer {param_name}')
        param.requires_grad = True

# Check imgsz
# Grid size is the model's largest stride, never smaller than 32; models without a
# `stride` attribute fall back to 32.
if hasattr(trainer.model, 'stride'):
    model_stride = int(trainer.model.stride.max())
else:
    model_stride = 32
gs = max(model_stride, 32)

# Let the project helper validate the requested image size against the grid size.
trainer.args.imgsz = check_imgsz(trainer.args.imgsz, stride=gs, floor=gs, max_dim=1)
print('gs ', gs, ' trainer.args.imgsz ', trainer.args.imgsz)

# for multiscale training
trainer.stride = gs

# Dataloaders
def _make_dataset(image_dirname, label_dirname, augment):
    """Build a YOLODataset under trainer.args.root; the splits differ only in their directories and `augment`."""
    return YOLODataset(img_path=(trainer.args.root/image_dirname),
                       label_path=(trainer.args.root/label_dirname),
                       data=trainer.data, hyp=trainer.cfg, imgsz=trainer.args.imgsz, cache=True, augment=augment, rect=False,
                       batch_size=trainer.args.batch_size, stride=gs, pad=0.5, single_cls=False, classes=None, fraction=1.,
                       channels=trainer.inch)


def _make_loader(dataset, drop_last):
    """Wrap a dataset in a DataLoader; the splits differ only in `drop_last`.

    The batch size always comes from trainer.args (the validation loader used to read the
    notebook-global `args` instead). Arguments that were merely restating the DataLoader
    defaults (sampler, batch_sampler, timeout, worker_init_fn, prefetch_factor,
    persistent_workers, pin_memory) are no longer passed.
    """
    # NOTE(review): shuffle=False is kept from the original for BOTH loaders. Training
    # normally shuffles; confirm the fixed order is intentional (e.g. for debugging).
    return torch.utils.data.DataLoader(dataset=dataset, batch_size=trainer.args.batch_size, shuffle=False,
                                       num_workers=trainer.args.worker, collate_fn=YOLODataset.collate_fn,
                                       drop_last=drop_last)


train_dataset=_make_dataset(trainer.args.train_image_dirname, trainer.args.train_label_dirname, augment=True)
val_dataset=_make_dataset(trainer.args.val_image_dirname, trainer.args.val_label_dirname, augment=False)
# Only the training loader drops the last incomplete batch.
train_loader=_make_loader(train_dataset, drop_last=True)
val_loader=_make_loader(val_dataset, drop_last=False)

In BaseModel._predict_once max_idx -1 embed {-1}
Freezing layer model.23.dfl.conv.weight
gs  32  trainer.args.imgsz  640
