From 67c20ae317bf8704ac13657bbfab9bee0bf7cf6a Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Sat, 13 Feb 2021 19:09:00 -0500
Subject: [PATCH 1/6] base ultralytics integration

---
 examples/ultralytics-sparseml/README.md | 100 ++++
 examples/ultralytics-sparseml/main.py   | 722 ++++++++++++++++++++++++
 2 files changed, 822 insertions(+)
 create mode 100644 examples/ultralytics-sparseml/README.md
 create mode 100644 examples/ultralytics-sparseml/main.py

diff --git a/examples/ultralytics-sparseml/README.md b/examples/ultralytics-sparseml/README.md
new file mode 100644
index 00000000000..67693654fec
--- /dev/null
+++ b/examples/ultralytics-sparseml/README.md
@@ -0,0 +1,100 @@
+<!--
+Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# SparseML-ultralytics/yolov5 integration
+This directory provides a SparseML integrated training script for the popular
+[ultralytics/yolov5](https://github.com/ultralytics/yolov5)
+repository.
+
+Using this integration, you will be able to apply SparseML optimizations
+to the powerful training flows provided in the yolov5 repository.
+
+Some of the tasks you can perform using this integration include, but are not limited to:
+* model pruning
+* quantization-aware-training
+* sparse quantization-aware-training
+* sparse transfer learning
+
+## Installation
+To use the script, clone both repositories, install their dependencies,
+and copy the integrated training script into the yolov5 directory to run from.
+
+```bash
+# clone
+git clone https://github.com/ultralytics/yolov5.git
+git clone https://github.com/neuralmagic/sparseml.git
+
+# copy script
+cp sparseml/examples/ultralytics-sparseml/main.py yolov5
+cd yolov5
+
+# install dependencies
+pip install -r requirements.txt
+pip install sparseml
+```
+
+
+## Script
+`examples/ultralytics-sparseml/main.py` modifies
+[`train.py`](https://github.com/ultralytics/yolov5/blob/master/train.py)
+from yolov5 to include a `--sparseml-recipe-path` argument
+that selects the SparseML optimizations to run.  This can be a file path to a local
+SparseML recipe or a SparseZoo model stub prefixed by `zoo:` such as
+`zoo:cv/detection/yolo_v3-spp/pytorch/ultralytics/coco/pruned-aggressive`.
+
+Additionally, for sparse transfer learning, the flag `--sparse-transfer-learn`
+was added.  Running the script with this flag will add modifiers to the given
+recipe that will keep the base sparsity constant during training, allowing
+the model to learn the new dataset while keeping the same optimized structure.
+If a SparseZoo recipe path is provided with sparse transfer learning enabled,
+then the model's specific "transfer" recipe will be loaded instead.
+
+To load the base weights for a SparseZoo recipe as the initial checkpoint, set
+`--weights` to `zoo`.  To use the weights of a SparseZoo model as the
+initial checkpoint, pass that model's SparseZoo stub prefixed by `zoo:` to the
+`--weights` argument.
+
+Running the script will
+follow the normal yolov5 training flow with the given SparseML optimizations enabled.
+
+Some considerations:
+
+* `--sparseml-recipe-path` is a required parameter
+* `--epochs` will now be overridden by the epochs set in the SparseML recipe
+* if learning rate schedulers are set both in the yolov5 script and in your recipe, they
+may conflict with each other and cause unintended side effects; choose
+hyperparameters accordingly.
+* Modifiers will log their outputs to the console as well as to the tensorboard file
+* After training is complete, the final model will be exported to ONNX using SparseML
+
+You can learn how to build or download a recipe using the
+[SparseML](https://github.com/neuralmagic/sparseml)
+or [SparseZoo](https://github.com/neuralmagic/sparsezoo)
+documentation, or export one with [Sparsify](https://github.com/neuralmagic/sparsify).
+
+Documentation on the original script can be found
+[here](https://github.com/ultralytics/yolov5).
+The latest commit hash that `main.py` is based on is included in the docstring.
+
+
+#### Example Command
+Call the script from the `yolov5` directory, passing in the same arguments as
+`train.py`, with the additional SparseML argument(s) included.
+```bash
+python main.py \
+  --sparseml-recipe-path /PATH/TO/RECIPE/recipe.yaml \
+  <regular yolov5/train.py parameters>
+```  
diff --git a/examples/ultralytics-sparseml/main.py b/examples/ultralytics-sparseml/main.py
new file mode 100644
index 00000000000..50cb3e7ca59
--- /dev/null
+++ b/examples/ultralytics-sparseml/main.py
@@ -0,0 +1,722 @@
+# neuralmagic: no copyright
+# flake8: noqa
+# fmt: off
+# isort: skip_file
+
+"""
+Integration between https://github.com/ultralytics/yolov5 and SparseML
+
+This script is adapted from https://github.com/ultralytics/yolov5/blob/master/train.py
+to apply a SparseML recipe from the required `--sparseml-recipe-path` argument.
+Integration lines are preceded by comment blocks.  Run with `--help` for help printout,
+more information can be found in the readme file.
+
+Latest yolov5 commit this script is based on: c9bda11
+"""
+import argparse
+import logging
+import math
+import os
+import random
+import time
+from pathlib import Path
+from threading import Thread
+
+import numpy as np
+import torch.distributed as dist
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torch.optim.lr_scheduler as lr_scheduler
+import torch.utils.data
+import yaml
+from torch.cuda import amp
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.utils.tensorboard import SummaryWriter
+from tqdm import tqdm
+
+import test  # import test.py to get mAP after each epoch
+from models.experimental import attempt_load
+from models.yolo import Model
+from utils.autoanchor import check_anchors
+from utils.datasets import create_dataloader
+from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
+    fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \
+    check_requirements, print_mutation, set_logging, one_cycle, colorstr
+from utils.google_utils import attempt_download
+from utils.loss import ComputeLoss
+from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
+from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first
+
+from sparseml.pytorch.optim import ScheduledModifierManager, ScheduledOptimizer
+from sparseml.pytorch.utils import ModuleExporter, PythonLogger, TensorBoardLogger
+from sparsezoo import Zoo
+
+logger = logging.getLogger(__name__)
+
+
+def train(hyp, opt, device, tb_writer=None, wandb=None):
+    """Train a yolov5 model with a SparseML recipe applied; returns the last `results` metrics tuple.
+
+    hyp: hyperparameter dict, opt: parsed CLI options, device: torch device, tb_writer / wandb: optional loggers.
+    """
+    logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
+    save_dir, epochs, batch_size, total_batch_size, weights, rank = \
+        Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank
+
+    # Directories
+    wdir = save_dir / 'weights'
+    wdir.mkdir(parents=True, exist_ok=True)  # make dir
+    last = wdir / 'last.pt'
+    best = wdir / 'best.pt'
+    results_file = save_dir / 'results.txt'
+
+    # Save run settings
+    with open(save_dir / 'hyp.yaml', 'w') as f:
+        yaml.dump(hyp, f, sort_keys=False)
+    with open(save_dir / 'opt.yaml', 'w') as f:
+        yaml.dump(vars(opt), f, sort_keys=False)
+
+    # Configure
+    plots = not opt.evolve  # create plots
+    cuda = device.type != 'cpu'
+    init_seeds(2 + rank)
+    with open(opt.data) as f:
+        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
+    with torch_distributed_zero_first(rank):
+        check_dataset(data_dict)  # check
+    train_path = data_dict['train']
+    test_path = data_dict['val']
+    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
+    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
+    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check
+
+    # Model
+    pretrained = weights.endswith('.pt')
+    if pretrained:
+        with torch_distributed_zero_first(rank):
+            attempt_download(weights)  # download if not found locally
+        ckpt = torch.load(weights, map_location=device)  # load checkpoint
+        if hyp.get('anchors'):
+            ckpt['model'].yaml['anchors'] = round(hyp['anchors'])  # force autoanchor
+        model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device)  # create
+        exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else []  # exclude keys
+        state_dict = ckpt['model'].float().state_dict()  # to FP32
+        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect
+        model.load_state_dict(state_dict, strict=False)  # load
+        logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights))  # report
+    else:
+        model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create
+
+    # Freeze
+    freeze = []  # parameter names to freeze (full or partial)
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        if any(x in k for x in freeze):
+            print('freezing %s' % k)
+            v.requires_grad = False
+
+    # Optimizer
+    nbs = 64  # nominal batch size
+    accumulate = max(round(nbs / total_batch_size), 1)  # accumulate loss before optimizing
+    hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
+    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")
+
+    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
+    for k, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            pg2.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            pg0.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            pg1.append(v.weight)  # apply decay
+
+    if opt.adam:
+        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
+    else:
+        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
+
+    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
+    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
+    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
+    del pg0, pg1, pg2
+
+    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
+    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
+    if opt.linear_lr:
+        lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf']  # linear
+    else:
+        lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1->hyp['lrf']
+    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
+    # plot_lr_scheduler(optimizer, scheduler, epochs)
+
+    # Logging
+    if rank in [-1, 0] and wandb and wandb.run is None:
+        opt.hyp = hyp  # add hyperparameters
+        wandb_run = wandb.init(config=opt, resume="allow",
+                               project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
+                               name=save_dir.stem,
+                               id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
+    loggers = {'wandb': wandb}  # loggers dict
+
+    # Resume
+    start_epoch, best_fitness = 0, 0.0
+    if pretrained:
+        # Optimizer
+        if ckpt['optimizer'] is not None:
+            optimizer.load_state_dict(ckpt['optimizer'])
+            best_fitness = ckpt['best_fitness']
+
+        # Results
+        if ckpt.get('training_results') is not None:
+            with open(results_file, 'w') as file:
+                file.write(ckpt['training_results'])  # write results.txt
+
+        # Epochs
+        start_epoch = ckpt['epoch'] + 1
+        if opt.resume:
+            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
+        if epochs < start_epoch:
+            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
+                        (weights, ckpt['epoch'], epochs))
+            epochs += ckpt['epoch']  # finetune additional epochs
+
+        del ckpt, state_dict
+
+    # Image sizes
+    gs = int(model.stride.max())  # grid size (max stride)
+    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
+    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples
+
+    # DP mode
+    if cuda and rank == -1 and torch.cuda.device_count() > 1:
+        model = torch.nn.DataParallel(model)
+
+    # SyncBatchNorm
+    if opt.sync_bn and cuda and rank != -1:
+        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
+        logger.info('Using SyncBatchNorm()')
+
+    # EMA
+    ema = ModelEMA(model) if rank in [-1, 0] else None
+
+    # DDP mode
+    if cuda and rank != -1:
+        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
+
+    # Trainloader
+    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
+                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
+                                            world_size=opt.world_size, workers=opt.workers,
+                                            image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '))
+    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
+    nb = len(dataloader)  # number of batches
+    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
+
+    # Process 0
+    if rank in [-1, 0]:
+        ema.updates = start_epoch * nb // accumulate  # set EMA updates
+        testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
+                                       hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
+                                       world_size=opt.world_size, workers=opt.workers,
+                                       pad=0.5, prefix=colorstr('val: '))[0]
+
+        if not opt.resume:
+            labels = np.concatenate(dataset.labels, 0)
+            c = torch.tensor(labels[:, 0])  # classes
+            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
+            # model._initialize_biases(cf.to(device))
+            if plots:
+                plot_labels(labels, save_dir, loggers)
+                if tb_writer:
+                    tb_writer.add_histogram('classes', c, 0)
+
+            # Anchors
+            if not opt.noautoanchor:
+                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)
+
+    # Model parameters
+    hyp['box'] *= 3. / nl  # scale to layers
+    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
+    model.nc = nc  # attach number of classes to model
+    model.hyp = hyp  # attach hyperparameters to model
+    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
+    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc  # attach class weights
+    model.names = names
+
+    ####################################################################################
+    # Start SparseML Integration
+    ####################################################################################
+    # determine recipe type to be used if loading from SparseZoo
+    if opt.sparseml_recipe_path.startswith("zoo:"):
+        zoo_recipe_type = "transfer" if opt.sparse_transfer_learn else "original"
+    else:
+        zoo_recipe_type = None
+    manager = ScheduledModifierManager.from_yaml(opt.sparseml_recipe_path, zoo_recipe_type=zoo_recipe_type)
+    optimizer = ScheduledOptimizer(
+        optimizer,
+        model,
+        manager,
+        steps_per_epoch=len(dataloader),
+        loggers=[PythonLogger(), TensorBoardLogger(writer=tb_writer)]
+    )
+    start_epoch = manager.min_epochs or start_epoch  # override min_epochs
+    epochs = manager.max_epochs or epochs  # override num_epochs
+    ####################################################################################
+    # End SparseML Integration
+    ####################################################################################
+
+    # Start training
+    t0 = time.time()
+    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
+    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
+    maps = np.zeros(nc)  # mAP per class
+    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+    scheduler.last_epoch = start_epoch - 1  # do not move
+    scaler = amp.GradScaler(enabled=cuda)
+    compute_loss = ComputeLoss(model)  # init loss class
+    logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
+                f'Using {dataloader.num_workers} dataloader workers\n'
+                f'Logging results to {save_dir}\n'
+                f'Starting training for {epochs} epochs...')
+    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
+        model.train()
+
+        # Update image weights (optional)
+        if opt.image_weights:
+            # Generate indices
+            if rank in [-1, 0]:
+                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc  # class weights
+                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
+                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
+            # Broadcast if DDP
+            if rank != -1:
+                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
+                dist.broadcast(indices, 0)
+                if rank != 0:
+                    dataset.indices = indices.cpu().numpy()
+
+        # Update mosaic border
+        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
+        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
+
+        mloss = torch.zeros(4, device=device)  # mean losses
+        if rank != -1:
+            dataloader.sampler.set_epoch(epoch)
+        pbar = enumerate(dataloader)
+        logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size'))
+        if rank in [-1, 0]:
+            pbar = tqdm(pbar, total=nb)  # progress bar
+        optimizer.zero_grad()
+        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
+            ni = i + nb * epoch  # number integrated batches (since train start)
+            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+
+            # Warmup
+            if ni <= nw:
+                xi = [0, nw]  # x interp
+                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
+                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
+                for j, x in enumerate(optimizer.param_groups):
+                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
+                    if 'momentum' in x:
+                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])
+
+            # Multi-scale
+            if opt.multi_scale:
+                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
+                sf = sz / max(imgs.shape[2:])  # scale factor
+                if sf != 1:
+                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
+                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
+
+            # Forward
+            with amp.autocast(enabled=cuda):
+                pred = model(imgs)  # forward
+                loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
+                if rank != -1:
+                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
+                if opt.quad:
+                    loss *= 4.
+
+            # Backward
+            scaler.scale(loss).backward()
+
+            # Optimize
+            if ni % accumulate == 0:
+                scaler.step(optimizer)  # optimizer.step
+                scaler.update()
+                optimizer.zero_grad()
+                if ema:
+                    ema.update(model)
+
+            # Print
+            if rank in [-1, 0]:
+                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
+                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
+                s = ('%10s' * 2 + '%10.4g' * 6) % (
+                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
+                pbar.set_description(s)
+
+                # Plot
+                if plots and ni < 3:
+                    f = save_dir / f'train_batch{ni}.jpg'  # filename
+                    Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
+                    # if tb_writer:
+                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
+                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
+                elif plots and ni == 10 and wandb:
+                    wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')
+                                           if x.exists()]}, commit=False)
+
+            # end batch ------------------------------------------------------------------------------------------------
+        # end epoch ----------------------------------------------------------------------------------------------------
+
+        # Scheduler
+        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
+        scheduler.step()
+
+        # DDP process 0 or single-GPU
+        if rank in [-1, 0]:
+            # mAP
+            if ema:
+                ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
+            final_epoch = epoch + 1 == epochs
+            if not opt.notest or final_epoch:  # Calculate mAP
+                results, maps, times = test.test(opt.data,
+                                                 batch_size=batch_size * 2,
+                                                 imgsz=imgsz_test,
+                                                 model=ema.ema,
+                                                 single_cls=opt.single_cls,
+                                                 dataloader=testloader,
+                                                 save_dir=save_dir,
+                                                 verbose=nc < 50 and final_epoch,
+                                                 plots=plots and final_epoch,
+                                                 log_imgs=opt.log_imgs if wandb else 0,
+                                                 compute_loss=compute_loss)
+
+            # Write
+            with open(results_file, 'a') as f:
+                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+            if len(opt.name) and opt.bucket:
+                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
+
+            # Log
+            tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
+                    'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
+                    'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
+                    'x/lr0', 'x/lr1', 'x/lr2']  # params
+            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
+                if tb_writer:
+                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
+                if wandb:
+                    wandb.log({tag: x}, step=epoch, commit=tag == tags[-1])  # W&B
+
+            # Update best mAP
+            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
+            if fi > best_fitness:
+                best_fitness = fi
+
+            # Save model
+            save = (not opt.nosave) or (final_epoch and not opt.evolve)
+            if save:
+                with open(results_file, 'r') as f:  # create checkpoint
+                    ckpt = {'epoch': epoch,
+                            'best_fitness': best_fitness,
+                            'training_results': f.read(),
+                            'model': ema.ema,
+                            'optimizer': None if final_epoch else optimizer.state_dict(),
+                            'wandb_id': wandb_run.id if wandb else None}
+
+                # Save last, best and delete
+                torch.save(ckpt, last)
+                if best_fitness == fi:
+                    torch.save(ckpt, best)
+                del ckpt
+        # end epoch ----------------------------------------------------------------------------------------------------
+    # end training
+
+    if rank in [-1, 0]:
+        # Strip optimizers
+        final = best if best.exists() else last  # final model
+        for f in [last, best]:
+            if f.exists():
+                strip_optimizer(f)  # strip optimizers
+        if opt.bucket:
+            os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload
+
+        # Plots
+        if plots:
+            plot_results(save_dir=save_dir)  # save as results.png
+            if wandb:
+                files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]]
+                wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files
+                                       if (save_dir / f).exists()]})
+                if opt.log_artifacts:
+                    wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem)
+
+        # Test best.pt
+        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
+        if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
+            for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]):  # speed, mAP tests
+                results, _, _ = test.test(opt.data,
+                                          batch_size=batch_size * 2,
+                                          imgsz=imgsz_test,
+                                          conf_thres=conf,
+                                          iou_thres=iou,
+                                          model=attempt_load(final, device).half(),
+                                          single_cls=opt.single_cls,
+                                          dataloader=testloader,
+                                          save_dir=save_dir,
+                                          save_json=save_json,
+                                          plots=False)
+        #################################################################################
+        # Start SparseML ONNX Export (runs for every dataset, not only COCO; imgsz is a single int)
+        #################################################################################
+        logger.info(f"training complete, exporting ONNX to {save_dir}/model.onnx")
+        exporter = ModuleExporter(model, save_dir)
+        exporter.export_onnx(torch.randn((1, 3, imgsz, imgsz)))
+        #################################################################################
+        # End SparseML ONNX Export
+        #################################################################################
+
+    else:
+        dist.destroy_process_group()
+
+    wandb.run.finish() if wandb and wandb.run else None
+    torch.cuda.empty_cache()
+    return results
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    ####################################################################################
+    # Start SparseML arguments
+    ####################################################################################
+    parser.add_argument(
+        "--sparseml-recipe-path",
+        required=True,
+        type=str,
+        help="path to a SparseML recipe file or a SparseZoo model stub for a recipe to load. "
+             "SparseZoo stubs should be preceded by 'zoo:'. i.e. '/path/to/local/recipe.yaml', "
+             "'zoo:zoo/model/stub'"
+    )
+    parser.add_argument(
+        "--sparse-transfer-learn",
+        action="store_true",
+        help="Enable sparse transfer learning modifiers to enforce the sparsity "
+             "if the recipe comes from a local file, modifiers will be added to the manager "
+             "to hold already sparse layers at the same sparsity level. If the recipe comes "
+             "from SparseZoo, the 'transfer' recipe for the model will be loaded instead",
+    )
+    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
+    parser.add_argument(
+        "--weights",
+        type=str,
+        default="yolov5s.pt",
+        help="initial weights path. can be a local file path, can pass in 'zoo' if "
+        "using a SparseZoo recipe to load that recipes base weights, or pass in a "
+        "SparseZoo model stub, prefixed with 'zoo:' to load weights directly from "
+        "SparseZoo",
+    )
+    ####################################################################################
+    # End SparseML arguments
+    ####################################################################################
+    parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
+    parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
+    parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
+    parser.add_argument('--epochs', type=int, default=300)
+    parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
+    parser.add_argument('--rect', action='store_true', help='rectangular training')
+    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
+    parser.add_argument('--notest', action='store_true', help='only test final epoch')
+    parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
+    parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
+    parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
+    parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
+    parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
+    parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
+    parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
+    parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
+    parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
+    parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100')
+    parser.add_argument('--log-artifacts', action='store_true', help='log artifacts, i.e. final trained model')
+    parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
+    parser.add_argument('--project', default='runs/train', help='save to project/name')
+    parser.add_argument('--name', default='exp', help='save to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    parser.add_argument('--quad', action='store_true', help='quad dataloader')
+    parser.add_argument('--linear-lr', action='store_true', help='linear LR')
+    opt = parser.parse_args()
+
+    # Set DDP variables
+    opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
+    opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
+    set_logging(opt.global_rank)
+    if opt.global_rank in [-1, 0]:
+        check_git_status()
+        check_requirements()
+
+    ####################################################################################
+    # Start - SparseML optional load weights from SparseZoo
+    ####################################################################################
+    if opt.weights == "zoo":
+        # Load checkpoint from base weights associated with given SparseZoo recipe
+        if opt.sparseml_recipe_path.startswith("zoo:"):
+            recipe_type = "transfer" if opt.sparse_transfer_learn else "original"
+            opt.weights = Zoo.download_recipe_base_framework_files(
+                opt.sparseml_recipe_path,
+                recipe_type=recipe_type,
+                extensions=[".pt", ".pth"]
+            )[0]
+        else:
+            raise ValueError(
+                "Attempting to load weights from SparseZoo recipe, but not given a "
+                "SparseZoo recipe stub.  When --weights is set to 'zoo'. "
+                "sparseml-recipe-path must start with 'zoo:' and be a SparseZoo model "
+                f"stub. sparseml-recipe-path was set to {args.sparseml_recipe_path}"
+            )
+    elif opt.weights.startswith("zoo:"):
+        # Load weights from a SparseZoo model stub
+        zoo_model = Zoo.load_model_from_stub(opt.weights)
+        opt.weights = zoo_model.download_framework_files(
+            extensions=[".pt", ".pth"]
+        )[0]  # use the first downloaded checkpoint path as the initial weights
+    ####################################################################################
+    # End - SparseML optional load weights from SparseZoo
+    ####################################################################################
+
+    # Resume
+    if opt.resume:  # resume an interrupted run
+        ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run()  # specified or most recent path
+        assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
+        apriori = opt.global_rank, opt.local_rank
+        with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
+            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader))  # replace
+        opt.cfg, opt.weights, opt.resume, opt.batch_size, opt.global_rank, opt.local_rank = '', ckpt, True, opt.total_batch_size, *apriori  # reinstate
+        logger.info('Resuming training from %s' % ckpt)
+    else:
+        # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
+        opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp)  # check files
+        assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
+        opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size)))  # extend to 2 sizes (train, test)
+        opt.name = 'evolve' if opt.evolve else opt.name
+        opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve)  # increment run
+
+    # DDP mode
+    opt.total_batch_size = opt.batch_size
+    device = select_device(opt.device, batch_size=opt.batch_size)
+    if opt.local_rank != -1:
+        assert torch.cuda.device_count() > opt.local_rank
+        torch.cuda.set_device(opt.local_rank)
+        device = torch.device('cuda', opt.local_rank)
+        dist.init_process_group(backend='nccl', init_method='env://')  # distributed backend
+        assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
+        opt.batch_size = opt.total_batch_size // opt.world_size
+
+    # Hyperparameters
+    with open(opt.hyp) as f:
+        hyp = yaml.load(f, Loader=yaml.SafeLoader)  # load hyps
+
+    # Train
+    logger.info(opt)
+    try:
+        import wandb
+    except ImportError:
+        wandb = None
+        prefix = colorstr('wandb: ')
+        logger.info(f"{prefix}Install Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)")
+    if not opt.evolve:
+        tb_writer = None  # init loggers
+        if opt.global_rank in [-1, 0]:
+            logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
+            tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
+        train(hyp, opt, device, tb_writer, wandb)
+
+    # Evolve hyperparameters (optional)
+    else:
+        # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
+        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
+                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
+                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
+                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
+                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
+                'box': (1, 0.02, 0.2),  # box loss gain
+                'cls': (1, 0.2, 4.0),  # cls loss gain
+                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
+                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
+                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
+                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
+                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
+                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
+                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
+                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
+                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
+                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
+                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
+                'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
+                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)
+
+        assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
+        opt.notest, opt.nosave = True, True  # only test/save final epoch
+        # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
+        yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml'  # save best result here
+        if opt.bucket:
+            os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket)  # download evolve.txt if exists
+
+        for _ in range(300):  # generations to evolve
+            if Path('evolve.txt').exists():  # if evolve.txt exists: select best hyps and mutate
+                # Select parent(s)
+                parent = 'single'  # parent selection method: 'single' or 'weighted'
+                x = np.loadtxt('evolve.txt', ndmin=2)
+                n = min(5, len(x))  # number of previous results to consider
+                x = x[np.argsort(-fitness(x))][:n]  # top n mutations
+                w = fitness(x) - fitness(x).min()  # weights
+                if parent == 'single' or len(x) == 1:
+                    # x = x[random.randint(0, n - 1)]  # random selection
+                    x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
+                elif parent == 'weighted':
+                    x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination
+
+                # Mutate
+                mp, s = 0.8, 0.2  # mutation probability, sigma
+                npr = np.random
+                npr.seed(int(time.time()))
+                g = np.array([x[0] for x in meta.values()])  # gains 0-1
+                ng = len(meta)
+                v = np.ones(ng)
+                while all(v == 1):  # mutate until a change occurs (prevent duplicates)
+                    v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
+                for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
+                    hyp[k] = float(x[i + 7] * v[i])  # mutate
+
+            # Constrain to limits
+            for k, v in meta.items():
+                hyp[k] = max(hyp[k], v[1])  # lower limit
+                hyp[k] = min(hyp[k], v[2])  # upper limit
+                hyp[k] = round(hyp[k], 5)  # significant digits
+
+            # Train mutation
+            results = train(hyp.copy(), opt, device, wandb=wandb)
+
+            # Write mutation results
+            print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
+
+        # Plot results
+        plot_evolution(yaml_file)
+        print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
+              f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')

From da8821bf567e03aad05de810727ec03be5814e0c Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Mon, 22 Feb 2021 22:41:11 -0500
Subject: [PATCH 2/6] addressing review comments

---
 .../ultralytics}/README.md                    | 19 +++----
 .../ultralytics}/main.py                      | 52 ++++++++-----------
 2 files changed, 29 insertions(+), 42 deletions(-)
 rename {examples/ultralytics-sparseml => integrations/ultralytics}/README.md (80%)
 rename {examples/ultralytics-sparseml => integrations/ultralytics}/main.py (95%)

diff --git a/examples/ultralytics-sparseml/README.md b/integrations/ultralytics/README.md
similarity index 80%
rename from examples/ultralytics-sparseml/README.md
rename to integrations/ultralytics/README.md
index 67693654fec..a100056147b 100644
--- a/examples/ultralytics-sparseml/README.md
+++ b/integrations/ultralytics/README.md
@@ -17,7 +17,7 @@ limitations under the License.
 # SparseML-ultralytics/yolov5 integration
 This directory provides a SparseML integrated training script for the popular
 [ultralytics/yolov5](https://github.com/ultralytics/yolov5)
-repository also known as [timm](https://pypi.org/project/timm/).
+repository.
 
 Using this integration, you will be able to apply SparseML optimizations
 to the powerful training flows provided in the yolov5 repository.
@@ -38,7 +38,7 @@ git clone https://github.com/ultralytics/yolov5.git
 git clone https://github.com/neuralmagic/sparseml.git
 
 # copy script
-cp sparseml/examples/ultralytics-sparseml/main.py yolov5
+cp sparseml/integrations/ultralytics/main.py yolov5
 cd yolov5
 
 # install dependencies
@@ -48,20 +48,13 @@ pip install sparseml
 
 
 ## Script
-`examples/timm-sparseml/main.py` modifies
+`integrations/ultralytics/main.py` modifies
 [`train.py`](https://github.com/ultralytics/yolov5/blob/master/train.py)
-from yolov5 to include a `sparseml-recipe-path` argument
+from yolov5 to include a `sparseml-recipe` argument
 to run SparseML optimizations with.  This can be a file path to a local
 SparseML recipe or a SparseZoo model stub prefixed by `zoo:` such as
 `zoo:cv/detection/yolo_v3-spp/pytorch/ultralytics/coco/pruned-aggressive`.
 
-Additionally, for sparse transfer learning, the flag `--sparse-transfer-learn`
-was added.  Running the script with this flag will add modifiers to the given
-recipe that will keep the base sparsity constant during training, allowing
-the model to learn the new dataset while keeping the same optimized structure.
-If a SparseZoo recipe path is provided with sparse transfer learning enabled,
-then the the model's specific "transfer" recipe will be loaded instead.
-
 To load the base weights for a SparseZoo recipe as the initial checkpoint, set
 `--initial-checkpoint` to `zoo`.  To use the weights of a SparseZoo model as the
 initial checkpoint, pass that model's SparseZoo stub prefixed by `zoo:` to the
@@ -72,7 +65,7 @@ follow the normal yolov5 training flow with the given SparseML optimizations ena
 
 Some considerations:
 
-* `--sparseml-recipe-path` is a required parameter
+* `--sparseml-recipe` is a required parameter
 * `--epochs` will now be overridden by the epochs set in the SparseML recipe
 * if using learning rate schedulers both with the yolov5 script and your recipe, they
 may conflict with each other causing unintended side effects, choose
@@ -95,6 +88,6 @@ Call the script from the `yolov5` directory, passing in the same arguments as
 `train.py`, with the additional SparseML argument(s) included.
 ```bash
 python main.py \
-  --sparseml-recipe-path /PATH/TO/RECIPE/recipe.yaml \
+  --sparseml-recipe /PATH/TO/RECIPE/recipe.yaml \
   <regular yolov5/train.py paramters>
 ```  
diff --git a/examples/ultralytics-sparseml/main.py b/integrations/ultralytics/main.py
similarity index 95%
rename from examples/ultralytics-sparseml/main.py
rename to integrations/ultralytics/main.py
index 50cb3e7ca59..31edbd0d2e1 100644
--- a/examples/ultralytics-sparseml/main.py
+++ b/integrations/ultralytics/main.py
@@ -7,11 +7,13 @@
 Integration between https://github.com/ultralytics/yolov5 and SparseML
 
 This script is adapted from https://github.com/ultralytics/yolov5/blob/master/train.py
-to apply a SparseML recipe from the required `--sparseml-recipe-path` argument.
+to apply a SparseML recipe from the required `--sparseml-recipe` argument.
 Integration lines are preceded by comment blocks.  Run with `--help` for help printout,
 more information can be found in the readme file.
 
-Latest yolov5 commit this script is based on: c9bda11
+Latest yolov5 commit this script is based on:
+https://github.com/ultralytics/yolov5/tree/c9bda112aebaa0be846864f9d224191d0e19d419
+commit hash: c9bda11
 """
 import argparse
 import logging
@@ -244,14 +246,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     ####################################################################################
     # Start SparseML Integration
     ####################################################################################
-    # determine recipe type to be used if loading from SparseZoo
-    if opt.sparseml_recipe_path.startswith("zoo:"):
-        zoo_recipe_type = "transfer" if opt.sparse_transfer_learn else "original"
-    else:
-        zoo_recipe_type = None
-    manager = ScheduledModifierManager.from_yaml(
-        opt.sparseml_recipe_path, zoo_recipe_type=zoo_recipe_type
-    )
+    manager = ScheduledModifierManager.from_yaml(opt.sparseml_recipe)
     optimizer = ScheduledOptimizer(
         optimizer,
         model,
@@ -259,8 +254,15 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         steps_per_epoch=len(dataloader),
         loggers=[PythonLogger(), TensorBoardLogger(writer=tb_writer)]
     )
-    start_epoch = manager.min_epochs or start_epoch  # override min_epochs
-    epochs = manager.max_epochs or epochs  # override num_epochs
+    # override lr scheduler if recipe makes any LR updates
+    if any("LearningRate" in str(modifier) for modifier in manager.modifiers):
+        logger.info("Disabling yolo LR scheduler, managing LR using SparseML recipe")
+        scheduler = None
+    if manager.max_epochs:
+        epochs = manager.max_epochs  # recipe-defined epoch count replaces --epochs
+        logger.info(
+            f"overriding number of epochs from SparseML manager to {manager.max_epochs}"
+        )
     ####################################################################################
     # End SparseML Integration
     ####################################################################################
@@ -271,7 +273,8 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
     maps = np.zeros(nc)  # mAP per class
     results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
-    scheduler.last_epoch = start_epoch - 1  # do not move
+    if scheduler:
+        scheduler.last_epoch = start_epoch - 1  # do not move
     scaler = amp.GradScaler(enabled=cuda)
     compute_loss = ComputeLoss(model)  # init loss class
     logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
@@ -374,7 +377,8 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
 
         # Scheduler
         lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
-        scheduler.step()
+        if scheduler:
+            scheduler.step()
 
         # DDP process 0 or single-GPU
         if rank in [-1, 0]:
@@ -496,21 +500,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     # Start SparseML arguments
     ####################################################################################
     parser.add_argument(
-        "--sparseml-recipe-path",
+        "--sparseml-recipe",
         required=True,
         type=str,
         help="path to a SparseML recipe file or a SparseZoo model stub for a recipe to load. "
              "SparseZoo stubs should be preceded by 'zoo:'. i.e. '/path/to/local/recipe.yaml', "
              "'zoo:zoo/model/stub'"
     )
-    parser.add_argument(
-        "--sparse-transfer-learn",
-        action="store_true",
-        help="Enable sparse transfer learning modifiers to enforce the sparsity "
-             "if the recipe comes from a local file, modifiers will be added to the manager "
-             "to hold already sparse layers at the same sparsity level. If the recipe comes "
-             "from SparseZoo, the 'transfer' recipe for the model will be loaded instead",
-    )
     parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
     parser.add_argument(
         "--weights",
@@ -568,19 +564,17 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     ####################################################################################
     if opt.weights == "zoo":
         # Load checkpoint from base weights associated with given SparseZoo recipe
-        if opt.sparseml_recipe_path.startswith("zoo:"):
-            recipe_type = "transfer" if opt.sparse_transfer_learn else "original"
+        if opt.sparseml_recipe.startswith("zoo:"):
             opt.weights = Zoo.download_recipe_base_framework_files(
-                opt.sparseml_recipe_path,
-                recipe_type=recipe_type,
+                opt.sparseml_recipe,
                 extensions=[".pt", ".pth"]
             )[0]
         else:
             raise ValueError(
                 "Attempting to load weights from SparseZoo recipe, but not given a "
                 "SparseZoo recipe stub.  When --weights is set to 'zoo'. "
-                "sparseml-recipe-path must start with 'zoo:' and be a SparseZoo model "
-                f"stub. sparseml-recipe-path was set to {args.sparseml_recipe_path}"
+                "sparseml-recipe must start with 'zoo:' and be a SparseZoo model "
+                f"stub. sparseml-recipe was set to {opt.sparseml_recipe}"
             )
     elif opt.weights.startswith("zoo:"):
         # Load weights from a SparseZoo model stub

From ae7e602088c7ede981d66ce89fd8ca9e86f6598e Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Tue, 23 Feb 2021 09:34:30 -0500
Subject: [PATCH 3/6] renaming script to train.py

---
 integrations/ultralytics/README.md             | 8 ++++----
 integrations/ultralytics/{main.py => train.py} | 0
 2 files changed, 4 insertions(+), 4 deletions(-)
 rename integrations/ultralytics/{main.py => train.py} (100%)

diff --git a/integrations/ultralytics/README.md b/integrations/ultralytics/README.md
index a100056147b..26ba6aebe1c 100644
--- a/integrations/ultralytics/README.md
+++ b/integrations/ultralytics/README.md
@@ -38,7 +38,7 @@ git clone https://github.com/ultralytics/yolov5.git
 git clone https://github.com/neuralmagic/sparseml.git
 
 # copy script
-cp sparseml/integrations/ultralytics/main.py yolov5
+cp sparseml/integrations/ultralytics/train.py yolov5
 cd yolov5
 
 # install dependencies
@@ -48,7 +48,7 @@ pip install sparseml
 
 
 ## Script
-`integrations/ultralytics/main.py` modifies
+`integrations/ultralytics/train.py` modifies
 [`train.py`](https://github.com/ultralytics/yolov5/blob/master/train.py)
 from yolov5 to include a `sparseml-recipe` argument
 to run SparseML optimizations with.  This can be a file path to a local
@@ -80,14 +80,14 @@ documentation, or export one with [Sparsify](https://github.com/neuralmagic/spar
 
 Documentation on the original script can be found
 [here](https://github.com/ultralytics/yolov5).
-The latest commit hash that `main.py` is based on is included in the docstring.
+The latest commit hash that `train.py` is based on is included in the docstring.
 
 
 #### Example Command
 Call the script from the `yolov5` directory, passing in the same arguments as
 `train.py`, with the additional SparseML argument(s) included.
 ```bash
-python main.py \
+python train.py \
   --sparseml-recipe /PATH/TO/RECIPE/recipe.yaml \
   <regular yolov5/train.py paramters>
 ```  
diff --git a/integrations/ultralytics/main.py b/integrations/ultralytics/train.py
similarity index 100%
rename from integrations/ultralytics/main.py
rename to integrations/ultralytics/train.py

From 8ef1c013ef41129f5a1be2fae502e94dafa6daf6 Mon Sep 17 00:00:00 2001
From: Benjamin Fineran <bfineran@users.noreply.github.com>
Date: Tue, 23 Feb 2021 15:44:33 -0500
Subject: [PATCH 4/6] delete repeated arg

---
 integrations/ultralytics/train.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/integrations/ultralytics/train.py b/integrations/ultralytics/train.py
index 31edbd0d2e1..da62a23482e 100644
--- a/integrations/ultralytics/train.py
+++ b/integrations/ultralytics/train.py
@@ -507,7 +507,6 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
              "SparseZoo stubs should be preceded by 'zoo:'. i.e. '/path/to/local/recipe.yaml', "
              "'zoo:zoo/model/stub'"
     )
-    parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
     parser.add_argument(
         "--weights",
         type=str,

From 1f5b42701df516168a2562e6d62805411d6c921f Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Tue, 23 Feb 2021 17:41:51 -0500
Subject: [PATCH 5/6] disabling EMA by default, responding to review

---
 integrations/ultralytics/README.md | 16 +++++++++-------
 integrations/ultralytics/train.py  | 21 +++++++++++++++++----
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/integrations/ultralytics/README.md b/integrations/ultralytics/README.md
index 26ba6aebe1c..11b420d2e4a 100644
--- a/integrations/ultralytics/README.md
+++ b/integrations/ultralytics/README.md
@@ -24,12 +24,12 @@ to the powerful training flows provided in the yolov5 repository.
 
 Some of the tasks you can perform using this integration include, but are not limited to:
 * model pruning
-* quantization-aware-training
-* sparse quantization-aware-training
+* quantization-aware training
+* sparse quantization-aware training
 * sparse transfer learning
 
 ## Installation
-To use both the script, clone both repositories, install their dependencies,
+To use the script, clone both repositories, install their dependencies,
 and copy the integrated training script into the yolov5 directory to run from.
 
 ```bash
@@ -68,10 +68,12 @@ Some considerations:
 * `--sparseml-recipe` is a required parameter
 * `--epochs` will now be overridden by the epochs set in the SparseML recipe
 * if using learning rate schedulers both with the yolov5 script and your recipe, they
-may conflict with each other causing unintended side effects, choose
-hyperparameters accordingly.
-* Modifiers will log their outputs to the console as well as to the tensorboard file
+may conflict with each other causing unintended side effects, so choose
+hyperparameters accordingly
+* Modifiers will log their outputs to the console as well as to the TensorBoard file
 * After training is complete, the final model will be exported to ONNX using SparseML
+* By default, EMA is disabled when using `train.py`. This is to allow for best compatibility
+with pruning and quantization.  To enable, set the `--use-ema` flag
 
 You can learn how to build or download a recipe using the
 [SparseML](https://github.com/neuralmagic/sparseml)
@@ -89,5 +91,5 @@ Call the script from the `yolov5` directory, passing in the same arguments as
 ```bash
 python train.py \
   --sparseml-recipe /PATH/TO/RECIPE/recipe.yaml \
-  <regular yolov5/train.py paramters>
+  <regular yolov5/train.py parameters>
 ```  
diff --git a/integrations/ultralytics/train.py b/integrations/ultralytics/train.py
index da62a23482e..ae282a0c0d0 100644
--- a/integrations/ultralytics/train.py
+++ b/integrations/ultralytics/train.py
@@ -196,7 +196,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         logger.info('Using SyncBatchNorm()')
 
     # EMA
-    ema = ModelEMA(model) if rank in [-1, 0] else None
+    ####################################################################################
+    # Start SparseML Integration - optional EMA
+    ####################################################################################
+    ema = ModelEMA(model) if rank in [-1, 0] and opt.use_ema else None
+    ####################################################################################
+    # End SparseML Integration - optional EMA
+    ####################################################################################
 
     # DDP mode
     if cuda and rank != -1:
@@ -213,7 +219,8 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
 
     # Process 0
     if rank in [-1, 0]:
-        ema.updates = start_epoch * nb // accumulate  # set EMA updates
+        if ema:
+            ema.updates = start_epoch * nb // accumulate  # set EMA updates
         testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt,  # testloader
                                        hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
                                        world_size=opt.world_size, workers=opt.workers,
@@ -385,12 +392,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
             # mAP
             if ema:
                 ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
+
             final_epoch = epoch + 1 == epochs
             if not opt.notest or final_epoch:  # Calculate mAP
                 results, maps, times = test.test(opt.data,
                                                  batch_size=batch_size * 2,
                                                  imgsz=imgsz_test,
-                                                 model=ema.ema,
+                                                 model=ema.ema if ema else model,
                                                  single_cls=opt.single_cls,
                                                  dataloader=testloader,
                                                  save_dir=save_dir,
@@ -428,7 +436,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                     ckpt = {'epoch': epoch,
                             'best_fitness': best_fitness,
                             'training_results': f.read(),
-                            'model': ema.ema,
+                            'model': ema.ema if ema else model,
                             'optimizer': None if final_epoch else optimizer.state_dict(),
                             'wandb_id': wandb_run.id if wandb else None}
 
@@ -516,6 +524,11 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         "SparseZoo model stub, prefixed with 'zoo:' to load weights directly from "
         "SparseZoo",
     )
+    parser.add_argument(
+        "--use-ema",
+        action="store_true",
+        help="set flag to enable EMA updates. disabled by default in SparseML integration"
+    )
     ####################################################################################
     # End SparseML arguments
     ####################################################################################

From d8fd6d1efcc4348bc2c1877c1a0f29c72a34f440 Mon Sep 17 00:00:00 2001
From: Benjamin <ben@neuralmagic.com>
Date: Wed, 24 Feb 2021 12:34:59 -0500
Subject: [PATCH 6/6] disabling AMP by default for QAT compatibility

---
 integrations/ultralytics/README.md |  8 ++++++--
 integrations/ultralytics/train.py  | 10 ++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/integrations/ultralytics/README.md b/integrations/ultralytics/README.md
index 11b420d2e4a..13bcddc9cc0 100644
--- a/integrations/ultralytics/README.md
+++ b/integrations/ultralytics/README.md
@@ -72,8 +72,12 @@ may conflict with each other causing unintended side effects, so choose
 hyperparameters accordingly
 * Modifiers will log their outputs to the console as well as to the TensorBoard file
 * After training is complete, the final model will be exported to ONNX using SparseML
-* By default, EMA is disabled when using `train.py`. This is to allow for best compatibility
-with pruning and quantization.  To enable, set the `--use-ema` flag
+* By default, EMA is disabled when using the integrated `train.py`. This is to allow
+for best compatibility with pruning and quantization.  To enable, set the `--use-ema`
+flag
+* By default, Automatic Mixed Precision (AMP) is disabled when using the integrated
+`train.py`. This is because mixed precision is not supported for PyTorch
+quantization-aware training.  To enable, set the `--use-amp` flag
 
 You can learn how to build or download a recipe using the
 [SparseML](https://github.com/neuralmagic/sparseml)
diff --git a/integrations/ultralytics/train.py b/integrations/ultralytics/train.py
index ae282a0c0d0..e7ddbfdf51a 100644
--- a/integrations/ultralytics/train.py
+++ b/integrations/ultralytics/train.py
@@ -282,7 +282,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
     if scheduler:
         scheduler.last_epoch = start_epoch - 1  # do not move
-    scaler = amp.GradScaler(enabled=cuda)
+    scaler = amp.GradScaler(enabled=(cuda and opt.use_amp))
     compute_loss = ComputeLoss(model)  # init loss class
     logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
                 f'Using {dataloader.num_workers} dataloader workers\n'
@@ -341,7 +341,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                     imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
 
             # Forward
-            with amp.autocast(enabled=cuda):
+            with amp.autocast(enabled=(cuda and opt.use_amp)):
                 pred = model(imgs)  # forward
                 loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                 if rank != -1:
@@ -529,6 +529,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         action="store_true",
         help="set flag to enable EMA updates. disabled by default in SparseML integration"
     )
+    parser.add_argument(
+        "--use-amp",
+        action="store_true",
+        help="set flag to enable Automatic Mixed Precision (AMP). disabled by default "
+        "in SparseML integration"
+    )
     ####################################################################################
     # End SparseML arguments
     ####################################################################################