In [1]:
!pip install ultralytics



# Before pruning and quantization

In [2]:
from ultralytics import YOLO

# Load a model
model = YOLO("yolov8m.yaml")  # build a new model from scratch
model = YOLO("yolov8m.pt")  # load a pretrained model (recommended for training)


                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytics.nn.modules.conv.Conv             [384

In [3]:
metrics = model.val(data='coco.yaml')  # evaluate model performance on the validation set

Ultralytics YOLOv8.0.181 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8m summary (fused): 218 layers, 25886080 parameters, 0 gradients

Dataset 'coco.yaml' images not found ⚠️, missing path '/content/datasets/coco/val2017.txt'
Downloading https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip to '/content/datasets/coco2017labels-segments.zip'...
100%|██████████| 169M/169M [00:02<00:00, 70.1MB/s]
Unzipping /content/datasets/coco2017labels-segments.zip to /content/datasets/coco...: 100%|██████████| 122232/122232 [00:18<00:00, 6458.66file/s]
Downloading http://images.cocodataset.org/zips/val2017.zip to '/content/datasets/coco/images/val2017.zip'...
Dataset download success ✅ (46.9s), saved to [1m/content/datasets[0m

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...
100%|██████████| 755k/755k [00:00<00:00, 22.8MB/s]
[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017... 4

loading annotations into memory...
Done (t=0.77s)
creating index...
index created!
Loading and preparing results...
DONE (t=2.66s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=53.18s).
Accumulating evaluation results...
DONE (t=12.24s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.502
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.672
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.320
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.558
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.664
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.383
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.635
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

Results saved to [1mruns/detect/val[0m


In [4]:
print(f'box map50-95 {metrics.box.map:.3}')
print(f'box map50 {metrics.box.map50:.3}')
print(f'box map75 {metrics.box.map75:.3}')

box map50-95 0.501
box map50 0.667
box map75 0.546


In [5]:
def check_model_size(model):
    buffer_size, param_size = 0, 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()

    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2

    return size_all_mb

In [6]:
check_model_size(model.model) # size in mb

98.74755859375

# Model quantization
links:
- https://pytorch.org/tutorials/recipes/quantization.html

In [7]:
# Load a model
model = YOLO("yolov8m.yaml")  # build a new model from scratch
model = YOLO("yolov8m.pt")  # load a pretrained model (recommended for training)


                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytics.nn.modules.conv.Conv             [384

## Динамическая квантизация

In [17]:
import torch
# динамичесая квантизация - поддерживает только линейные слои
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)

In [18]:
ckpt = {

            'model': model_int8,
            'train_args': {},  # save as dict
}

print(f'Quant model size: {check_model_size(model_int8)}')

torch.save(ckpt, './model_quant.pt')

quant_model = YOLO("./model_quant.pt")

Quant model size: 98.93766021728516


In [19]:
results = quant_model.val(data='coco.yaml')

Ultralytics YOLOv8.0.181 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8m summary (fused): 218 layers, 25886080 parameters, 0 gradients
[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [01:56<00:00,  2.69it/s]
                   all       5000      36335      0.716       0.61      0.667      0.501
                person       5000      10777      0.821      0.745      0.829      0.617
               bicycle       5000        314      0.742      0.525      0.626      0.402
                   car       5000       1918      0.765      0.637      0.713      0.497
            motorcycle       5000        367      0.811       0.68      0.793      0.547
              airplane       5000        143       0.84      0.884      0.925      0.776
          

loading annotations into memory...
Done (t=0.86s)
creating index...
index created!
Loading and preparing results...
DONE (t=4.88s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=58.05s).
Accumulating evaluation results...
DONE (t=12.47s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.502
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.672
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.546
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.320
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.558
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.664
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.383
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.635
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

Results saved to [1mruns/detect/val3[0m


In [20]:
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

box map50-95 0.501
box map50 0.667
box map75 0.546


## Статическая квантизация

In [61]:
# Load a model
model = YOLO("yolov8m.yaml")  # build a new model from scratch
model = YOLO("yolov8m.pt")  # load a pretrained model (recommended for training)


                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytics.nn.modules.conv.Conv             [384

In [62]:
model.model = model.model.to('cpu')

In [None]:
https://github.com/ultralytics/yolov5/issues/1288

In [63]:
backend = "x86"
model.model.qconfig = torch.quantization.get_default_qconfig(backend)
torch.backends.quantized.engine = backend
model_static_quantized = torch.quantization.prepare(model.model, inplace=False)
model_static_quantized = torch.quantization.convert(model_static_quantized, inplace=False)



In [64]:
print(f'Quant model size: {check_model_size(model_static_quantized)}')

Quant model size: 0.2541542053222656


In [65]:
model.model = model_static_quantized

In [66]:
check_model_size(model.model)

0.2541542053222656

#Pruning

Iterative Pruning
The same parameter in a module can be pruned multiple times, with the effect of the various pruning calls being equal to the combination of the various masks applied in series. The combination of a new mask with the old mask is handled by the PruningContainer’s compute_mask method.

Say, for example, that we now want to further prune module.weight, this time using structured pruning along the 0th axis of the tensor (the 0th axis corresponds to the output channels of the convolutional layer and has dimensionality 6 for conv1), based on the channels’ L2 norm. This can be achieved using the ln_structured function, with n=2 and dim=0

In [None]:
import torch, torch.nn as nn
from torch.nn.utils import prune
from ultralytics import YOLO


model = YOLO("yolov8m.pt")

def sparsity(model):
    # Return global model sparsity
    a, b = 0, 0
    for p in model.parameters():
        a += p.numel()
        b += (p == 0).sum()
    return b / a

for name, m in model.model.named_modules():
    if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
        print(f'Before {m.weight}')
        prune.ln_structured(m, name='weight', amount=0.05, n=2, dim=0)  # prune
        print(f'After {m.weight}')
        prune.remove(m, 'weight')  # make permanent
print(f'Model pruned to {sparsity(model.model):.3g} global sparsity')

ckpt = {

            'model': model.model,
            'train_args': {},  # save as dict
}

print(f'Pruned model size: {check_model_size(model.model)}')

torch.save(ckpt, './model_pruned.pt')

pruned_model = YOLO("./model_pruned.pt")

#results = pruned_model.val()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
          [ 8.5974e-04,  4.2000e-03,  9.0885e-04]],

         [[ 3.6907e-04,  1.4582e-03,  6.1073e-03],
          [ 1.2407e-03,  5.7182e-03, -2.3174e-04],
          [-1.5230e-03, -7.7133e-03, -2.4056e-04]],

         [[-6.5231e-03,  8.1253e-03,  9.2983e-04],
          [-5.6000e-03,  5.0507e-03, -6.9962e-03],
          [ 2.3766e-03, -3.3203e-02, -1.3580e-03]],

         ...,

         [[-9.3317e-04,  1.3247e-03,  6.0368e-04],
          [ 3.6011e-03,  2.4529e-03,  1.2093e-03],
          [ 7.8735e-03,  1.5404e-02,  4.9171e-03]],

         [[ 1.8263e-03,  6.3591e-03,  5.8899e-03],
          [ 7.6294e-04,  5.5313e-03, -1.2245e-03],
          [-4.3640e-03, -5.7030e-04,  1.6460e-03]],

         [[-1.0967e-03, -2.1648e-03, -7.9203e-04],
          [ 1.4267e-03,  1.5345e-03,  1.5211e-03],
          [-6.4659e-04, -1.0996e-03,  1.4944e-03]]],


        [[[ 1.6727e-03, -1.1978e-03, -5.4646e-04],
          [ 3.8414e-03,  4.6692e-03, -1

In [None]:
results = pruned_model.val(data='coco.yaml')

Ultralytics YOLOv8.0.180 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8m summary (fused): 218 layers, 25886080 parameters, 0 gradients
[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [02:10<00:00,  2.40it/s]
                   all       5000      36335      0.543      0.272      0.307      0.217
                person       5000      10777      0.559      0.663      0.655      0.454
               bicycle       5000        314      0.423      0.351      0.336      0.195
                   car       5000       1918      0.636      0.499      0.529      0.359
            motorcycle       5000        367      0.642      0.346      0.421      0.254
              airplane       5000        143      0.774      0.574      0.663      0.487
          

loading annotations into memory...
Done (t=0.54s)
creating index...
index created!
Loading and preparing results...
DONE (t=7.20s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=72.80s).
Accumulating evaluation results...
DONE (t=21.10s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.215
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.305
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.236
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.128
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.271
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.282
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.229
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.387
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDe

Results saved to [1mruns/detect/val2[0m


In [None]:
print(f'box map50-95 {results.box.map:.3}')
print(f'box map50 {results.box.map50:.3}')
print(f'box map75 {results.box.map75:.3}')

box map50-95 0.217
box map50 0.307
box map75 0.238


In [None]:
check_model_size(pruned_model.model)

98.74755859375