# Проверим установку Ultralytics

Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/requirements.txt) and check PyTorch and GPU.

In [1]:
import ultralytics
ultralytics.checks()

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CUDA:0 (NVIDIA A10, 24074MiB)
Setup complete ✅ (12 CPUs, 31.1 GB RAM, 232.7/913.8 GB disk)


# Измерим метрики перед квантизацией и прунингом

In [2]:
import os

In [3]:
from ultralytics import YOLO

# Load a model
# load an official model
model = YOLO("yolov8l-seg.yaml")  # build a new model from scratch
model = YOLO('yolov8l-seg.pt')


                   from  n    params  module                                       arguments                     


  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  3    279808  ultralytics.nn.modules.block.C2f             [128, 128, 3, True]           
  3                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  4                  -1  6   2101248  ultralytics.nn.modules.block.C2f             [256, 256, 6, True]           
  5                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              
  6                  -1  6   8396800  ultralytics.nn.modules.block.C2f             [512, 512, 6, True]           
  7                  -1  1   2360320  ultralytics.nn.modules.conv.Conv             [512, 512, 3, 2]              
  8                  -1  3   4461568  ultralytics.nn.modules.block.C2f             [512,

In [6]:
# Validate the model
# no arguments needed, dataset and settings remembered
metrics = model.val(task='segment', imgsz=640, data='ms_coco_val_2017.yaml', batch=1, device=0)

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CUDA:0 (NVIDIA A10, 24074MiB)
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework1/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 5000/5000 [01:31<00:00, 54.63it/s]
                   all       5000      36335      0.748      0.616      0.688      0.522      0.741      0.599      0.659      0.431
                person       5000      10777      0.832       0.75      0.843      0.636      0.827      0.733      0.822      0.516
               bicycle       5000        314      0.789      0.541      0.662      0.421      0.734      0.478      0.581      0.251
                   car       5000       1918      0.772      0.661      0.734      0.522      0.758      0.6

In [8]:
print(f'mask map50-95 {metrics.seg.map:.3}')
print(f'mask map50 {metrics.seg.map50:.3}')
print(f'mask map75 {metrics.seg.map75:.3}')

mask map50-95 0.431
mask map50 0.659
mask map75 0.465


In [9]:
def check_model_size(model):
    buffer_size, param_size = 0, 0
    for param in model.parameters():
        param_size += param.nelement() * param.element_size()

    for buffer in model.buffers():
        buffer_size += buffer.nelement() * buffer.element_size()

    size_all_mb = (param_size + buffer_size) / 1024**2

    return size_all_mb

In [10]:
# измерим вес модели в мб
check_model_size(model.model) # size in mb

175.375244140625

# Квантизация модели

согласно туториалу PyTorch:
- https://pytorch.org/docs/stable/quantization.html

## Динамическая квантизация

In [15]:
from ultralytics import YOLO

# перед экспериментами будем загружать неизменную модель
model = YOLO('yolov8l-seg.pt')

In [16]:
import torch

# динамическая квантизация - поддерживает разные слои.
# однако из тех которые есть в нашей модели это только линейные слои.
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8)

In [17]:
ckpt = {

            'model': model_int8,
            'train_args': {},  # save as dict
}

print(f'Quant model size: {check_model_size(model_int8)}')

torch.save(ckpt, './model_quant.pt')

quant_model = YOLO("./model_quant.pt")

Quant model size: 175.6525421142578


In [18]:
quant_model.export(format='onnx')

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8l-seg summary (fused): 295 layers, 45973568 parameters, 0 gradients

[34m[1mPyTorch:[0m starting from 'model_quant.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (176.0 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 16...
[34m[1mONNX:[0m export success ✅ 1.5s, saved as 'model_quant.onnx' (175.6 MB)

Export complete (3.6s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework2/segmentation[0m
Predict:         yolo predict task=detect model=model_quant.onnx imgsz=640  
Validate:        yolo val task=detect model=model_quant.onnx imgsz=640 data=None  
Visualize:       https://netron.app


'model_quant.onnx'

In [20]:
quant_model = YOLO("./model_quant.onnx")



In [21]:
# Validate the model
# no arguments needed, dataset and settings remembered
metrics = quant_model.val(task='segment', imgsz=640, data='ms_coco_val_2017.yaml', batch=1, device=0)

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CUDA:0 (NVIDIA A10, 24074MiB)
Loading model_quant.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,640,640) for non-PyTorch models
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework1/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/5000 [00:00<?, ?it/s]


IndexError: index 97 is out of bounds for axis 0 with size 81

In [None]:
print(f'mask map50-95 {metrics.seg.map:.3}')
print(f'mask map50 {metrics.seg.map50:.3}')
print(f'mask map75 {metrics.seg.map75:.3}')

box map50-95 0.501
box map50 0.667
box map75 0.546


## ONNX dynamic quantization


In [13]:
from ultralytics import YOLO

# перед экспериментами будем загружать неизменную модель
model = YOLO("yolov8l-seg.yaml")  # build a new model from scratch
model = YOLO('yolov8l-seg.pt')


                   from  n    params  module                                       arguments                     
  0                  -1  1      1856  ultralytics.nn.modules.conv.Conv             [3, 64, 3, 2]                 
  1                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  2                  -1  3    279808  ultralytics.nn.modules.block.C2f             [128, 128, 3, True]           
  3                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  4                  -1  6   2101248  ultralytics.nn.modules.block.C2f             [256, 256, 6, True]           
  5                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              


  6                  -1  6   8396800  ultralytics.nn.modules.block.C2f             [512, 512, 6, True]           
  7                  -1  1   2360320  ultralytics.nn.modules.conv.Conv             [512, 512, 3, 2]              
  8                  -1  3   4461568  ultralytics.nn.modules.block.C2f             [512, 512, 3, True]           
  9                  -1  1    656896  ultralytics.nn.modules.block.SPPF            [512, 512, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  3   4723712  ultralytics.nn.modules.block.C2f             [1024, 512, 3]                
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]  

In [14]:
model.export(format='onnx')

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8l-seg summary (fused): 295 layers, 45973568 parameters, 0 gradients

[34m[1mPyTorch:[0m starting from 'yolov8l-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (88.1 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 16...
[34m[1mONNX:[0m export success ✅ 1.6s, saved as 'yolov8l-seg.onnx' (175.6 MB)

Export complete (3.6s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework2/segmentation[0m
Predict:         yolo predict task=segment model=yolov8l-seg.onnx imgsz=640  
Validate:        yolo val task=segment model=yolov8l-seg.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8l-seg.onnx'

In [15]:
import os

os.path.getsize('yolov8l-seg.pt') / 1024**2

88.11174297332764

In [16]:
import os

os.path.getsize('yolov8l-seg.onnx') / 1024**2

175.61448764801025

In [17]:
def quantize_onnx_model(onnx_model_path, quantized_model_path):
    from onnxruntime.quantization import quantize_dynamic, QuantType
    import onnx
    onnx_opt_model = onnx.load(onnx_model_path)
    quantize_dynamic(onnx_model_path,
                     quantized_model_path,
                     weight_type=QuantType.QUInt8)

    print(f"quantized model saved to:{quantized_model_path}")

In [18]:
quantize_onnx_model('yolov8l-seg.onnx', 'quant_yolov8l-seg.onnx')



quantized model saved to:quant_yolov8l-seg.onnx


In [19]:
os.path.getsize('quant_yolov8l-seg.onnx') / 1024**2

45.09090232849121

In [20]:
model = YOLO("quant_yolov8l-seg.onnx")

In [21]:
# Validate the model
metrics = model.val(task='segment', imgsz=640, data='ms_coco_val_2017.yaml', batch=16, device=0)

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CUDA:0 (NVIDIA A10, 24074MiB)
Loading quant_yolov8l-seg.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,640,640) for non-PyTorch models
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework1/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 5000/5000 [19:15<00:00,  4.33it/s]
                   all       5000      36335      0.716      0.615      0.675       0.51      0.719      0.589      0.644       0.42
                person       5000      10777      0.811      0.758      0.835      0.628      0.817      0.732      0.814      0.508
               bicycle       5000        314      0.725      0.525      0.633      0.417      0.704       0.47

In [22]:
print(f'mask map50-95 {metrics.seg.map:.3}')
print(f'mask map50 {metrics.seg.map50:.3}')
print(f'mask map75 {metrics.seg.map75:.3}')

mask map50-95 0.42
mask map50 0.644
mask map75 0.454


In [None]:
# исходные!
# mask map50-95 0.431
# mask map50 0.659
# mask map75 0.465

# Прунинг

Iterative Pruning
The same parameter in a module can be pruned multiple times, with the effect of the various pruning calls being equal to the combination of the various masks applied in series. The combination of a new mask with the old mask is handled by the PruningContainer’s compute_mask method.

Say, for example, that we now want to further prune module.weight, this time using structured pruning along the 0th axis of the tensor (the 0th axis corresponds to the output channels of the convolutional layer and has dimensionality 6 for conv1), based on the channels’ L2 norm. This can be achieved using the ln_structured function, with n=2 and dim=0

In [23]:
import torch, torch.nn as nn
from torch.nn.utils import prune
from ultralytics import YOLO


model = YOLO("yolov8l-seg.pt")

def sparsity(model):
    # Return global model sparsity
    a, b = 0, 0
    for p in model.parameters():
        a += p.numel()
        b += (p == 0).sum()
    return b / a

for name, m in model.model.named_modules():
    if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
        print(f'Before {m.weight}')
        prune.ln_structured(m, name='weight', amount=0.05, n=2, dim=0)  # prune
        print(f'After {m.weight}')
        prune.remove(m, 'weight')  # make permanent
print(f'Model pruned to {sparsity(model.model):.3g} global sparsity')

ckpt = {

            'model': model.model,
            'train_args': {},  # save as dict
}

print(f'Pruned model size: {check_model_size(model.model)}')

torch.save(ckpt, './model_pruned.pt')

pruned_model = YOLO("./model_pruned.pt")

#results = pruned_model.val()

Before Parameter containing:
tensor([[[[ 1.0357e-03, -1.0452e-02,  1.3603e-02],
          [ 2.1008e-01, -1.9971e-01, -1.4839e-02],
          [-2.1631e-01,  2.2192e-01, -6.9695e-03]],

         [[ 6.2346e-05, -9.8114e-03,  4.5395e-03],
          [ 2.4463e-01, -2.4231e-01, -1.6394e-03],
          [-2.4304e-01,  2.4048e-01,  6.1836e-03]],

         [[-1.0658e-02,  2.5925e-02, -1.0002e-02],
          [ 1.4575e-01, -1.5576e-01,  1.5381e-02],
          [-1.3501e-01,  1.2085e-01,  1.8110e-03]]],


        [[[-2.0532e-01,  1.5698e-01,  6.4758e-02],
          [ 2.0264e-01,  1.6602e-02, -2.2827e-01],
          [ 7.2098e-03, -1.3013e-01,  1.1633e-01]],

         [[-2.3242e-01,  9.9609e-02,  1.1566e-01],
          [ 1.9690e-01, -6.4735e-03, -1.8274e-01],
          [ 1.9989e-02, -5.5237e-02,  4.0100e-02]],

         [[-1.5771e-01,  3.6774e-02,  1.1108e-01],
          [ 9.0088e-02, -1.0757e-02, -5.0232e-02],
          [ 4.4189e-02, -1.5587e-02, -4.7150e-02]]],


        [[[ 1.8762e-01, -2.5488e-01, 

In [28]:
model.nc

AttributeError: 'YOLO' object has no attribute 'nc'. See valid attributes below.

    YOLO (You Only Look Once) object detection model.
    

In [27]:
# results = pruned_model.val(data='coco.yaml')
metrics = pruned_model.val(task='segment', imgsz=640, data='ms_coco_val_2017.yaml', batch=16, device=0)

Ultralytics YOLOv8.0.176 🚀 Python-3.10.11 torch-1.13.1+cu117 CUDA:0 (NVIDIA A10, 24074MiB)
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework1/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/313 [00:00<?, ?it/s]


IndexError: index 97 is out of bounds for axis 0 with size 81

In [None]:
print(f'mask map50-95 {metrics.seg.map:.3}')
print(f'mask map50 {metrics.seg.map50:.3}')
print(f'mask map75 {metrics.seg.map75:.3}')

box map50-95 0.217
box map50 0.307
box map75 0.238


In [None]:
check_model_size(pruned_model.model)

98.74755859375