# PyTorch Fuse

In [1]:
import ultralytics
import shutil

ultralytics.checks()

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CUDA:0 (NVIDIA A10, 24074MiB)
Setup complete ✅ (12 CPUs, 31.1 GB RAM, 297.4/913.8 GB disk)


In [2]:
from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

## Измерим параметры на ГПУ

In [3]:
# Validate the model
# При валлидации происходит Fusing слоев модели!
# было 261 слой, стало 195 слоев ! 
# уменьшение слоев примерно на 25%
metrics = model.val(data="ms_coco_val_2017.yaml", device=0)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CUDA:0 (NVIDIA A10, 24074MiB)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [00:26<00:00, 11.93it/s]
                   all       5000      36335      0.692      0.555      0.609      0.445      0.683      0.536       0.58      0.371
                person       5000      10777      0.801      0.712      0.797      0.575      0.796      0.694      0.774      0.465
               bicycle       5000        314       0.75      0.481      0.574      0.337      0.666      0.411      0.462      0.194
                   car  

## Измерим параметры на ЦПУ

In [4]:
metrics = model.val(data="ms_coco_val_2017.yaml", device='cpu')

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 313/313 [07:42<00:00,  1.48s/it]
                   all       5000      36335      0.692      0.555      0.609      0.445      0.684      0.536       0.58      0.371
                person       5000      10777      0.801      0.712      0.797      0.575      0.798      0.694      0.774      0.465
               bicycle       5000        314      0.751      0.481      0.573      0.337      0.666      0.411      0.462      0.194
                   car       5000       1918      0.717      0.588      0.652      0.443      0.704   

In [5]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

mask map50-95 0.371
mask map50 0.58
mask map75 0.396


In [6]:
# оценим размер сериализованной модели
model_size = os.path.getsize("yolov8s-seg.pt") / 1024**2
print(f"{model_size:.3} Мб")

22.8 Мб


# Экспорт моделей в различные форматы

Согласно официальной документации и туториалу `YOLOv8` поддерживает экспорт в множество различных форматов. 

Export a YOLOv8 model to any supported format below with the `format` argument, i.e. `format=onnx`. See [YOLOv8 Export Docs](https://docs.ultralytics.com/modes/export/) for more information.

- 💡 ProTip: Export to [ONNX](https://onnx.ai/) or [OpenVINO](https://docs.openvino.ai/latest/index.html) for up to 3x CPU speedup.  
- 💡 ProTip: Export to [TensorRT](https://developer.nvidia.com/tensorrt) for up to 5x GPU speedup.


| Format                                                             | `format` Argument | Model                     | Metadata | Arguments                                           |
|--------------------------------------------------------------------|-------------------|---------------------------|----------|-----------------------------------------------------|
| [PyTorch](https://pytorch.org/)                                    | -                 | `yolov8n.pt`              | ✅        | -                                                   |
| [TorchScript](https://pytorch.org/docs/stable/jit.html)            | `torchscript`     | `yolov8n.torchscript`     | ✅        | `imgsz`, `optimize`                                 |
| [ONNX](https://onnx.ai/)                                           | `onnx`            | `yolov8n.onnx`            | ✅        | `imgsz`, `half`, `dynamic`, `simplify`, `opset`     |
| [OpenVINO](https://docs.openvino.ai/latest/index.html)             | `openvino`        | `yolov8n_openvino_model/` | ✅        | `imgsz`, `half`                                     |
| [TensorRT](https://developer.nvidia.com/tensorrt)                  | `engine`          | `yolov8n.engine`          | ✅        | `imgsz`, `half`, `dynamic`, `simplify`, `workspace` |
| [CoreML](https://github.com/apple/coremltools)                     | `coreml`          | `yolov8n.mlpackage`       | ✅        | `imgsz`, `half`, `int8`, `nms`                      |
| [TF SavedModel](https://www.tensorflow.org/guide/saved_model)      | `saved_model`     | `yolov8n_saved_model/`    | ✅        | `imgsz`, `keras`                                    |
| [TF GraphDef](https://www.tensorflow.org/api_docs/python/tf/Graph) | `pb`              | `yolov8n.pb`              | ❌        | `imgsz`                                             |
| [TF Lite](https://www.tensorflow.org/lite)                         | `tflite`          | `yolov8n.tflite`          | ✅        | `imgsz`, `half`, `int8`                             |
| [TF Edge TPU](https://coral.ai/docs/edgetpu/models-intro/)         | `edgetpu`         | `yolov8n_edgetpu.tflite`  | ✅        | `imgsz`                                             |
| [TF.js](https://www.tensorflow.org/js)                             | `tfjs`            | `yolov8n_web_model/`      | ✅        | `imgsz`                                             |
| [PaddlePaddle](https://github.com/PaddlePaddle)                    | `paddle`          | `yolov8n_paddle_model/`   | ✅        | `imgsz`                                             |
| [ncnn](https://github.com/Tencent/ncnn)                            | `ncnn`            | `yolov8n_ncnn_model/`     | ✅        | `imgsz`, `half`                                     |


# Экспорт в ONNX

## ONNX only

In [10]:
# создадим чистую исходную модель

from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              


  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256, 512, 3, 2]              
  8                  -1  1   1838080  ultralytics.nn.modules.block.C2f             [512, 512, 1, True]           
  9                  -1  1    656896  ultralytics.nn.modules.block.SPPF            [512, 512, 5]                 
 10                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 11             [-1, 6]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 12                  -1  1    591360  ultralytics.nn.modules.block.C2f             [768, 256, 1]                 
 13                  -1  1         0  torch.nn.modules.upsampling.Upsample         [None, 2, 'nearest']          
 14             [-1, 4]  1         0  ultralytics.nn.modules.conv.Concat           [1]  

In [11]:
model.export(format="onnx")

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (22.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...
[34m[1mONNX:[0m export success ✅ 0.4s, saved as 'yolov8s-seg.onnx' (45.3 MB)

Export complete (1.8s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation[0m
Predict:         yolo predict task=segment model=yolov8s-seg.onnx imgsz=640  
Validate:        yolo val task=segment model=yolov8s-seg.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8s-seg.onnx'

In [9]:
# Load a model
model = YOLO("yolov8s-seg.onnx")

FileNotFoundError: 'yolov8s-seg.onnx' does not exist

## Измерим параметры на ГПУ

In [None]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device=0)

### Измерим параметры на ЦПУ

In [None]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device='cpu')

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
Loading yolov8s-seg.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,640,640) for non-PyTorch models
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework1/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 5000/5000 [09:02<00:00,  9.22it/s]
                   all       5000      36335      0.692      0.552      0.606      0.444      0.686      0.531      0.577       0.37
                person       5000      10777      0.798      0.711      0.796      0.574      0.797      0.691      0.772      0.464
               bicycle       5000        314      0.728      0.465      0.567      0.335      0.644      0.3

In [None]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

mask map50-95 0.37
mask map50 0.577
mask map75 0.395


In [None]:
# оценим размер сериализованной модели
model_size = os.path.getsize("yolov8s-seg.onnx") / 1024**2
print(f"{model_size:.3} Мб")

45.3 Мб


In [12]:
os.remove("yolov8s-seg.onnx")

## ONNX simplify

In [13]:
# создадим чистую исходную модель

from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

 19                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1   1969152  ultralytics.nn.modules.block.C2f             [768, 512, 1]                 
 22        [15, 18, 21]  1   2801504  ultralytics.nn.modules.head.Segment          [80, 32, 128, [128, 256, 512]]
YOLOv8s-seg summary: 261 layers, 11821056 parameters, 11821040 gradients, 42.9 GFLOPs



In [14]:
model.export(format="onnx", simplify=True)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (22.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...
[34m[1mONNX:[0m simplifying with onnxsim 0.4.35...
[34m[1mONNX:[0m export success ✅ 1.0s, saved as 'yolov8s-seg.onnx' (45.2 MB)

Export complete (2.5s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation[0m
Predict:         yolo predict task=segment model=yolov8s-seg.onnx imgsz=640  
Validate:        yolo val task=segment model=yolov8s-seg.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8s-seg.onnx'

In [15]:
# Load a model
model = YOLO("yolov8s-seg.onnx")

### Измерим параметры на ЦПУ

In [16]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device='cpu')

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
Loading yolov8s-seg.onnx for ONNX Runtime inference...
Forcing batch=1 square inference (1,3,640,640) for non-PyTorch models
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):   0%|          | 16/5000 [00:01<09:05,  9.13it/s]


KeyboardInterrupt: 

In [None]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

In [None]:
# оценим размер сериализованной модели
model_size = os.path.getsize("yolov8s-seg.onnx") / 1024**2
print(f"{model_size:.3} Мб")

In [None]:
os.remove("yolov8s-seg.onnx")

## ONNX simplify half

In [17]:
# создадим чистую исходную модель

from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

In [18]:
model.export(format="onnx", simplify=True, half=True)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (22.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...
[34m[1mONNX:[0m simplifying with onnxsim 0.4.35...
[34m[1mONNX:[0m export success ✅ 1.0s, saved as 'yolov8s-seg.onnx' (45.2 MB)

Export complete (2.3s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation[0m
Predict:         yolo predict task=segment model=yolov8s-seg.onnx imgsz=640  
Validate:        yolo val task=segment model=yolov8s-seg.onnx imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8s-seg.onnx'

In [None]:
# Load a model
model = YOLO("yolov8s-seg.onnx")

### Измерим параметры на ЦПУ

In [None]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device='cpu')

In [None]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

In [None]:
# оценим размер сериализованной модели
model_size = os.path.getsize("yolov8s-seg.onnx") / 1024**2
print(f"{model_size:.3} Мб")

In [None]:
os.remove("yolov8s-seg.onnx")

# Экспорт в TensorRT

## TensorRT

## TensorRT Simplify

## TensorRT Simplify Half

In [19]:
# создадим чистую исходную модель

from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

In [20]:
model.export(format="engine", simplify=True, half=True)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CUDA:0 (NVIDIA A10, 24074MiB)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (22.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...
[34m[1mONNX:[0m simplifying with onnxsim 0.4.35...
[34m[1mONNX:[0m export success ✅ 0.8s, saved as 'yolov8s-seg.onnx' (45.2 MB)

[34m[1mTensorRT:[0m starting export with TensorRT 8.6.1...
[34m[1mTensorRT:[0m input "images" with shape(1, 3, 640, 640) DataType.FLOAT
[34m[1mTensorRT:[0m output "output0" with shape(1, 116, 8400) DataType.FLOAT
[34m[1mTensorRT:[0m output "output1" with shape(1, 32, 160, 160) DataType.FLOAT
[34m[1mTensorRT:[0m building FP32 engine as yolov8s-seg.engine


[10/22/2023-20:09:01] [TRT] [I] [MemUsageChange] Init CUDA: CPU +349, GPU +0, now: CPU 5137, GPU 12330 (MiB)
[10/22/2023-20:09:05] [TRT] [I] [MemUsageChange] Init builder kernel library: CPU +1218, GPU +268, now: CPU 6431, GPU 12598 (MiB)
[10/22/2023-20:09:05] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading
[10/22/2023-20:09:05] [TRT] [I] ----------------------------------------------------------------
[10/22/2023-20:09:05] [TRT] [I] Input filename:   yolov8s-seg.onnx
[10/22/2023-20:09:05] [TRT] [I] ONNX IR version:  0.0.8
[10/22/2023-20:09:05] [TRT] [I] Opset version:    17
[10/22/2023-20:09:05] [TRT] [I] Producer name:    pytorch
[10/22/2023-20:09:05] [TRT] [I] Producer version: 2.1.0
[10/22/2023-20:09:05] [TRT] [I] Domain:           
[10/22/2023-20:09:05] [TRT] [I] Mod

[34m[1mTensorRT:[0m export success ✅ 90.6s, saved as 'yolov8s-seg.engine' (54.6 MB)

Export complete (90.8s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation[0m
Predict:         yolo predict task=segment model=yolov8s-seg.engine imgsz=640  
Validate:        yolo val task=segment model=yolov8s-seg.engine imgsz=640 data=coco.yaml  
Visualize:       https://netron.app


'yolov8s-seg.engine'

In [21]:
# Load a model
model = YOLO("yolov8s-seg.engine")

### Измерим параметры на ГПУ

In [22]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device=0)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CUDA:0 (NVIDIA A10, 24074MiB)
Loading yolov8s-seg.engine for TensorRT inference...


[10/22/2023-20:10:31] [TRT] [I] Loaded engine size: 54 MiB
[10/22/2023-20:10:31] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +51, now: CPU 0, GPU 51 (MiB)
[10/22/2023-20:10:31] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +39, now: CPU 0, GPU 90 (MiB)
[10/22/2023-20:10:31] [TRT] [W] CUDA lazy loading is not enabled. Enabling it can significantly reduce device memory usage and speed up TensorRT initialization. See "Lazy Loading" section of CUDA documentation https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#lazy-loading


[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95):  20%|█▉        | 981/5000 [00:10<00:44, 90.38it/s]


KeyboardInterrupt: 

In [6]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

mask map50-95 0.37
mask map50 0.577
mask map75 0.395


In [8]:
# оценим размер сериализованной модели
model_size = os.path.getsize("yolov8s-seg.engine") / 1024**2
print(f"{model_size:.3} Мб")

54.7 Мб


In [None]:
os.remove("yolov8s-seg.engine")

# Экспорт в OpenVino

## OpenVino

In [23]:
# создадим чистую исходную модель

from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

 15                  -1  1    148224  ultralytics.nn.modules.block.C2f             [384, 128, 1]                 
 16                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 19                  -1  1    590336  ultralytics.nn.modules.conv.Conv             [256, 256, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1   1969152  ultralytics.nn.modules.block.C2f             [768, 512, 1]                 
 22        [15, 18, 21]  1   2801504  ultralytics.nn.modules.head.Segment          [80, 32, 128, [128, 256, 512]]
YOLOv8s-seg summary: 261 layers, 11821056 parameters, 11821040 gradients, 42.9 GFLOPs



In [24]:
model.export(format="openvino")

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (22.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...
[34m[1mONNX:[0m export success ✅ 0.5s, saved as 'yolov8s-seg.onnx' (45.3 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2023.1.0-12185-9e6b00e51cd-releases/2023/1...
[34m[1mOpenVINO:[0m export success ✅ 0.6s, saved as 'yolov8s-seg_openvino_model/' (45.4 MB)

Export complete (2.5s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation[0m
Predict:         yolo predict task=segment model=yolov8s-seg_openvino_model imgsz=640  
Validate:        yolo val task=segment model=yolov8s-seg_openvino_model imgsz=640 data=coco.yaml  
Visualize:       htt

'yolov8s-seg_openvino_model'

In [11]:
# Load a model
model = YOLO("yolov8s-seg_openvino_model")

### Измерим параметры на ЦПУ

In [12]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device='cpu')

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
Loading yolov8s-seg_openvino_model for OpenVINO inference...
Forcing batch=1 square inference (1,3,640,640) for non-PyTorch models
[34m[1mval: [0mScanning /home/admin-gpu/Downloads/yolo_VIKA/homework1/segmentation/datasets/ms_coco_val_2017/val/labels.cache... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Mask(P          R      mAP50  mAP50-95): 100%|██████████| 5000/5000 [06:13<00:00, 13.39it/s]
                   all       5000      36335      0.692      0.552      0.606      0.444      0.686      0.531      0.577       0.37
                person       5000      10777      0.798      0.711      0.796      0.574      0.797      0.691      0.772      0.464
               bicycle       5000        314      0.728      0.465      0.567      0.335      0.644   

In [15]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

mask map50-95 0.37
mask map50 0.577
mask map75 0.395


In [17]:
# оценим размер сериализованной модели
model_size = sum(os.path.getsize(f) for f in os.listdir('.') if os.path.isfile(f)) / 1024**2
print(f"{model_size:.3} Мб")

1.23e+02 Мб


In [25]:
shutil.rmtree('yolov8s-seg_openvino_model')

## OpenVino half

In [26]:
# создадим чистую исходную модель

from ultralytics import YOLO
import os

# Load a model
# build a new model from scratch
model = YOLO("yolov8s-seg.yaml")
# load a pretrained model (recommended for training)
model = YOLO("yolov8s-seg.pt")


                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

In [27]:
model.export(format="openvino", half=True)

Ultralytics YOLOv8.0.200 🚀 Python-3.10.13 torch-2.1.0+cu121 CPU (12th Gen Intel Core(TM) i5-12600)
YOLOv8s-seg summary (fused): 195 layers, 11810560 parameters, 0 gradients, 42.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s-seg.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) ((1, 116, 8400), (1, 32, 160, 160)) (22.8 MB)

[34m[1mONNX:[0m starting export with onnx 1.14.1 opset 17...
[34m[1mONNX:[0m export success ✅ 0.5s, saved as 'yolov8s-seg.onnx' (45.3 MB)

[34m[1mOpenVINO:[0m starting export with openvino 2023.1.0-12185-9e6b00e51cd-releases/2023/1...
[34m[1mOpenVINO:[0m export success ✅ 0.1s, saved as 'yolov8s-seg_openvino_model/' (22.9 MB)

Export complete (2.0s)
Results saved to [1m/home/admin-gpu/Downloads/yolo_VIKA/homework5/segmentation[0m
Predict:         yolo predict task=segment model=yolov8s-seg_openvino_model imgsz=640 half 
Validate:        yolo val task=segment model=yolov8s-seg_openvino_model imgsz=640 data=coco.yaml half 
Visualize:  

'yolov8s-seg_openvino_model'

In [28]:
# Load a model
model = YOLO("yolov8s-seg_openvino_model")

### Измерим параметры на ЦПУ

In [None]:
metrics = model.val(data="ms_coco_val_2017.yaml", imgsz=640, device='cpu')

In [None]:
# оценим метрики для сегментации
print(f"mask map50-95 {metrics.seg.map:.3}")
print(f"mask map50 {metrics.seg.map50:.3}")
print(f"mask map75 {metrics.seg.map75:.3}")

In [29]:
# оценим размер сериализованной модели
model_size = sum(os.path.getsize(f) for f in os.listdir('.') if os.path.isfile(f)) / 1024**2
print(f"{model_size:.3} Мб")

1.23e+02 Мб


In [30]:
shutil.rmtree('yolov8s-seg_openvino_model')