# Методы компрессии нейронных сетей
## Лекция №2 - Методы снижения размерности.

*   Квантизация весов моделей
*   Pruning
*   Практика: квантизация модели и применение прунинга

## ДЗ №2
Применить эти подходы (квантизацию и прунинг) к своим моделям и замерить производительность.

## Домашняя работа
В качестве модели выбрана YOLOv8m-cls

Проверяются следующие метрики производительности:

*   число параметров модели;
*   вес файла модели;
*   время инференса;
*   целевая метрика.

In [None]:
!pip install ultralytics==8.0.180

In [None]:
from ultralytics import YOLO
import torch
from torch.nn.utils import prune

In [None]:
def measure_size(model):
    """Print the in-memory size of the wrapped network in megabytes.

    The size is the total byte footprint (element count times element size)
    of every parameter and every buffer exposed by ``model.model``, divided
    by 1024**2. Nothing is returned; the result is only printed.
    """
    net = model.model
    tensors = [*net.parameters(), *net.buffers()]
    total_bytes = sum(t.nelement() * t.element_size() for t in tensors)
    print(f'model size: {total_bytes / 1024**2:.3f}MB')

# Основная модель

In [None]:
# Baseline model: construct the YOLO wrapper from the local './best.pt' file.
model = YOLO('./best.pt')

#### Число параметров модели

In [None]:
# Show the baseline model summary (layer and parameter counts).
model.info()

YOLOv8m-cls summary: 141 layers, 15785146 parameters, 0 gradients, 41.9 GFLOPs


(141, 15785146, 0, 41.901260799999996)

#### Размер модели

In [None]:
# Print the baseline size as computed by measure_size (parameters + buffers, in MB).
measure_size(model)

model size: 60.179MB


#### Целевая метрика

In [None]:
# Validate the baseline model with data='imagenette'; the returned metrics
# object is read by the following cells (top1/top5 accuracy and speed).
metrics = model.val(data='imagenette')

In [None]:
# Report the top-1 and top-5 accuracy of the baseline validation run.
for label, value in (("top1_acc:", metrics.top1), ("top5_acc:", metrics.top5)):
    print(label, value)

top1_acc: 0.9714649319648743
top5_acc: 0.9989808201789856


#### Время инференса

In [None]:
# Print the time recorded for each validation stage of the baseline model.
# The loop variable is not called `time`, to avoid clashing with the stdlib module name.
for stage_name, stage_ms in metrics.speed.items():
    print(f'{stage_name} speed {stage_ms:.3f} ms')

preprocess speed 0.106 ms
inference speed 1.840 ms
loss speed 0.004 ms
postprocess speed 0.004 ms


# Квантизация

In [None]:
# Post-training dynamic quantization of the baseline network to int8.
# Only torch.nn.Linear is requested. torch.nn.Conv2d used to be listed as well,
# but dynamic quantization has no Conv2d module mapping, so the entry had no
# effect: the reported model size dropped only by roughly the size of the
# classifier head. Quantizing the convolutions would need static quantization
# with calibration instead.
# NOTE(review): confirm the supported-layer list against the PyTorch version in use.
model_int8 = torch.ao.quantization.quantize_dynamic(
    model.model,
    {torch.nn.Linear},
    dtype=torch.qint8)

In [None]:
# Package the quantized network as a checkpoint dict ('model' + 'train_args')
# and write it to './model_quant.pt' so it can be reloaded through YOLO(...).
# The baseline wrapper's overrides are stored instead of an empty dict: with
# empty train_args the reloaded model keeps no record of the original task or
# image size, so validation runs under default settings and its accuracy and
# timing are not comparable with the baseline.
# NOTE(review): assumes model.overrides carries task/imgsz for the pinned
# ultralytics version - confirm.
ckpt = {
    'model': model_int8,
    'train_args': dict(model.overrides),  # save as dict
}
torch.save(ckpt, './model_quant.pt')

In [None]:
# Reload the saved quantized checkpoint through the YOLO wrapper.
quant_model = YOLO('./model_quant.pt')
# Set the task on the reloaded wrapper explicitly to classification.
quant_model.task = 'classify'

  device=storage.device,


#### Число параметров модели

In [None]:
# Show the summary of the reloaded quantized model.
quant_model.info()

YOLOv8m-cls summary (fused): 104 layers, 15762656 parameters, 0 gradients


(104, 15762656, 0, 0)

#### Размер модели

In [None]:
# Print the quantized model's size as measure_size counts it (parameters + buffers).
# NOTE(review): packed weights of dynamically quantized layers are presumably not
# exposed through parameters()/buffers(), so this figure may not reflect the real
# footprint - compare the saved file sizes as well.
measure_size(quant_model)

model size: 60.130MB


#### Целевая метрика

In [None]:
# Validate the quantized model with data='imagenette' and keep the metrics
# object for the accuracy and speed cells below.
quant_model_metrics = quant_model.val(data='imagenette')

In [None]:
# Report the top-1 and top-5 accuracy of the quantized model's validation run.
for label, value in (
    ("top1_acc:", quant_model_metrics.top1),
    ("top5_acc:", quant_model_metrics.top5),
):
    print(label, value)

top1_acc: 0.8496814966201782
top5_acc: 0.9898088574409485


#### Время инференса

In [None]:
# Print the time recorded for each validation stage of the quantized model.
# The loop variable is not called `time`, to avoid clashing with the stdlib module name.
for stage_name, stage_ms in quant_model_metrics.speed.items():
    print(f'{stage_name} speed {stage_ms:.3f} ms')

preprocess speed 0.740 ms
inference speed 12.414 ms
loss speed 0.002 ms
postprocess speed 0.001 ms


# Прунинг

In [None]:
def sparsity(model):
    """Return the global sparsity of ``model``.

    Sparsity is the number of parameter elements that are exactly zero
    divided by the total number of parameter elements, taken over everything
    yielded by ``model.parameters()``.

    Returns:
        A zero-dimensional tensor with the zero fraction. When the model has
        no parameter elements the result is ``tensor(0.)`` rather than a
        ``ZeroDivisionError``.
    """
    total, zeros = 0, 0
    for p in model.parameters():
        total += p.numel()
        zeros += (p == 0).sum()
    if total == 0:
        # Nothing to measure (e.g. a parameter-free module): avoid dividing by zero.
        return torch.tensor(0.0)
    return zeros / total

In [None]:
# Prune every Conv2d / Linear layer of the baseline network in place:
# prune.ln_structured is applied to 'weight' with amount=0.05, n=2, dim=0,
# and prune.remove then makes the pruning permanent on the weight tensor.
prunable_types = (torch.nn.Conv2d, torch.nn.Linear)
for _layer_name, layer in model.model.named_modules():
    if not isinstance(layer, prunable_types):
        continue
    prune.ln_structured(layer, name='weight', amount=0.05, n=2, dim=0)  # prune
    prune.remove(layer, 'weight')  # make permanent
print(f'Model pruned to {sparsity(model.model):.3g} global sparsity')

Model pruned to 0.05 global sparsity


In [None]:
# Package the pruned network (model.model was pruned in place by the cell
# above) as a checkpoint dict and write it to './model_pruned.pt' so it can be
# reloaded through YOLO(...).
# The wrapper's overrides are stored instead of an empty dict: with empty
# train_args the reloaded model keeps no record of the original task or image
# size, so validation runs under default settings and its accuracy and timing
# are not comparable with the baseline.
# NOTE(review): assumes model.overrides carries task/imgsz for the pinned
# ultralytics version - confirm.
ckpt = {
    'model': model.model,
    'train_args': dict(model.overrides),  # save as dict
}

torch.save(ckpt, './model_pruned.pt')

In [None]:
# Reload the saved pruned checkpoint through the YOLO wrapper.
pruned_model = YOLO("./model_pruned.pt")
# Set the task on the reloaded wrapper explicitly to classification.
pruned_model.task = 'classify'

#### Число параметров модели

In [None]:
# Show the summary of the reloaded pruned model.
pruned_model.info()

YOLOv8m-cls summary (fused): 103 layers, 15775466 parameters, 0 gradients


(103, 15775466, 0, 0)

#### Размер модели

In [None]:
# Print the pruned model's size as measure_size counts it (parameters + buffers, in MB).
measure_size(pruned_model)

model size: 60.179MB


#### Целевая метрика

In [None]:
# Validate the pruned model with data='imagenette' and keep the metrics
# object for the accuracy and speed cells below.
pruned_model_metrics = pruned_model.val(data='imagenette')

Ultralytics YOLOv8.0.180 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mtrain:[0m /content/datasets/imagenette/train... found 9469 images in 10 classes ✅ 
[34m[1mval:[0m /content/datasets/imagenette/val... found 3925 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /content/datasets/imagenette/val... 3925 images, 0 corrupt: 100%|██████████| 3925/3925 [00:00<?, ?it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 246/246 [01:07<00:00,  3.65it/s]
                   all      0.139      0.643
Speed: 0.9ms preprocess, 12.4ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to [1mruns/detect/val2[0m


In [None]:
# Report the top-1 and top-5 accuracy of the pruned model's validation run.
for label, value in (
    ("top1_acc:", pruned_model_metrics.top1),
    ("top5_acc:", pruned_model_metrics.top5),
):
    print(label, value)

top1_acc: 0.1385987251996994
top5_acc: 0.6433120965957642


#### Время инференса

In [None]:
# Print the time recorded for each validation stage of the pruned model.
# The loop variable is not called `time`, to avoid clashing with the stdlib module name.
for stage_name, stage_ms in pruned_model_metrics.speed.items():
    print(f'{stage_name} speed {stage_ms:.3f} ms')

preprocess speed 0.885 ms
inference speed 12.449 ms
loss speed 0.001 ms
postprocess speed 0.001 ms
