# Методы компрессии нейронных сетей

## Лекция №1 - Введение в компрессию моделей машинного обучения: Вводная
- Что такое компрессия моделей и зачем она нужна?
- Основные задачи в компрессии моделей машинного обучения
- Ограничения и применимость
- Организационные моменты: как будет проходить курс и т. д.

## ДЗ №1
Разделиться на команды по 4 человека и выбрать модели для оптимизации, сделать базовые замеры метрик производительности


## Домашняя работа

В качестве модели выбрана YOLOv8m-cls.

Проверяются следующие метрики производительности:
- число параметров модели;
- вес файла модели;
- количество RAM, потребляемое моделью;
- время инференса;
- целевая метрика.

### 1. Установка

In [47]:
!pip install ultralytics;

In [None]:
! pip3 uninstall torch -y
! pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [55]:
!pip install nvidia-ml-py3

Collecting nvidia-ml-py3
  Downloading nvidia-ml-py3-7.352.0.tar.gz (19 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: nvidia-ml-py3
  Building wheel for nvidia-ml-py3 (setup.py) ... done
  Created wheel for nvidia-ml-py3: filename=nvidia_ml_py3-7.352.0-py3-none-any.whl size=19171 sha256=34bdd40c73936ee3c312b45a13961eb0a9d5e4a951453a6bdc9542e301bed109
  Stored in directory: /root/.cache/pip/wheels/5c/d8/c0/46899f8be7a75a2ffd197a23c8797700ea858b9b34819fbf9e
Successfully built nvidia-ml-py3
Installing collected packages: nvidia-ml-py3
Successfully installed nvidia-ml-py3-7.352.0


In [51]:
import torch

# Probe for CUDA before selecting a device: calling set_device() on a machine
# without a usable GPU raises, which would hide the availability result this
# cell is meant to display.
cuda_available = torch.cuda.is_available()
if cuda_available:
    torch.cuda.set_device(0)  # Set to your desired GPU number
# Bare expression so the notebook still shows True/False as the cell output.
cuda_available

True

### 2. Обучение модели

#### Замеры RAM-before

In [57]:
import tensorflow as tf
import math
import nvidia_smi
# Report per-GPU memory usage through NVML before the model is loaded.
# `info` intentionally stays in scope after the loop: the following cell reads
# `info.used` as the baseline (it holds the values of the last device listed).
info_gpus = tf.config.list_physical_devices('GPU')
if info_gpus:
    nvidia_smi.nvmlInit()
    try:
        device_count = nvidia_smi.nvmlDeviceGetCount()
        for i in range(device_count):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            print(f"Device {i}: {nvidia_smi.nvmlDeviceGetName(handle).decode()}")
            print(f"Memory : {round(100*info.free/info.total,2)}% free: {info.total}(total), {info.free} (free), {info.used} (used)")
    finally:
        # Always release the NVML session, even if one of the queries raises.
        nvidia_smi.nvmlShutdown()
else:
    print("No GPU used")

Device 0: Tesla T4
Memory : 82.91% free: 16106127360(total), 13353418752 (free), 2752708608 (used)


In [58]:
# Baseline: GPU memory in use (bytes) before the model is loaded, taken from
# the NVML query in the previous cell (`info` is the last device iterated there).
mem_before = info.used

#### Число параметров

In [60]:
from ultralytics import YOLO

# Load the pretrained YOLOv8m classification checkpoint
model = YOLO('yolov8m-cls.pt')

In [61]:
# Print the model summary (layer, parameter and gradient counts).
model.info()

YOLOv8m-cls summary: 141 layers, 17053336 parameters, 0 gradients


(141, 17053336, 0, 0.0)

#### Вес модели

In [62]:
# In-memory footprint of the network: bytes held by all parameter tensors plus
# all registered buffers, reported in MiB.
param_size = sum(
    tensor.nelement() * tensor.element_size()
    for tensor in model.model.parameters()
)
buffer_size = sum(
    tensor.nelement() * tensor.element_size()
    for tensor in model.model.buffers()
)

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 65.127MB


#### Целевая метрика

In [63]:
# Fine-tune the loaded checkpoint on the 'imagenette' dataset:
# 20 epochs, image size 224, on GPU 0
results = model.train(data='imagenette', epochs=20, imgsz=224, device=0)

Ultralytics YOLOv8.0.176 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=classify, mode=train, model=yolov8m-cls.pt, data=imagenette, epochs=20, patience=50, batch=16, imgsz=224, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optim

In [64]:
# Evaluate the trained model; the returned object exposes top1/top5 accuracy.
metrics = model.val()  # no arguments needed, dataset and settings remembered

Ultralytics YOLOv8.0.176 🚀 Python-3.10.12 torch-2.0.1+cu118 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8m-cls summary (fused): 103 layers, 15775466 parameters, 0 gradients
[34m[1mtrain:[0m /content/datasets/imagenette/train... found 9469 images in 10 classes ✅ 
[34m[1mval:[0m /content/datasets/imagenette/val... found 3925 images in 10 classes ✅ 
[34m[1mtest:[0m None...
[34m[1mval: [0mScanning /content/datasets/imagenette/val... 3925 images, 0 corrupt: 100%|██████████| 3925/3925 [00:00<?, ?it/s]
               classes   top1_acc   top5_acc: 100%|██████████| 246/246 [00:28<00:00,  8.72it/s]
                   all      0.975      0.998
Speed: 0.1ms preprocess, 2.1ms inference, 0.0ms loss, 0.0ms postprocess per image
Results saved to runs/classify/val3


In [65]:
# Target metric: top-1 and top-5 accuracy from the validation run.
print("top1_acc:", metrics.top1)
print("top5_acc:", metrics.top5)

top1_acc: 0.9747770428657532
top5_acc: 0.9982165098190308


#### Замеры RAM-after

In [66]:
import tensorflow as tf
import math
import nvidia_smi
# Report per-GPU memory usage through NVML after training and validation.
# `info` intentionally stays in scope after the loop: the following cell reads
# `info.used` to compute the increase over the baseline (it holds the values
# of the last device listed).
info_gpus = tf.config.list_physical_devices('GPU')
if info_gpus:
    nvidia_smi.nvmlInit()
    try:
        device_count = nvidia_smi.nvmlDeviceGetCount()
        for i in range(device_count):
            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(i)
            info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
            print(f"Device {i}: {nvidia_smi.nvmlDeviceGetName(handle).decode()}")
            print(f"Memory : {round(100*info.free/info.total,2)}% free: {info.total}(total), {info.free} (free), {info.used} (used)")
    finally:
        # Always release the NVML session, even if one of the queries raises.
        nvidia_smi.nvmlShutdown()
else:
    print("No GPU used")

Device 0: Tesla T4
Memory : 80.19% free: 16106127360(total), 12915113984 (free), 3191013376 (used)


In [71]:
# Growth in used GPU memory relative to the baseline, converted from bytes to
# MiB. Despite the name, this is a difference, not an absolute reading.
# NOTE(review): taken after training and validation, so it presumably covers
# more than the model weights alone — confirm what the metric should include.
mem_after = (info.used - mem_before) / 1024**2
mem_after

418.0

#### Инференс

In [84]:
import os
# NOTE(review): relative path — the target depends on the current working
# directory, so re-running this cell resolves './content' against the
# directory it has already moved into. Confirm the intended location.
os.chdir('./content')

In [87]:
# Run inference with the trained YOLOv8m-cls model on a single image.
# Note: this rebinds `results`, which previously held the training output.
results = model('train[7shard]_459.jpeg')


image 1/1 /content/train[7shard]_459.jpeg: 224x224 garbage_truck 1.00, chain_saw 0.00, golf_ball 0.00, tench 0.00, English_springer 0.00, 15.0ms
Speed: 2.0ms preprocess, 15.0ms inference, 0.1ms postprocess per image at shape (1, 3, 224, 224)
