# Методы компрессии нейронных сетей

## Лекция №5 - Экспорт моделей в ращличные форматы Tensort, ONNX
- Сравнение производительности сжатых моделей с экспортированными в стандартные форматы
- Практика - Примеры с экспортом моделей в различные форматы

## ДЗ №5
Экспорт своих моделей и замер производительности

## Домашняя работа
Рассматривается кастомная сверточная нейронная сеть.

Проверяются следующие метрики производительности:
- вес файла модели;
- время инференса.

In [None]:
!pip install torchsummary
!pip install onnxruntime-gpu==1.15 onnx datasets tensorrt==8.6
!pip install openvino-dev

In [2]:
import time
import os

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchsummary import summary

import torch.onnx
import onnx
import onnxruntime

from openvino.runtime import serialize
from openvino.tools import mo
from openvino.runtime import Core

import torchvision
from torchvision import transforms

from tqdm.auto import trange

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 1024

In [3]:
def train(model, n_epochs=10):
  model.to(device)
  optim = torch.optim.AdamW(model.parameters())

  best_epoch, best_accuracy = -1, 0
  for epoch in trange(n_epochs):
    model.train()
    for input, target in train_loader:
      input, target = input.to(device), target.to(device)
      pred = model(input)
      loss = F.cross_entropy(pred, target)

      optim.zero_grad()
      loss.backward()
      optim.step()

    model.eval()
    correct, total = 0, 0
    for input, target in test_loader:
      input, target = input.to(device), target.to(device)
      pred = model(input)
      pred = pred.argmax(dim=1)
      correct += (pred == target).sum()
      total += target.numel()

    accuracy = 100 * correct / total
    print(f"Epoch {epoch}: accuracy {accuracy:.1f}%")
    if accuracy > best_accuracy:
      best_epoch = epoch
      best_accuracy = accuracy

  print(f"Best accuracy {best_accuracy:.1f}% after epoch {best_epoch}")

### Загрузка датасета

In [4]:
train_dataset = torchvision.datasets.CIFAR10(
    root="dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True
)

test_dataset = torchvision.datasets.CIFAR10(
  root="dataset/",
  train=False,
  transform=transforms.ToTensor(),
  download=True
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to dataset/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 35026417.52it/s]


Extracting dataset/cifar-10-python.tar.gz to dataset/
Files already downloaded and verified


### Модель

In [5]:
class CNN_Block(nn.Module):
  def __init__(self, in_channels, out_channels):
    super().__init__()
    self.block = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU()
    )

  def forward(self, input):
    return self.block(input)

In [6]:
class CNN(nn.Module):
  def __init__(self, in_channels=3, inter_channels=None, image_size=32, n_classes=10):
    super().__init__()

    if inter_channels is None:
      inter_channels = [64, 128, 256]

    model = []
    for out_channels in inter_channels:
      model.append(CNN_Block(in_channels, out_channels))
      in_channels = out_channels

    model.append(nn.Flatten())

    image_size = int(image_size / (2 ** len(inter_channels)))
    in_features = out_channels * image_size * image_size
    out_features = int(in_features / 4)
    model.append(nn.Linear(in_features, out_features))
    model.append(nn.ReLU())

    in_features = out_features
    out_features = n_classes
    model.append(nn.Linear(in_features, out_features))

    self.model = nn.Sequential(*model)

  def forward(self, input):
    return self.model(input)

#### PyTorch

In [7]:
# model = CNN()
# summary(model, (3, 32, 32), device='cpu')

In [9]:
# train(model)
# torch.save(model, model_name)
" ---------------------------"
model_name = "model.pt"
model = torch.load(model_name)

##### Размер модели

In [8]:
file_size_bytes = os.path.getsize(model_path)
file_size_mb = file_size_bytes / (1024 * 1024) # Перевод из байтов в мегабайты

print(f"Размер модели: {file_size_mb} MB")

Размер модели: 20.43988609313965 MB


##### Инференс модели

In [20]:
model.to(device)

start = time.time()
for input, target in train_loader:
  input, target = input.to(device), target.to(device)
  _ = model(input)
  break
total_time_model = (time.time() - start) / 1024 * 1000


print(f"Инференс модели: {total_time_model} ms")

Инференс модели: 0.15114480629563332 ms


#### ONNX

In [12]:
dummy_input = torch.randn(1024, 3, 32, 32)
onnx_path = "model.onnx"
torch.onnx.export(model.to('cpu'), dummy_input, onnx_path,
                  opset_version=14,
                  input_names=["input"],
                  output_names=["output"],
                  dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})

##### Размер модели

In [13]:
file_size_bytes = os.path.getsize(onnx_path)
file_size_mb = file_size_bytes / (1024 * 1024) # Перевод из байтов в мегабайты

print(f"Размер модели: {file_size_mb} MB")

Размер модели: 20.415644645690918 MB


##### Инференс модели

In [14]:
onnx_model = onnxruntime.InferenceSession(onnx_path, providers=['CUDAExecutionProvider'])

In [21]:
start = time.time()
for inputs, target in train_loader:
  inputs, target = inputs, target
  inputs = inputs.numpy()
  _ = onnx_model.run(None, {"input": inputs})[0]
  break
total_time_model = (time.time() - start) / 1024 * 1000


print(f"Инференс модели: {total_time_model} ms")

Инференс модели: 0.19574956968426704 ms


#### OpenVino

In [16]:
ov_model = mo.convert_model('model.onnx', compress_to_fp16=False)
serialize(ov_model, './openvino_model/model.xml')

##### Размер модели

In [17]:
file_size_bytes = os.path.getsize('/content/openvino_model/model.bin')
file_size_mb = file_size_bytes / (1024 * 1024) # Перевод из байтов в мегабайты

print(f"Размер модели: {file_size_mb} MB")

Размер модели: 20.41240692138672 MB


##### Инференс модели

In [18]:
core = Core()
compiled_model = core.compile_model(ov_model)

In [22]:
start = time.time()
for inputs, target in train_loader:
  inputs, target = inputs, target
  _ = compiled_model(inputs)[0]
  break
total_time_model = (time.time() - start) / 1024 * 1000


print(f"Инференс модели: {total_time_model} ms")

Инференс модели: 2.7143568731844425 ms
