## Exporting your model

In [1]:
import torch
from torchvision.models.mobilenetv2 import mobilenet_v2
from sparseml.pytorch.utils import export_onnx

# Load MobileNetV2 with its pretrained weights (downloaded to the torch hub
# cache on first use). Newer torchvision releases prefer the `weights=`
# argument; `pretrained=True` is kept here for compatibility with older ones.
model = mobilenet_v2(pretrained=True)

# Put the model in inference mode before tracing so that dropout is disabled
# and batch norm uses its stored running statistics in the exported graph.
# Do not rely on the exporter to switch modes on our behalf.
model.eval()

# Dummy input used only to trace the graph: one 3-channel 224x224 image.
sample_batch = torch.randn((1, 3, 224, 224))

# The ONNX file is written relative to the notebook's working directory.
export_path = "custom_model.onnx"
export_onnx(model, sample_batch, export_path)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /home/jupyter/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 128MB/s]
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_onnx(graph, module)
  _C._jit_pass_onnx_remove_inplace_ops_for_

verbose: False, log level: Level.ERROR



#### Once the model is in ONNX format, it is ready to be used in a `CustomTaskPipeline` or benchmarked. Examples of both are given below. First, download a sample image for the pipeline to classify:

In [2]:
# Download a sample JPEG from the DeepSparse repository and save it in the
# working directory as basilica.jpg (-O sets the output file name).
!wget -O basilica.jpg https://raw.githubusercontent.com/neuralmagic/deepsparse/main/src/deepsparse/yolo/sample_images/basilica.jpg

--2024-01-16 18:48:52--  https://raw.githubusercontent.com/neuralmagic/deepsparse/main/src/deepsparse/yolo/sample_images/basilica.jpg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 129644 (127K) [image/jpeg]
Saving to: ‘basilica.jpg’


2024-01-16 18:48:52 (5.63 MB/s) - ‘basilica.jpg’ saved [129644/129644]



#### Next, define the pre-processing and post-processing functions, then instantiate the pipeline and use it to classify the image file:

In [4]:
from deepsparse.pipelines.custom_pipeline import CustomTaskPipeline
import torch
from torchvision import transforms
from PIL import Image

# Per-channel (R, G, B) mean and standard deviation handed to
# transforms.Normalize below; named for the ImageNet statistics.
IMAGENET_RGB_MEANS = [0.485, 0.456, 0.406]
IMAGENET_RGB_STDS = [0.229, 0.224, 0.225]

# Image preparation applied to each PIL image, in order: resize to 256,
# center-crop to 224x224 (the spatial size of the sample batch used for the
# ONNX export), convert to a tensor, then normalize each channel.
preprocess_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_RGB_MEANS, std=IMAGENET_RGB_STDS),
])

def preprocess(inputs):
    """Load an image file and turn it into a single-image model input batch.

    Args:
        inputs: Path to an image file readable by PIL.

    Returns:
        A one-element list holding a numpy array with a leading batch
        dimension of size 1 (deepsparse requires a list of numpy array
        inputs).
    """
    # Decode and convert while the file is still open so the pixel data is
    # fully loaded before the handle is closed.
    with open(inputs, "rb") as image_handle:
        rgb_image = Image.open(image_handle).convert("RGB")

    image_tensor = preprocess_transforms(rgb_image)

    # Stack the single image to add the leading batch dimension.
    batched = torch.stack([image_tensor])
    return [batched.numpy()]

def postprocess(outputs):
    """Return the engine outputs untouched.

    Args:
        outputs: List of numpy arrays produced by the model.

    Returns:
        The very same ``outputs`` object; no decoding or top-k selection is
        applied.
    """
    # Identity pass-through: callers receive the raw list of numpy arrays.
    return outputs

# Build a pipeline around the exported ONNX file, supplying the
# `preprocess` / `postprocess` functions defined in this cell as the input
# and output processing hooks.
custom_pipeline = CustomTaskPipeline(
    model_path="custom_model.onnx",
    process_inputs_fn=preprocess,
    process_outputs_fn=postprocess,
)
# Run the pipeline on the downloaded sample image.
inference = custom_pipeline("basilica.jpg")
# NOTE(review): `postprocess` returns its input unchanged, so this prints the
# entire raw output array; consider showing only the top-scoring class
# indices to keep the cell output short.
print(inference)

DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.1 COMMUNITY | (eff4f95d) (release) (optimized) (system=avx512_vnni, binary=avx512)


[array([[-1.24427509e+00, -6.90232396e-01, -9.11018848e-01,
        -1.80339050e+00, -9.55938458e-01, -1.78599551e-01,
        -9.48900759e-01,  5.79046249e-01,  5.30483127e-01,
        -9.51236248e-01, -1.45305657e+00, -1.16881335e+00,
        -1.01537859e+00, -1.58626676e+00, -1.77993357e+00,
        -1.12054729e+00, -1.06275916e+00, -4.73983049e-01,
        -3.57268453e-01, -1.04227448e+00, -1.60604203e+00,
        -8.27379823e-01, -1.89613664e+00,  1.12357140e-01,
        -1.39328527e+00, -1.27415466e+00, -1.01584363e+00,
        -1.16811788e+00, -9.78412747e-01, -8.32947016e-01,
        -1.06615472e+00, -1.17184174e+00, -6.63816929e-01,
        -5.12802839e-01, -1.66211039e-01, -7.96792388e-01,
         5.27858496e-01, -9.70269799e-01, -6.41587734e-01,
         3.62706035e-01, -6.84722424e-01, -7.91593075e-01,
        -9.29240823e-01, -6.46526992e-01, -5.59352160e-01,
        -5.99325120e-01, -6.20891452e-01, -7.86835134e-01,
        -1.58240438e+00, -1.07925463e+00, -6.12410367e-

## Benchmarking

In [5]:
# Benchmark the exported ONNX file with the DeepSparse benchmarking CLI.
# No extra flags are passed, so the tool's default settings apply.
!deepsparse.benchmark custom_model.onnx

2024-01-16 18:50:55 deepsparse.benchmark.helpers INFO     Thread pinning to cores enabled
DeepSparse, Copyright 2021-present / Neuralmagic, Inc. version: 1.6.1 COMMUNITY | (eff4f95d) (release) (optimized) (system=avx512_vnni, binary=avx512)
2024-01-16 18:50:57 deepsparse.benchmark.benchmark_model INFO     deepsparse.engine.Engine:
	onnx_file_path: custom_model.onnx
	batch_size: 1
	num_cores: 4
	num_streams: 1
	scheduler: Scheduler.default
	fraction_of_supported_ops: 0.6579
	cpu_avx_type: avx512
	cpu_vnni: True
2024-01-16 18:50:57 deepsparse.utils.onnx INFO     Generating input 'input', type = float32, shape = [1, 3, 224, 224]
2024-01-16 18:50:57 deepsparse.benchmark.benchmark_model INFO     Starting 'singlestream' performance measurements for 10 seconds
Original Model Path: custom_model.onnx
Batch Size: 1
Scenario: sync
Throughput (items/sec): 76.3147
Latency Mean (ms/batch): 13.0925
Latency Median (ms/batch): 12.6916
Latency Std (ms/batch): 1.0308
Iterations: 764
