Skip to content
This repository was archived by the owner on Jun 3, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ensure_newline_before_comments = True
force_grid_wrap = 0
include_trailing_comma = True
known_first_party = deepsparse,sparsezoo
known_third_party = numpy,onnx,requests,onnxruntime,flask,flask_cors
known_third_party = numpy,onnx,requests,onnxruntime,flask,flask_cors,tqdm
sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER

line_length = 88
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
binary_regexes = ["*/*.so", "*/*.so.*", "*.bin", "*/*.bin"]


_deps = ["numpy>=1.16.3", "onnx>=1.5.0,<1.8.0", "requests>=2.0.0"]
_deps = ["numpy>=1.16.3", "onnx>=1.5.0,<1.8.0", "requests>=2.0.0", "tqdm>=4.0.0"]

_nm_deps = [f"{'sparsezoo-nightly' if _NIGHTLY else 'sparsezoo'}~={_VERSION}"]

Expand Down
64 changes: 49 additions & 15 deletions src/deepsparse/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,19 @@
import numpy

from deepsparse.benchmark import BenchmarkResults
from tqdm.auto import tqdm


try:
from sparsezoo import Zoo
from sparsezoo.objects import File, Model
except Exception:

sparsezoo_import_error = None
except Exception as sparsezoo_err:
Zoo = None
Model = object
File = object
sparsezoo_import_error = sparsezoo_err

try:
# flake8: noqa
Expand All @@ -55,9 +61,13 @@ def _model_to_path(model: Union[str, Model, File]) -> str:
if not model:
raise ValueError("model must be a path, sparsezoo.Model, or sparsezoo.File")

if isinstance(model, str):
pass
elif Model is not object and isinstance(model, Model):
if isinstance(model, str) and model.startswith("zoo:"):
# load SparseZoo Model from stub
if sparsezoo_import_error is not None:
raise sparsezoo_import_error
model = Zoo.load_model_from_stub(model)

if Model is not object and isinstance(model, Model):
# default to the main onnx file for the model
model = model.onnx_file.downloaded_path()
elif File is not object and isinstance(model, File):
Expand Down Expand Up @@ -105,8 +115,9 @@ class Engine(object):
| # create an engine for batch size 1 on all available cores
| engine = Engine("path/to/onnx", batch_size=1, num_cores=None)

:param model: Either a path to the model's onnx file, a sparsezoo Model object,
or a sparsezoo ONNX File object that defines the neural network
:param model: Either a path to the model's onnx file, a SparseZoo model stub
prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
object that defines the neural network
:param batch_size: The batch size of the inputs to be used with the engine
:param num_cores: The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
Expand Down Expand Up @@ -324,6 +335,7 @@ def benchmark(
num_warmup_iterations: int = 5,
include_inputs: bool = False,
include_outputs: bool = False,
show_progress: bool = False,
) -> BenchmarkResults:
"""
A convenience function for quickly benchmarking the instantiated model
Expand All @@ -342,6 +354,7 @@ def benchmark(
will be added to the results. Default is False
:param include_outputs: If True, outputs from forward passes during benchmarking
will be added to the results. Default is False
:param show_progress: If True, will display a progress bar. Default is False
:return: the results of benchmarking
"""
# define data loader
Expand All @@ -355,6 +368,7 @@ def _infinite_loader():
num_warmup_iterations=num_warmup_iterations,
include_inputs=include_inputs,
include_outputs=include_outputs,
show_progress=show_progress,
)

def benchmark_loader(
Expand All @@ -364,6 +378,7 @@ def benchmark_loader(
num_warmup_iterations: int = 5,
include_inputs: bool = False,
include_outputs: bool = False,
show_progress: bool = False,
) -> BenchmarkResults:
"""
A convenience function for quickly benchmarking the instantiated model
Expand All @@ -382,6 +397,7 @@ def benchmark_loader(
will be added to the results. Default is False
:param include_outputs: If True, outputs from forward passes during benchmarking
will be added to the results. Default is False
:param show_progress: If True, will display a progress bar. Default is False
:return: the results of benchmarking
"""
assert num_iterations >= 1 and num_warmup_iterations >= 0, (
Expand All @@ -391,13 +407,15 @@ def benchmark_loader(
completed_iterations = 0
results = BenchmarkResults()

if show_progress:
progress_bar = tqdm(total=num_iterations)

while completed_iterations < num_warmup_iterations + num_iterations:
for batch in loader:
# run benchmark
start = time.time()
out = self.run(batch)
end = time.time()
completed_iterations += 1

if completed_iterations >= num_warmup_iterations:
# update results if warmup iterations are completed
Expand All @@ -408,10 +426,17 @@ def benchmark_loader(
inputs=batch if include_inputs else None,
outputs=out if include_outputs else None,
)
if show_progress:
progress_bar.update(1)

completed_iterations += 1

if completed_iterations >= num_warmup_iterations + num_iterations:
break

if show_progress:
progress_bar.close()

return results

def _validate_inputs(self, inp: List[numpy.ndarray]):
Expand Down Expand Up @@ -453,8 +478,9 @@ def compile_model(
Gives defaults of batch_size == 1 and num_cores == None
(will use all physical cores available on a single socket).

:param model: Either a path to the model's onnx file, a sparsezoo Model object,
or a sparsezoo ONNX File object that defines the neural network
:param model: Either a path to the model's onnx file, a SparseZoo model stub
prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
object that defines the neural network
:param batch_size: The batch size of the inputs to be used with the model
:param num_cores: The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
Expand All @@ -473,15 +499,17 @@ def benchmark_model(
num_warmup_iterations: int = 5,
include_inputs: bool = False,
include_outputs: bool = False,
show_progress: bool = False,
) -> BenchmarkResults:
"""
Convenience function to benchmark a model in the DeepSparse Engine
from an ONNX file for inference.
Gives defaults of batch_size == 1 and num_cores == None
(will use all physical cores available on a single socket).

:param model: Either a path to the model's onnx file, a sparsezoo Model object,
or a sparsezoo ONNX File object that defines the neural network
:param model: Either a path to the model's onnx file, a SparseZoo model stub
prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
object that defines the neural network
:param batch_size: The batch size of the inputs to be used with the model
:param num_cores: The number of physical cores to run the model on.
Pass None or 0 to run on the max number of cores
Expand All @@ -498,12 +526,18 @@ def benchmark_model(
will be added to the results. Default is False
:param include_outputs: If True, outputs from forward passes during benchmarking
will be added to the results. Default is False
:param show_progress: If True, will display a progress bar. Default is False
:return: the results of benchmarking
"""
model = compile_model(model, batch_size, num_cores)

return model.benchmark(
inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs
inp,
num_iterations,
num_warmup_iterations,
include_inputs,
include_outputs,
show_progress,
)


Expand All @@ -524,9 +558,9 @@ def analyze_model(
Gives defaults of batch_size == 1 and num_cores == None
(will use all physical cores available on a single socket).

:param model: Either a path to the model's onnx file, a sparsezoo Model object,
or a sparsezoo ONNX File object that defines the neural network
graph definition to analyze
:param model: Either a path to the model's onnx file, a SparseZoo model stub
prefixed by 'zoo:', a SparseZoo Model object, or a SparseZoo ONNX File
object that defines the neural network graph definition to analyze
:param inp: The list of inputs to pass to the engine for analyzing inference.
The expected order is the inputs order as defined in the ONNX graph.
:param batch_size: The batch size of the inputs to be used with the model
Expand Down