From d2f263bdc9232d03216ff9390634b2e8e2041f79 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Tue, 16 Feb 2021 16:03:40 -0500 Subject: [PATCH 1/5] Add support for batch splitting --- src/deepsparse/engine.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py index a794b2ecd9..1ae64cbbb3 100644 --- a/src/deepsparse/engine.py +++ b/src/deepsparse/engine.py @@ -33,7 +33,7 @@ try: # flake8: noqa - from deepsparse.cpu import cpu_details + from deepsparse.cpu import cpu_architecture from deepsparse.lib import init_deepsparse_lib from deepsparse.version import * except ImportError: @@ -46,7 +46,11 @@ __all__ = ["Engine", "compile_model", "benchmark_model", "analyze_model"] -CORES_PER_SOCKET, AVX_TYPE, VNNI = cpu_details() +ARCH = cpu_architecture() +CORES_PER_SOCKET = ARCH.available_cores_per_socket +NUM_SOCKETS = ARCH.available_sockets +AVX_TYPE = ARCH.isa +VNNI = ARCH.vnni LIB = init_deepsparse_lib() @@ -90,6 +94,16 @@ def _validate_num_cores(num_cores: Union[None, int]) -> int: return num_cores +def _validate_num_sockets(num_sockets: Union[None, int]) -> int: + if not num_sockets: + num_sockets = NUM_SOCKETS + + if num_sockets < 1: + raise ValueError("num_sockets must be greater than 0") + + return num_sockets + + class Engine(object): """ Create a new DeepSparse Engine that compiles the given onnx file @@ -113,15 +127,20 @@ class Engine(object): in one socket for the current machine, default None """ - def __init__(self, model: Union[str, Model, File], batch_size: int, num_cores: int): + def __init__( + self, model: Union[str, Model, File], batch_size: int, num_cores: int, num_sockets: int, + use_batch_splitting: bool = True + ): self._model_path = _model_to_path(model) self._batch_size = _validate_batch_size(batch_size) self._num_cores = _validate_num_cores(num_cores) - self._num_sockets = 1 # only single socket is supported currently + self._num_sockets = _validate_num_sockets(num_sockets) + self._use_batch_splitting = use_batch_splitting self._cpu_avx_type = AVX_TYPE self._cpu_vnni = VNNI self._eng_net = LIB.deepsparse_engine( - self._model_path, self._batch_size, self._num_cores, self._num_sockets + self._model_path, self._batch_size, self._num_cores, self._num_sockets, + self._use_batch_splitting ) def __call__( @@ -439,13 +458,15 @@ def _properties_dict(self) -> Dict: "batch_size": self._batch_size, "num_cores": self._num_cores, "num_sockets": self._num_sockets, + "use_batch_splitting": self._use_bactch_splitting, "cpu_avx_type": self._cpu_avx_type, "cpu_vnni": self._cpu_vnni, } def compile_model( - model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None + model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, num_sockets: int = None, + use_batch_splitting: bool = False ) -> Engine: """ Convenience function to compile a model in the DeepSparse Engine @@ -461,7 +482,7 @@ def compile_model( in one socket for the current machine, default None :return: The created Engine after compiling the model """ - return Engine(model, batch_size, num_cores) + return Engine(model, batch_size, num_cores, num_sockets, use_batch_splitting) def benchmark_model( @@ -553,7 +574,7 @@ def analyze_model( num_cores = _validate_num_cores(num_cores) batch_size = _validate_batch_size(batch_size) num_sockets = 1 - eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets) + eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets, True) return eng_net.benchmark( inp, From 2a55fda62d679993629007fad5d37ef47f42ace7 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Tue, 23 Feb 2021 12:13:23 -0500 Subject: [PATCH 2/5] Update for 0.1.1 release (#49) - update python version to 0.1.1 - setup.py add in version parts and _VERSION_MAJOR_MINOR for more flexibility with dependencies between neural magic packages --- setup.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index ea9a8594f9..a73bb1f258 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,9 @@ _PACKAGE_NAME = "deepsparse" -_VERSION = "0.1.0" +_VERSION = "0.1.1" +_VERSION_MAJOR, _VERSION_MINOR, _VERSION_BUG = _VERSION.split(".") +_VERSION_MAJOR_MINOR = f"{_VERSION_MAJOR}.{_VERSION_MINOR}" _NIGHTLY = "nightly" in sys.argv if _NIGHTLY: @@ -40,7 +42,9 @@ _deps = ["numpy>=1.16.3", "onnx>=1.5.0,<1.8.0", "requests>=2.0.0"] -_nm_deps = [f"{'sparsezoo-nightly' if _NIGHTLY else 'sparsezoo'}~={_VERSION}"] +_nm_deps = [ + f"{'sparsezoo-nightly' if _NIGHTLY else 'sparsezoo'}~={_VERSION_MAJOR_MINOR}" +] _dev_deps = [ "black>=20.8b1", From c906a06de0054dd3f8d29cc709f8cf2aa3240397 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Tue, 16 Feb 2021 16:03:40 -0500 Subject: [PATCH 3/5] Add support for batch splitting --- src/deepsparse/engine.py | 44 ++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py index 1ae64cbbb3..ae7926df43 100644 --- a/src/deepsparse/engine.py +++ b/src/deepsparse/engine.py @@ -125,11 +125,19 @@ class Engine(object): :param num_cores: The number of physical cores to run the model on. Pass None or 0 to run on the max number of cores in one socket for the current machine, default None + :param num_sockets: The number of physical sockets to run the model on. + Pass None or 0 to run on the max number of sockets for the + current machine, default None + :param use_batch_splitting: Manually control whether batch splitting is + enabled when running the model. When True, the model is split into + batch_size/num_sockets sections where each section is run on a separate + socket. When False, batch splitting is disabled. If set to None, batch + splitting is automatically enabled when num_sockets > 1, default None """ def __init__( - self, model: Union[str, Model, File], batch_size: int, num_cores: int, num_sockets: int, - use_batch_splitting: bool = True + self, model: Union[str, Model, File], batch_size: int, num_cores: int, + num_sockets: int = None, use_batch_splitting: bool = None ): self._model_path = _model_to_path(model) self._batch_size = _validate_batch_size(batch_size) @@ -140,7 +148,7 @@ def __init__( self._cpu_vnni = VNNI self._eng_net = LIB.deepsparse_engine( self._model_path, self._batch_size, self._num_cores, self._num_sockets, - self._use_batch_splitting + True if self._use_batch_splitting or self._num_sockets > 1 else False ) def __call__( @@ -465,8 +473,8 @@ def _properties_dict(self) -> Dict: def compile_model( - model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, num_sockets: int = None, - use_batch_splitting: bool = False + model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, + num_sockets: int = None, use_batch_splitting: bool = None ) -> Engine: """ Convenience function to compile a model in the DeepSparse Engine @@ -480,6 +488,14 @@ def compile_model( :param num_cores: The number of physical cores to run the model on. Pass None or 0 to run on the max number of cores in one socket for the current machine, default None + :param num_sockets: The number of physical sockets to run the model on. + Pass None or 0 to run on the max number of sockets for the + current machine, default None + :param use_batch_splitting: Manually control whether batch splitting is + enabled when running the model. When True, the model is split into + batch_size/num_sockets sections where each section is run on a separate + socket. When False, batch splitting is disabled. If set to None, batch + splitting is automatically enabled when num_sockets > 1, default None :return: The created Engine after compiling the model """ return Engine(model, batch_size, num_cores, num_sockets, use_batch_splitting) @@ -494,6 +510,8 @@ def benchmark_model( num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, + num_sockets: int = None, + use_batch_splitting: bool = None, ) -> BenchmarkResults: """ Convenience function to benchmark a model in the DeepSparse Engine @@ -521,7 +539,7 @@ def benchmark_model( will be added to the results. Default is False :return: the results of benchmarking """ - model = compile_model(model, batch_size, num_cores) + model = compile_model(model, batch_size, num_cores, num_sockets, use_batch_splitting) return model.benchmark( inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs @@ -538,6 +556,8 @@ def analyze_model( optimization_level: int = 1, imposed_as: Optional[float] = None, imposed_ks: Optional[float] = None, + num_sockets: int = None, + use_batch_splitting: bool = None ) -> dict: """ Function to analyze a model's performance in the DeepSparse Engine. @@ -568,13 +588,21 @@ def analyze_model( Will force all prunable layers in the graph to have weights with this desired sparsity level (percentage of 0's in the tensor). Beneficial for seeing how pruning affects the performance of the model. + :param use_batch_splitting: Manually control whether batch splitting is + enabled when running the model. When True, the model is split into + batch_size/num_sockets sections where each section is run on a separate + socket. When False, batch splitting is disabled. If set to None, batch + splitting is automatically enabled when num_sockets > 1, default None :return: the analysis structure containing the performance details of each layer """ model = _model_to_path(model) num_cores = _validate_num_cores(num_cores) batch_size = _validate_batch_size(batch_size) - num_sockets = 1 - eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets, True) + num_sockets = _validate_num_sockets(num_sockets) + use_batch_splitting = True if use_batch_splitting or num_sockets > 1 else False + eng_net = LIB.deepsparse_engine( + model, batch_size, num_cores, num_sockets, use_batch_splitting + ) return eng_net.benchmark( inp, From 9a850ce863cfc4eec077aa7f420f2f04dcc4349a Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Tue, 23 Feb 2021 15:34:49 -0500 Subject: [PATCH 4/5] Remove use_batch_splitting parameter --- src/deepsparse/engine.py | 45 +++++++++++++--------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py index ae7926df43..814ce4f5a7 100644 --- a/src/deepsparse/engine.py +++ b/src/deepsparse/engine.py @@ -128,27 +128,20 @@ class Engine(object): :param num_sockets: The number of physical sockets to run the model on. Pass None or 0 to run on the max number of sockets for the current machine, default None - :param use_batch_splitting: Manually control whether batch splitting is - enabled when running the model. When True, the model is split into - batch_size/num_sockets sections where each section is run on a separate - socket. When False, batch splitting is disabled. If set to None, batch - splitting is automatically enabled when num_sockets > 1, default None """ def __init__( self, model: Union[str, Model, File], batch_size: int, num_cores: int, - num_sockets: int = None, use_batch_splitting: bool = None + num_sockets: int = None ): self._model_path = _model_to_path(model) self._batch_size = _validate_batch_size(batch_size) self._num_cores = _validate_num_cores(num_cores) self._num_sockets = _validate_num_sockets(num_sockets) - self._use_batch_splitting = use_batch_splitting self._cpu_avx_type = AVX_TYPE self._cpu_vnni = VNNI self._eng_net = LIB.deepsparse_engine( - self._model_path, self._batch_size, self._num_cores, self._num_sockets, - True if self._use_batch_splitting or self._num_sockets > 1 else False + self._model_path, self._batch_size, self._num_cores, self._num_sockets ) def __call__( @@ -466,7 +459,6 @@ def _properties_dict(self) -> Dict: "batch_size": self._batch_size, "num_cores": self._num_cores, "num_sockets": self._num_sockets, - "use_batch_splitting": self._use_bactch_splitting, "cpu_avx_type": self._cpu_avx_type, "cpu_vnni": self._cpu_vnni, } @@ -474,7 +466,7 @@ def _properties_dict(self) -> Dict: def compile_model( model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, - num_sockets: int = None, use_batch_splitting: bool = None + num_sockets: int = None ) -> Engine: """ Convenience function to compile a model in the DeepSparse Engine @@ -491,14 +483,9 @@ def compile_model( :param num_sockets: The number of physical sockets to run the model on. Pass None or 0 to run on the max number of sockets for the current machine, default None - :param use_batch_splitting: Manually control whether batch splitting is - enabled when running the model. When True, the model is split into - batch_size/num_sockets sections where each section is run on a separate - socket. When False, batch splitting is disabled. If set to None, batch - splitting is automatically enabled when num_sockets > 1, default None :return: The created Engine after compiling the model """ - return Engine(model, batch_size, num_cores, num_sockets, use_batch_splitting) + return Engine(model, batch_size, num_cores, num_sockets) def benchmark_model( @@ -510,8 +497,7 @@ def benchmark_model( num_warmup_iterations: int = 5, include_inputs: bool = False, include_outputs: bool = False, - num_sockets: int = None, - use_batch_splitting: bool = None, + num_sockets: int = None ) -> BenchmarkResults: """ Convenience function to benchmark a model in the DeepSparse Engine @@ -537,9 +523,12 @@ def benchmark_model( will be added to the results. Default is False :param include_outputs: If True, outputs from forward passes during benchmarking will be added to the results. Default is False + :param num_sockets: The number of physical sockets to run the model on. + Pass None or 0 to run on the max number of sockets for the + current machine, default None :return: the results of benchmarking """ - model = compile_model(model, batch_size, num_cores, num_sockets, use_batch_splitting) + model = compile_model(model, batch_size, num_cores, num_sockets) return model.benchmark( inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs @@ -556,8 +545,7 @@ def analyze_model( optimization_level: int = 1, imposed_as: Optional[float] = None, imposed_ks: Optional[float] = None, - num_sockets: int = None, - use_batch_splitting: bool = None + num_sockets: int = None ) -> dict: """ Function to analyze a model's performance in the DeepSparse Engine. @@ -588,21 +576,16 @@ def analyze_model( Will force all prunable layers in the graph to have weights with this desired sparsity level (percentage of 0's in the tensor). Beneficial for seeing how pruning affects the performance of the model. - :param use_batch_splitting: Manually control whether batch splitting is - enabled when running the model. When True, the model is split into - batch_size/num_sockets sections where each section is run on a separate - socket. When False, batch splitting is disabled. If set to None, batch - splitting is automatically enabled when num_sockets > 1, default None + :param num_sockets: The number of physical sockets to run the model on. + Pass None or 0 to run on the max number of sockets for the + current machine, default None :return: the analysis structure containing the performance details of each layer """ model = _model_to_path(model) num_cores = _validate_num_cores(num_cores) batch_size = _validate_batch_size(batch_size) num_sockets = _validate_num_sockets(num_sockets) - use_batch_splitting = True if use_batch_splitting or num_sockets > 1 else False - eng_net = LIB.deepsparse_engine( - model, batch_size, num_cores, num_sockets, use_batch_splitting - ) + eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets) return eng_net.benchmark( inp, From 7a6b61db55b73f4e9418367d0a746996555c5f34 Mon Sep 17 00:00:00 2001 From: Bill Nell Date: Thu, 25 Feb 2021 14:29:23 -0500 Subject: [PATCH 5/5] Run style on changes --- src/deepsparse/engine.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py index f1f97dbf32..1335299fb7 100644 --- a/src/deepsparse/engine.py +++ b/src/deepsparse/engine.py @@ -21,9 +21,9 @@ from typing import Dict, Iterable, List, Optional, Tuple, Union import numpy +from tqdm.auto import tqdm from deepsparse.benchmark import BenchmarkResults -from tqdm.auto import tqdm try: @@ -142,8 +142,11 @@ class Engine(object): """ def __init__( - self, model: Union[str, Model, File], batch_size: int, num_cores: int, - num_sockets: int = None + self, + model: Union[str, Model, File], + batch_size: int, + num_cores: int, + num_sockets: int = None, ): self._model_path = _model_to_path(model) self._batch_size = _validate_batch_size(batch_size) @@ -490,8 +493,10 @@ def _properties_dict(self) -> Dict: def compile_model( - model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, - num_sockets: int = None + model: Union[str, Model, File], + batch_size: int = 1, + num_cores: int = None, + num_sockets: int = None, ) -> Engine: """ Convenience function to compile a model in the DeepSparse Engine @@ -524,7 +529,7 @@ def benchmark_model( include_inputs: bool = False, include_outputs: bool = False, show_progress: bool = False, - num_sockets: int = None + num_sockets: int = None, ) -> BenchmarkResults: """ Convenience function to benchmark a model in the DeepSparse Engine @@ -579,7 +584,7 @@ def analyze_model( optimization_level: int = 1, imposed_as: Optional[float] = None, imposed_ks: Optional[float] = None, - num_sockets: int = None + num_sockets: int = None, ) -> dict: """ Function to analyze a model's performance in the DeepSparse Engine.