From d2f263bdc9232d03216ff9390634b2e8e2041f79 Mon Sep 17 00:00:00 2001
From: Bill Nell <bill@neuralmagic.com>
Date: Tue, 16 Feb 2021 16:03:40 -0500
Subject: [PATCH 1/5] Add support for batch splitting

---
 src/deepsparse/engine.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py
index a794b2ecd9..1ae64cbbb3 100644
--- a/src/deepsparse/engine.py
+++ b/src/deepsparse/engine.py
@@ -33,7 +33,7 @@
 
 try:
     # flake8: noqa
-    from deepsparse.cpu import cpu_details
+    from deepsparse.cpu import cpu_architecture
     from deepsparse.lib import init_deepsparse_lib
     from deepsparse.version import *
 except ImportError:
@@ -46,7 +46,11 @@
 __all__ = ["Engine", "compile_model", "benchmark_model", "analyze_model"]
 
 
-CORES_PER_SOCKET, AVX_TYPE, VNNI = cpu_details()
+ARCH = cpu_architecture()
+CORES_PER_SOCKET = ARCH.available_cores_per_socket
+NUM_SOCKETS = ARCH.available_sockets
+AVX_TYPE = ARCH.isa
+VNNI = ARCH.vnni
 
 LIB = init_deepsparse_lib()
 
@@ -90,6 +94,16 @@ def _validate_num_cores(num_cores: Union[None, int]) -> int:
     return num_cores
 
 
+def _validate_num_sockets(num_sockets: Union[None, int]) -> int:
+    if not num_sockets:  # None or 0: fall back to the module-level NUM_SOCKETS
+        num_sockets = NUM_SOCKETS
+
+    if num_sockets < 1:
+        raise ValueError("num_sockets must be greater than 0")
+
+    return num_sockets
+
+
 class Engine(object):
     """
     Create a new DeepSparse Engine that compiles the given onnx file
@@ -113,15 +127,20 @@ class Engine(object):
         in one socket for the current machine, default None
     """
 
-    def __init__(self, model: Union[str, Model, File], batch_size: int, num_cores: int):
+    def __init__(
+        self, model: Union[str, Model, File], batch_size: int, num_cores: int, num_sockets: int,
+        use_batch_splitting: bool = True
+    ):
         self._model_path = _model_to_path(model)
         self._batch_size = _validate_batch_size(batch_size)
         self._num_cores = _validate_num_cores(num_cores)
-        self._num_sockets = 1  # only single socket is supported currently
+        self._num_sockets = _validate_num_sockets(num_sockets)
+        self._use_batch_splitting = use_batch_splitting
         self._cpu_avx_type = AVX_TYPE
         self._cpu_vnni = VNNI
         self._eng_net = LIB.deepsparse_engine(
-            self._model_path, self._batch_size, self._num_cores, self._num_sockets
+            self._model_path, self._batch_size, self._num_cores, self._num_sockets,
+            self._use_batch_splitting
         )
 
     def __call__(
@@ -439,13 +458,15 @@ def _properties_dict(self) -> Dict:
             "batch_size": self._batch_size,
             "num_cores": self._num_cores,
             "num_sockets": self._num_sockets,
+            "use_batch_splitting": self._use_bactch_splitting,
             "cpu_avx_type": self._cpu_avx_type,
             "cpu_vnni": self._cpu_vnni,
         }
 
 
 def compile_model(
-    model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None
+    model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, num_sockets: int = None,
+    use_batch_splitting: bool = False
 ) -> Engine:
     """
     Convenience function to compile a model in the DeepSparse Engine
@@ -461,7 +482,7 @@ def compile_model(
         in one socket for the current machine, default None
     :return: The created Engine after compiling the model
     """
-    return Engine(model, batch_size, num_cores)
+    return Engine(model, batch_size, num_cores, num_sockets, use_batch_splitting)
 
 
 def benchmark_model(
@@ -553,7 +574,7 @@ def analyze_model(
     num_cores = _validate_num_cores(num_cores)
     batch_size = _validate_batch_size(batch_size)
     num_sockets = 1
-    eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets)
+    eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets, True)
 
     return eng_net.benchmark(
         inp,

From 2a55fda62d679993629007fad5d37ef47f42ace7 Mon Sep 17 00:00:00 2001
From: Mark Kurtz <mark@neuralmagic.com>
Date: Tue, 23 Feb 2021 12:13:23 -0500
Subject: [PATCH 2/5] Update for 0.1.1 release (#49)

- update the deepsparse package version to 0.1.1
- setup.py: add version parts and _VERSION_MAJOR_MINOR for more flexibility with dependencies between Neural Magic packages
---
 setup.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index ea9a8594f9..a73bb1f258 100644
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,9 @@
 
 
 _PACKAGE_NAME = "deepsparse"
-_VERSION = "0.1.0"
+_VERSION = "0.1.1"
+_VERSION_MAJOR, _VERSION_MINOR, _VERSION_BUG = _VERSION.split(".")
+_VERSION_MAJOR_MINOR = f"{_VERSION_MAJOR}.{_VERSION_MINOR}"
 _NIGHTLY = "nightly" in sys.argv
 
 if _NIGHTLY:
@@ -40,7 +42,9 @@
 
 _deps = ["numpy>=1.16.3", "onnx>=1.5.0,<1.8.0", "requests>=2.0.0"]
 
-_nm_deps = [f"{'sparsezoo-nightly' if _NIGHTLY else 'sparsezoo'}~={_VERSION}"]
+_nm_deps = [
+    f"{'sparsezoo-nightly' if _NIGHTLY else 'sparsezoo'}~={_VERSION_MAJOR_MINOR}"
+]
 
 _dev_deps = [
     "black>=20.8b1",

From c906a06de0054dd3f8d29cc709f8cf2aa3240397 Mon Sep 17 00:00:00 2001
From: Bill Nell <bill@neuralmagic.com>
Date: Tue, 16 Feb 2021 16:03:40 -0500
Subject: [PATCH 3/5] Add support for batch splitting

---
 src/deepsparse/engine.py | 44 ++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py
index 1ae64cbbb3..ae7926df43 100644
--- a/src/deepsparse/engine.py
+++ b/src/deepsparse/engine.py
@@ -125,11 +125,19 @@ class Engine(object):
     :param num_cores: The number of physical cores to run the model on.
         Pass None or 0 to run on the max number of cores
         in one socket for the current machine, default None
+    :param num_sockets: The number of physical sockets to run the model on.
+        Pass None or 0 to run on the max number of sockets for the
+        current machine, default None
+    :param use_batch_splitting: Manually control whether batch splitting is
+        enabled when running the model.  When True, the model is split into
+        batch_size/num_sockets sections where each section is run on a separate
+        socket.  When False, batch splitting is disabled.  If set to None, batch
+        splitting is automatically enabled when num_sockets > 1, default None
     """
 
     def __init__(
-        self, model: Union[str, Model, File], batch_size: int, num_cores: int, num_sockets: int,
-        use_batch_splitting: bool = True
+        self, model: Union[str, Model, File], batch_size: int, num_cores: int,
+        num_sockets: int = None, use_batch_splitting: bool = None
     ):
         self._model_path = _model_to_path(model)
         self._batch_size = _validate_batch_size(batch_size)
@@ -140,7 +148,7 @@ def __init__(
         self._cpu_vnni = VNNI
         self._eng_net = LIB.deepsparse_engine(
             self._model_path, self._batch_size, self._num_cores, self._num_sockets,
-            self._use_batch_splitting
+            True if self._use_batch_splitting or self._num_sockets > 1 else False
         )
 
     def __call__(
@@ -465,8 +473,8 @@ def _properties_dict(self) -> Dict:
 
 
 def compile_model(
-    model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None, num_sockets: int = None,
-    use_batch_splitting: bool = False
+    model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None,
+    num_sockets: int = None, use_batch_splitting: bool = None
 ) -> Engine:
     """
     Convenience function to compile a model in the DeepSparse Engine
@@ -480,6 +488,14 @@ def compile_model(
     :param num_cores: The number of physical cores to run the model on.
         Pass None or 0 to run on the max number of cores
         in one socket for the current machine, default None
+    :param num_sockets: The number of physical sockets to run the model on.
+        Pass None or 0 to run on the max number of sockets for the
+        current machine, default None
+    :param use_batch_splitting: Manually control whether batch splitting is
+        enabled when running the model.  When True, the model is split into
+        batch_size/num_sockets sections where each section is run on a separate
+        socket.  When False, batch splitting is disabled.  If set to None, batch
+        splitting is automatically enabled when num_sockets > 1, default None
     :return: The created Engine after compiling the model
     """
     return Engine(model, batch_size, num_cores, num_sockets, use_batch_splitting)
@@ -494,6 +510,8 @@ def benchmark_model(
     num_warmup_iterations: int = 5,
     include_inputs: bool = False,
     include_outputs: bool = False,
+    num_sockets: int = None,
+    use_batch_splitting: bool = None,
 ) -> BenchmarkResults:
     """
     Convenience function to benchmark a model in the DeepSparse Engine
@@ -521,7 +539,7 @@ def benchmark_model(
         will be added to the results. Default is False
     :return: the results of benchmarking
     """
-    model = compile_model(model, batch_size, num_cores)
+    model = compile_model(model, batch_size, num_cores, num_sockets, use_batch_splitting)
 
     return model.benchmark(
         inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs
@@ -538,6 +556,8 @@ def analyze_model(
     optimization_level: int = 1,
     imposed_as: Optional[float] = None,
     imposed_ks: Optional[float] = None,
+    num_sockets: int = None,
+    use_batch_splitting: bool = None
 ) -> dict:
     """
     Function to analyze a model's performance in the DeepSparse Engine.
@@ -568,13 +588,21 @@ def analyze_model(
         Will force all prunable layers in the graph to have weights with
         this desired sparsity level (percentage of 0's in the tensor).
         Beneficial for seeing how pruning affects the performance of the model.
+    :param use_batch_splitting: Manually control whether batch splitting is
+        enabled when running the model.  When True, the model is split into
+        batch_size/num_sockets sections where each section is run on a separate
+        socket.  When False, batch splitting is disabled.  If set to None, batch
+        splitting is automatically enabled when num_sockets > 1, default None
     :return: the analysis structure containing the performance details of each layer
     """
     model = _model_to_path(model)
     num_cores = _validate_num_cores(num_cores)
     batch_size = _validate_batch_size(batch_size)
-    num_sockets = 1
-    eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets, True)
+    num_sockets = _validate_num_sockets(num_sockets)
+    use_batch_splitting = True if use_batch_splitting or num_sockets > 1 else False
+    eng_net = LIB.deepsparse_engine(
+        model, batch_size, num_cores, num_sockets, use_batch_splitting
+    )
 
     return eng_net.benchmark(
         inp,

From 9a850ce863cfc4eec077aa7f420f2f04dcc4349a Mon Sep 17 00:00:00 2001
From: Bill Nell <bill@neuralmagic.com>
Date: Tue, 23 Feb 2021 15:34:49 -0500
Subject: [PATCH 4/5] Remove use_batch_splitting parameter

---
 src/deepsparse/engine.py | 45 +++++++++++++---------------------------
 1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py
index ae7926df43..814ce4f5a7 100644
--- a/src/deepsparse/engine.py
+++ b/src/deepsparse/engine.py
@@ -128,27 +128,20 @@ class Engine(object):
     :param num_sockets: The number of physical sockets to run the model on.
         Pass None or 0 to run on the max number of sockets for the
         current machine, default None
-    :param use_batch_splitting: Manually control whether batch splitting is
-        enabled when running the model.  When True, the model is split into
-        batch_size/num_sockets sections where each section is run on a separate
-        socket.  When False, batch splitting is disabled.  If set to None, batch
-        splitting is automatically enabled when num_sockets > 1, default None
     """
 
     def __init__(
         self, model: Union[str, Model, File], batch_size: int, num_cores: int,
-        num_sockets: int = None, use_batch_splitting: bool = None
+        num_sockets: int = None
     ):
         self._model_path = _model_to_path(model)
         self._batch_size = _validate_batch_size(batch_size)
         self._num_cores = _validate_num_cores(num_cores)
         self._num_sockets = _validate_num_sockets(num_sockets)
-        self._use_batch_splitting = use_batch_splitting
         self._cpu_avx_type = AVX_TYPE
         self._cpu_vnni = VNNI
         self._eng_net = LIB.deepsparse_engine(
-            self._model_path, self._batch_size, self._num_cores, self._num_sockets,
-            True if self._use_batch_splitting or self._num_sockets > 1 else False
+            self._model_path, self._batch_size, self._num_cores, self._num_sockets
         )
 
     def __call__(
@@ -466,7 +459,6 @@ def _properties_dict(self) -> Dict:
             "batch_size": self._batch_size,
             "num_cores": self._num_cores,
             "num_sockets": self._num_sockets,
-            "use_batch_splitting": self._use_bactch_splitting,
             "cpu_avx_type": self._cpu_avx_type,
             "cpu_vnni": self._cpu_vnni,
         }
@@ -474,7 +466,7 @@ def _properties_dict(self) -> Dict:
 
 def compile_model(
     model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None,
-    num_sockets: int = None, use_batch_splitting: bool = None
+    num_sockets: int = None
 ) -> Engine:
     """
     Convenience function to compile a model in the DeepSparse Engine
@@ -491,14 +483,9 @@ def compile_model(
     :param num_sockets: The number of physical sockets to run the model on.
         Pass None or 0 to run on the max number of sockets for the
         current machine, default None
-    :param use_batch_splitting: Manually control whether batch splitting is
-        enabled when running the model.  When True, the model is split into
-        batch_size/num_sockets sections where each section is run on a separate
-        socket.  When False, batch splitting is disabled.  If set to None, batch
-        splitting is automatically enabled when num_sockets > 1, default None
     :return: The created Engine after compiling the model
     """
-    return Engine(model, batch_size, num_cores, num_sockets, use_batch_splitting)
+    return Engine(model, batch_size, num_cores, num_sockets)
 
 
 def benchmark_model(
@@ -510,8 +497,7 @@ def benchmark_model(
     num_warmup_iterations: int = 5,
     include_inputs: bool = False,
     include_outputs: bool = False,
-    num_sockets: int = None,
-    use_batch_splitting: bool = None,
+    num_sockets: int = None
 ) -> BenchmarkResults:
     """
     Convenience function to benchmark a model in the DeepSparse Engine
@@ -537,9 +523,12 @@ def benchmark_model(
         will be added to the results. Default is False
     :param include_outputs: If True, outputs from forward passes during benchmarking
         will be added to the results. Default is False
+    :param num_sockets: The number of physical sockets to run the model on.
+        Pass None or 0 to run on the max number of sockets for the
+        current machine, default None
     :return: the results of benchmarking
     """
-    model = compile_model(model, batch_size, num_cores, num_sockets, use_batch_splitting)
+    model = compile_model(model, batch_size, num_cores, num_sockets)
 
     return model.benchmark(
         inp, num_iterations, num_warmup_iterations, include_inputs, include_outputs
@@ -556,8 +545,7 @@ def analyze_model(
     optimization_level: int = 1,
     imposed_as: Optional[float] = None,
     imposed_ks: Optional[float] = None,
-    num_sockets: int = None,
-    use_batch_splitting: bool = None
+    num_sockets: int = None
 ) -> dict:
     """
     Function to analyze a model's performance in the DeepSparse Engine.
@@ -588,21 +576,16 @@ def analyze_model(
         Will force all prunable layers in the graph to have weights with
         this desired sparsity level (percentage of 0's in the tensor).
         Beneficial for seeing how pruning affects the performance of the model.
-    :param use_batch_splitting: Manually control whether batch splitting is
-        enabled when running the model.  When True, the model is split into
-        batch_size/num_sockets sections where each section is run on a separate
-        socket.  When False, batch splitting is disabled.  If set to None, batch
-        splitting is automatically enabled when num_sockets > 1, default None
+    :param num_sockets: The number of physical sockets to run the model on.
+        Pass None or 0 to run on the max number of sockets for the
+        current machine, default None
     :return: the analysis structure containing the performance details of each layer
     """
     model = _model_to_path(model)
     num_cores = _validate_num_cores(num_cores)
     batch_size = _validate_batch_size(batch_size)
     num_sockets = _validate_num_sockets(num_sockets)
-    use_batch_splitting = True if use_batch_splitting or num_sockets > 1 else False
-    eng_net = LIB.deepsparse_engine(
-        model, batch_size, num_cores, num_sockets, use_batch_splitting
-    )
+    eng_net = LIB.deepsparse_engine(model, batch_size, num_cores, num_sockets)
 
     return eng_net.benchmark(
         inp,

From 7a6b61db55b73f4e9418367d0a746996555c5f34 Mon Sep 17 00:00:00 2001
From: Bill Nell <bill@neuralmagic.com>
Date: Thu, 25 Feb 2021 14:29:23 -0500
Subject: [PATCH 5/5] Run style on changes

---
 src/deepsparse/engine.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/deepsparse/engine.py b/src/deepsparse/engine.py
index f1f97dbf32..1335299fb7 100644
--- a/src/deepsparse/engine.py
+++ b/src/deepsparse/engine.py
@@ -21,9 +21,9 @@
 from typing import Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy
+from tqdm.auto import tqdm
 
 from deepsparse.benchmark import BenchmarkResults
-from tqdm.auto import tqdm
 
 
 try:
@@ -142,8 +142,11 @@ class Engine(object):
     """
 
     def __init__(
-        self, model: Union[str, Model, File], batch_size: int, num_cores: int,
-        num_sockets: int = None
+        self,
+        model: Union[str, Model, File],
+        batch_size: int,
+        num_cores: int,
+        num_sockets: int = None,
     ):
         self._model_path = _model_to_path(model)
         self._batch_size = _validate_batch_size(batch_size)
@@ -490,8 +493,10 @@ def _properties_dict(self) -> Dict:
 
 
 def compile_model(
-    model: Union[str, Model, File], batch_size: int = 1, num_cores: int = None,
-    num_sockets: int = None
+    model: Union[str, Model, File],
+    batch_size: int = 1,
+    num_cores: int = None,
+    num_sockets: int = None,
 ) -> Engine:
     """
     Convenience function to compile a model in the DeepSparse Engine
@@ -524,7 +529,7 @@ def benchmark_model(
     include_inputs: bool = False,
     include_outputs: bool = False,
     show_progress: bool = False,
-    num_sockets: int = None
+    num_sockets: int = None,
 ) -> BenchmarkResults:
     """
     Convenience function to benchmark a model in the DeepSparse Engine
@@ -579,7 +584,7 @@ def analyze_model(
     optimization_level: int = 1,
     imposed_as: Optional[float] = None,
     imposed_ks: Optional[float] = None,
-    num_sockets: int = None
+    num_sockets: int = None,
 ) -> dict:
     """
     Function to analyze a model's performance in the DeepSparse Engine.