From 7f05c6d343163615ffc52b84aa4bba2a469f19d8 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 30 Mar 2022 17:23:21 -0400 Subject: [PATCH 1/4] Add `deepsparse.analyze` --- setup.py | 1 + src/deepsparse/analyze.py | 258 ++++++++++++++++++ .../benchmark_model/benchmark_model.py | 1 + 3 files changed, 260 insertions(+) create mode 100644 src/deepsparse/analyze.py diff --git a/setup.py b/setup.py index 204af8530d..855f119dd5 100644 --- a/setup.py +++ b/setup.py @@ -183,6 +183,7 @@ def _setup_entry_points() -> Dict: "console_scripts": [ f"deepsparse.transformers.run_inference={data_api_entrypoint}", f"deepsparse.transformers.eval_downstream={eval_downstream}", + "deepsparse.analyze=deepsparse.analyze:main", "deepsparse.check_hardware=deepsparse.cpu:print_hardware_capability", "deepsparse.benchmark=deepsparse.benchmark_model.benchmark_model:main", "deepsparse.server=deepsparse.server.main:start_server", diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py new file mode 100644 index 0000000000..d4d7147388 --- /dev/null +++ b/src/deepsparse/analyze.py @@ -0,0 +1,258 @@ + + +import argparse +import sys +import pprint + +from deepsparse import analyze_model +from deepsparse.utils import ( + generate_random_inputs, + model_to_path, + override_onnx_input_shapes, + parse_input_shapes, +) + +def parse_args(): + parser = argparse.ArgumentParser( + description="Analyze ONNX models in the DeepSparse Engine" + ) + + parser.add_argument( + "model_path", + type=str, + help="Path to an ONNX model file or SparseZoo model stub", + ) + parser.add_argument( + "-wi", + "--num_warmup_iterations", + help="The number of warmup runs that will be executed before the \ + actual benchmarking", + type=int, + default=0, + ) + parser.add_argument( + "-bi", + "--num_iterations", + help="The number of times the benchmark will be run", + type=int, + default=1, + ) + parser.add_argument( + "-ncores", + "--num_cores", + type=int, + default=None, + help=( + "The number of physical cores 
to run the analysis on, " + "defaults to all physical cores available on the system" + ), + ) + parser.add_argument( + "-b", + "--batch_size", + help="The number of inputs that will run through the model at a time", + type=int, + default=1, + ) + parser.add_argument( + "-v", + "--verbose", + help="Print all of the benchmark info", + action="store_true", + default=False, + ) + parser.add_argument( + "-ks", + "--kernel_sparsity", + help="Impose kernel sparsity for all convolutions. [0.0-1.0]", + type=float, + ) + parser.add_argument( + "-ksf", + "--kernel_sparsity_file", + help="Filepath to per-layer kernel sparsities JSON", + type=str, + ) + parser.add_argument( + "--optimization", + help="To enable or disable optimizations (Tensor Columns)", + type=bool, + default=True, + ) + parser.add_argument( + "-i", + "--input_shapes", + help="Override the shapes of the inputs, " + 'i.e. -shapes "[1,2,3],[4,5,6],[7,8,9]" results in ' + "input0=[1,2,3] input1=[4,5,6] input2=[7,8,9]", + type=str, + default="", + ) + + return parser.parse_args() + +def layer_info_to_string(li, format_str): + if li["name"] == "sub_pyramid": + return format_str.format(li["name"], "[]", "[]", "[]", 0, 0, 0, 0, "") + else: + return format_str.format( + li["name"], + "{}".format(list(li["output_dims"].values())), + "{}".format(list(li["kernel_dims"].values())), + "{}".format(list(li["strides"].values())), + li["activation_sparsity"], + li["average_run_time_in_ms"], + li["average_utilization"] * 100.0, + li["average_teraflops_per_second"], + li["canonical_name"], + ) + + +def construct_layer_table(result): + table_str = ( + "Name | OutDims | KerDims |" + + " Strides | ActSpars | Time(ms) | Util(%) | TFLOPS | Canonical Name\n" + ) + for li in result["layer_info"]: + table_str += layer_info_to_string( + li, + "{:28}| {:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + ) + for sub_li in li["sub_layer_info"]: + table_str += layer_info_to_string( + sub_li, + " {:26}| 
{:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + ) + + table_str += "Total Time(MS): {:05f}\n".format(result["average_total_time"]) + table_str += "Items per second: {:05f}\n".format(result["items_per_second"]) + table_str += "Batch Size: {}\n".format(result["batch_size"]) + table_str += "Number of threads: {}\n".format(result["num_threads"]) + + return table_str + + +def process_line_item(total_layer_time, detailed_layer_time, li, strip_name): + if "average_run_time_in_ms" not in li: + # nothing to process + return + + layer_type = li["name"] + if strip_name: + # peel off unique number + layer_type = layer_type.rsplit("_", 1)[0] + # peel off ks percentage + layer_type = layer_type.rsplit("-", 1)[0] + + avg_layer_time = li["average_run_time_in_ms"] + + if layer_type in total_layer_time: + total_layer_time[layer_type] += avg_layer_time + else: + total_layer_time[layer_type] = avg_layer_time + + # Record detailed layer types as well + if "kernel_dims" in li: + kerdims = list(li["kernel_dims"].values()) + if kerdims: + detailed_layer_type = f"{layer_type}|kernel={kerdims}" + if detailed_layer_type in detailed_layer_time: + detailed_layer_time[detailed_layer_type] += avg_layer_time + else: + detailed_layer_time[detailed_layer_type] = avg_layer_time + + +def construct_layer_statistics(result): + # Percentage Statistics + total_layer_time = {} + detailed_layer_time = {} + for li in result["layer_info"]: + if len(li["sub_layer_info"]) == 0: + process_line_item(total_layer_time, detailed_layer_time, li, True) + else: + for sli in li["sub_layer_info"]: + process_line_item(total_layer_time, detailed_layer_time, sli, False) + + summed_total_time = 0.0 + for k, v in total_layer_time.items(): + summed_total_time += v + + perc_str = "== Layer Breakdown ==\n" + perc_str += "Name | Summed Time | Percent Taken\n" + for name, val in total_layer_time.items(): + # Print summary for this type of layer + perc_str += "{:30} | {:8.3f} | 
{:4.2f}%\n".format( + name, val, (val / summed_total_time) * 100.0 + ) + + # Do the same for any sub-types recorded (there can be none) + sublayers = [ + (key.split("|", 1)[1], value) + for key, value in detailed_layer_time.items() + if name == key.split("|", 1)[0] + ] + for subname, subval in sublayers: + perc_str += " {:28} | {:8.3f} | {:4.2f}%\n".format( + subname, subval, (subval / summed_total_time) * 100.0 + ) + + batch_size = int(result["batch_size"]) + perc_str += "== Summed Total Time: {:.4f} ms\n".format(summed_total_time) + perc_str += "== Items per second: {:.4f}\n".format( + (1000.0 / summed_total_time) * batch_size + ) + + return perc_str + + +def main(): + args = parse_args() + + random_input = args.random_input + num_warmup_iterations = args.num_warmup_iterations + num_iterations = args.num_iterations + num_cores = args.num_cores + batch_size = args.batch_size + imposed_activation_sparsity = args.activation_sparsity + imposed_kernel_sparsity = args.kernel_sparsity + imposed_kernel_sparsity_file = args.kernel_sparsity_file + optimization_level = args.optimization + input_shapes = parse_input_shapes(args.input_shapes) + verbose = args.verbose + + # Imposed KS can take either a float or a file, so overwrite with file if we have it + if imposed_kernel_sparsity_file: + imposed_kernel_sparsity = imposed_kernel_sparsity_file + + orig_model_path = args.model_path + model_path = model_to_path(args.model_path) + + print("onnx_filename: " + str(model_path)) + + if input_shapes: + with override_onnx_input_shapes(model_path, input_shapes) as tmp_path: + input_list = generate_random_inputs(tmp_path, batch_size) + else: + input_list = generate_random_inputs(model_path, batch_size) + + result = analyze_model( + model_path, + input_list, + batch_size=batch_size, + num_cores=num_cores, + num_iterations=num_iterations, + num_warmup_iterations=num_warmup_iterations, + optimization_level=optimization_level, + imposed_as=imposed_activation_sparsity, + 
imposed_ks=imposed_kernel_sparsity, + input_shapes=input_shapes, + ) + + if verbose: + pprint.pprint(result) + + print(construct_layer_table(result)) + print(construct_layer_statistics(result)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/deepsparse/benchmark_model/benchmark_model.py b/src/deepsparse/benchmark_model/benchmark_model.py index 1780423545..4ff874b0c6 100644 --- a/src/deepsparse/benchmark_model/benchmark_model.py +++ b/src/deepsparse/benchmark_model/benchmark_model.py @@ -133,6 +133,7 @@ def parse_args(): help="The batch size to run the analysis for. Must be greater than 0", ) parser.add_argument( + "-i", "-shapes", "--input_shapes", type=str, From df2e1a3de7500c8c506aca4b558ad272a4eab214 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 30 Mar 2022 17:26:46 -0400 Subject: [PATCH 2/4] Comment --- src/deepsparse/analyze.py | 64 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py index d4d7147388..6bf0a751d2 100644 --- a/src/deepsparse/analyze.py +++ b/src/deepsparse/analyze.py @@ -1,8 +1,63 @@ - +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Analysis script for ONNX models with the DeepSparse engine. 
+ +########## +Command help: +usage: deepsparse.analyze [-h] [-wi NUM_WARMUP_ITERATIONS] + [-bi NUM_ITERATIONS] [-ncores NUM_CORES] + [-b BATCH_SIZE] [-v] [-ks KERNEL_SPARSITY] + [-ksf KERNEL_SPARSITY_FILE] + [--optimization OPTIMIZATION] [-i INPUT_SHAPES] + model_path + +Analyze ONNX models in the DeepSparse Engine + +positional arguments: + model_path Path to an ONNX model file or SparseZoo model stub + +optional arguments: + -h, --help show this help message and exit + -wi NUM_WARMUP_ITERATIONS, --num_warmup_iterations NUM_WARMUP_ITERATIONS + The number of warmup runs that will be executed before + the actual benchmarking + -bi NUM_ITERATIONS, --num_iterations NUM_ITERATIONS + The number of times the benchmark will be run + -ncores NUM_CORES, --num_cores NUM_CORES + The number of physical cores to run the analysis on, + defaults to all physical cores available on the system + -b BATCH_SIZE, --batch_size BATCH_SIZE + The number of inputs that will run through the model + at a time + -v, --verbose Print all of the benchmark info + -ks KERNEL_SPARSITY, --kernel_sparsity KERNEL_SPARSITY + Impose kernel sparsity for all convolutions. [0.0-1.0] + -ksf KERNEL_SPARSITY_FILE, --kernel_sparsity_file KERNEL_SPARSITY_FILE + Filepath to per-layer kernel sparsities JSON + --optimization OPTIMIZATION + To enable or disable optimizations (Tensor Columns) + -i INPUT_SHAPES, --input_shapes INPUT_SHAPES + Override the shapes of the inputs, i.e. 
-shapes + "[1,2,3],[4,5,6],[7,8,9]" results in input0=[1,2,3] + input1=[4,5,6] input2=[7,8,9] +""" import argparse -import sys import pprint +import sys from deepsparse import analyze_model from deepsparse.utils import ( @@ -12,6 +67,7 @@ parse_input_shapes, ) + def parse_args(): parser = argparse.ArgumentParser( description="Analyze ONNX models in the DeepSparse Engine" @@ -91,6 +147,7 @@ def parse_args(): return parser.parse_args() + def layer_info_to_string(li, format_str): if li["name"] == "sub_pyramid": return format_str.format(li["name"], "[]", "[]", "[]", 0, 0, 0, 0, "") @@ -254,5 +311,6 @@ def main(): print(construct_layer_table(result)) print(construct_layer_statistics(result)) + if __name__ == "__main__": - main() \ No newline at end of file + main() From f4f6169763420b129307ba94a9e83b7cedb141f2 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 31 Mar 2022 16:51:24 -0400 Subject: [PATCH 3/4] Style and quality --- src/deepsparse/analyze.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py index 6bf0a751d2..69cb58f7fa 100644 --- a/src/deepsparse/analyze.py +++ b/src/deepsparse/analyze.py @@ -57,7 +57,6 @@ import argparse import pprint -import sys from deepsparse import analyze_model from deepsparse.utils import ( @@ -167,18 +166,23 @@ def layer_info_to_string(li, format_str): def construct_layer_table(result): table_str = ( - "Name | OutDims | KerDims |" - + " Strides | ActSpars | Time(ms) | Util(%) | TFLOPS | Canonical Name\n" + "Name | OutDims | " + "KerDims | Strides | ActSpars | " + "Time(ms) | Util(%) | TFLOPS | Canonical Name\n" + ) + info_format_base = ( + "{:26} | {:26} | {:12} | {: >#08.4f} | " + "{: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}" ) for li in result["layer_info"]: table_str += layer_info_to_string( li, - "{:28}| {:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + "{:28}| " + info_format_base + "\n", ) 
for sub_li in li["sub_layer_info"]: table_str += layer_info_to_string( sub_li, - " {:26}| {:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + " {:26}| " + info_format_base + "\n", ) table_str += "Total Time(MS): {:05f}\n".format(result["average_total_time"]) @@ -265,7 +269,6 @@ def construct_layer_statistics(result): def main(): args = parse_args() - random_input = args.random_input num_warmup_iterations = args.num_warmup_iterations num_iterations = args.num_iterations num_cores = args.num_cores @@ -284,7 +287,7 @@ def main(): orig_model_path = args.model_path model_path = model_to_path(args.model_path) - print("onnx_filename: " + str(model_path)) + print("Analyzing model: {}".format(orig_model_path)) if input_shapes: with override_onnx_input_shapes(model_path, input_shapes) as tmp_path: From dd3df95188dc61e30a5b3dfff2406217bb503838 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 1 Apr 2022 10:54:39 -0400 Subject: [PATCH 4/4] Add default WAND_ENABLE_SP_BENCH, quiet flag, and JSON export --- src/deepsparse/analyze.py | 81 ++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py index 69cb58f7fa..86f907af0f 100644 --- a/src/deepsparse/analyze.py +++ b/src/deepsparse/analyze.py @@ -19,9 +19,10 @@ Command help: usage: deepsparse.analyze [-h] [-wi NUM_WARMUP_ITERATIONS] [-bi NUM_ITERATIONS] [-ncores NUM_CORES] - [-b BATCH_SIZE] [-v] [-ks KERNEL_SPARSITY] + [-b BATCH_SIZE] [-ks KERNEL_SPARSITY] [-ksf KERNEL_SPARSITY_FILE] - [--optimization OPTIMIZATION] [-i INPUT_SHAPES] + [--optimization OPTIMIZATION] [-i INPUT_SHAPES] [-q] + [-x EXPORT_PATH] model_path Analyze ONNX models in the DeepSparse Engine @@ -42,7 +43,6 @@ -b BATCH_SIZE, --batch_size BATCH_SIZE The number of inputs that will run through the model at a time - -v, --verbose Print all of the benchmark info -ks KERNEL_SPARSITY, --kernel_sparsity KERNEL_SPARSITY Impose 
kernel sparsity for all convolutions. [0.0-1.0] -ksf KERNEL_SPARSITY_FILE, --kernel_sparsity_file KERNEL_SPARSITY_FILE @@ -53,10 +53,14 @@ Override the shapes of the inputs, i.e. -shapes "[1,2,3],[4,5,6],[7,8,9]" results in input0=[1,2,3] input1=[4,5,6] input2=[7,8,9] + -q, --quiet Lower logging verbosity + -x EXPORT_PATH, --export_path EXPORT_PATH + Store results into a JSON file """ import argparse -import pprint +import json +import os from deepsparse import analyze_model from deepsparse.utils import ( @@ -83,14 +87,14 @@ def parse_args(): help="The number of warmup runs that will be executed before the \ actual benchmarking", type=int, - default=0, + default=5, ) parser.add_argument( "-bi", "--num_iterations", help="The number of times the benchmark will be run", type=int, - default=1, + default=5, ) parser.add_argument( "-ncores", @@ -109,13 +113,6 @@ def parse_args(): type=int, default=1, ) - parser.add_argument( - "-v", - "--verbose", - help="Print all of the benchmark info", - action="store_true", - default=False, - ) parser.add_argument( "-ks", "--kernel_sparsity", @@ -143,6 +140,20 @@ def parse_args(): type=str, default="", ) + parser.add_argument( + "-q", + "--quiet", + help="Lower logging verbosity", + action="store_true", + default=False, + ) + parser.add_argument( + "-x", + "--export_path", + help="Store results into a JSON file", + type=str, + default=None, + ) return parser.parse_args() @@ -269,20 +280,15 @@ def construct_layer_statistics(result): def main(): args = parse_args() - num_warmup_iterations = args.num_warmup_iterations - num_iterations = args.num_iterations - num_cores = args.num_cores - batch_size = args.batch_size - imposed_activation_sparsity = args.activation_sparsity - imposed_kernel_sparsity = args.kernel_sparsity - imposed_kernel_sparsity_file = args.kernel_sparsity_file - optimization_level = args.optimization input_shapes = parse_input_shapes(args.input_shapes) - verbose = args.verbose + + if args.optimization: + 
os.environ["WAND_ENABLE_SP_BENCH"] = "1" # Imposed KS can take either a float or a file, so overwrite with file if we have it - if imposed_kernel_sparsity_file: - imposed_kernel_sparsity = imposed_kernel_sparsity_file + imposed_kernel_sparsity = args.kernel_sparsity + if args.kernel_sparsity_file: + imposed_kernel_sparsity = args.kernel_sparsity_file orig_model_path = args.model_path model_path = model_to_path(args.model_path) @@ -291,29 +297,32 @@ def main(): if input_shapes: with override_onnx_input_shapes(model_path, input_shapes) as tmp_path: - input_list = generate_random_inputs(tmp_path, batch_size) + input_list = generate_random_inputs(tmp_path, args.batch_size) else: - input_list = generate_random_inputs(model_path, batch_size) + input_list = generate_random_inputs(model_path, args.batch_size) result = analyze_model( model_path, input_list, - batch_size=batch_size, - num_cores=num_cores, - num_iterations=num_iterations, - num_warmup_iterations=num_warmup_iterations, - optimization_level=optimization_level, - imposed_as=imposed_activation_sparsity, + batch_size=args.batch_size, + num_cores=args.num_cores, + num_iterations=args.num_iterations, + num_warmup_iterations=args.num_warmup_iterations, + optimization_level=args.optimization, imposed_ks=imposed_kernel_sparsity, input_shapes=input_shapes, ) - if verbose: - pprint.pprint(result) - - print(construct_layer_table(result)) + if not args.quiet: + print(construct_layer_table(result)) print(construct_layer_statistics(result)) + if args.export_path: + # Export results + print("Saving analysis results to JSON file at {}".format(args.export_path)) + with open(args.export_path, "w") as out: + json.dump(result, out, indent=2) + if __name__ == "__main__": main()