From 7f05c6d343163615ffc52b84aa4bba2a469f19d8 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 30 Mar 2022 17:23:21 -0400 Subject: [PATCH 1/4] Add `deepsparse.analyze` --- setup.py | 1 + src/deepsparse/analyze.py | 258 ++++++++++++++++++ .../benchmark_model/benchmark_model.py | 1 + 3 files changed, 260 insertions(+) create mode 100644 src/deepsparse/analyze.py diff --git a/setup.py b/setup.py index 204af8530d..855f119dd5 100644 --- a/setup.py +++ b/setup.py @@ -183,6 +183,7 @@ def _setup_entry_points() -> Dict: "console_scripts": [ f"deepsparse.transformers.run_inference={data_api_entrypoint}", f"deepsparse.transformers.eval_downstream={eval_downstream}", + "deepsparse.analyze=deepsparse.analyze:main", "deepsparse.check_hardware=deepsparse.cpu:print_hardware_capability", "deepsparse.benchmark=deepsparse.benchmark_model.benchmark_model:main", "deepsparse.server=deepsparse.server.main:start_server", diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py new file mode 100644 index 0000000000..d4d7147388 --- /dev/null +++ b/src/deepsparse/analyze.py @@ -0,0 +1,258 @@ + + +import argparse +import sys +import pprint + +from deepsparse import analyze_model +from deepsparse.utils import ( + generate_random_inputs, + model_to_path, + override_onnx_input_shapes, + parse_input_shapes, +) + +def parse_args(): + parser = argparse.ArgumentParser( + description="Analyze ONNX models in the DeepSparse Engine" + ) + + parser.add_argument( + "model_path", + type=str, + help="Path to an ONNX model file or SparseZoo model stub", + ) + parser.add_argument( + "-wi", + "--num_warmup_iterations", + help="The number of warmup runs that will be executed before the \ + actual benchmarking", + type=int, + default=0, + ) + parser.add_argument( + "-bi", + "--num_iterations", + help="The number of times the benchmark will be run", + type=int, + default=1, + ) + parser.add_argument( + "-ncores", + "--num_cores", + type=int, + default=None, + help=( + "The number of physical cores 
to run the analysis on, " + "defaults to all physical cores available on the system" + ), + ) + parser.add_argument( + "-b", + "--batch_size", + help="The number of inputs that will run through the model at a time", + type=int, + default=1, + ) + parser.add_argument( + "-v", + "--verbose", + help="Print all of the benchmark info", + action="store_true", + default=False, + ) + parser.add_argument( + "-ks", + "--kernel_sparsity", + help="Impose kernel sparsity for all convolutions. [0.0-1.0]", + type=float, + ) + parser.add_argument( + "-ksf", + "--kernel_sparsity_file", + help="Filepath to per-layer kernel sparsities JSON", + type=str, + ) + parser.add_argument( + "--optimization", + help="To enable or disable optimizations (Tensor Columns)", + type=bool, + default=True, + ) + parser.add_argument( + "-i", + "--input_shapes", + help="Override the shapes of the inputs, " + 'i.e. -shapes "[1,2,3],[4,5,6],[7,8,9]" results in ' + "input0=[1,2,3] input1=[4,5,6] input2=[7,8,9]", + type=str, + default="", + ) + + return parser.parse_args() + +def layer_info_to_string(li, format_str): + if li["name"] == "sub_pyramid": + return format_str.format(li["name"], "[]", "[]", "[]", 0, 0, 0, 0, "") + else: + return format_str.format( + li["name"], + "{}".format(list(li["output_dims"].values())), + "{}".format(list(li["kernel_dims"].values())), + "{}".format(list(li["strides"].values())), + li["activation_sparsity"], + li["average_run_time_in_ms"], + li["average_utilization"] * 100.0, + li["average_teraflops_per_second"], + li["canonical_name"], + ) + + +def construct_layer_table(result): + table_str = ( + "Name | OutDims | KerDims |" + + " Strides | ActSpars | Time(ms) | Util(%) | TFLOPS | Canonical Name\n" + ) + for li in result["layer_info"]: + table_str += layer_info_to_string( + li, + "{:28}| {:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + ) + for sub_li in li["sub_layer_info"]: + table_str += layer_info_to_string( + sub_li, + " {:26}| 
{:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + ) + + table_str += "Total Time(MS): {:05f}\n".format(result["average_total_time"]) + table_str += "Items per second: {:05f}\n".format(result["items_per_second"]) + table_str += "Batch Size: {}\n".format(result["batch_size"]) + table_str += "Number of threads: {}\n".format(result["num_threads"]) + + return table_str + + +def process_line_item(total_layer_time, detailed_layer_time, li, strip_name): + if "average_run_time_in_ms" not in li: + # nothing to process + return + + layer_type = li["name"] + if strip_name: + # peel off unique number + layer_type = layer_type.rsplit("_", 1)[0] + # peel off ks percentage + layer_type = layer_type.rsplit("-", 1)[0] + + avg_layer_time = li["average_run_time_in_ms"] + + if layer_type in total_layer_time: + total_layer_time[layer_type] += avg_layer_time + else: + total_layer_time[layer_type] = avg_layer_time + + # Record detailed layer types as well + if "kernel_dims" in li: + kerdims = list(li["kernel_dims"].values()) + if kerdims: + detailed_layer_type = f"{layer_type}|kernel={kerdims}" + if detailed_layer_type in detailed_layer_time: + detailed_layer_time[detailed_layer_type] += avg_layer_time + else: + detailed_layer_time[detailed_layer_type] = avg_layer_time + + +def construct_layer_statistics(result): + # Percentage Statistics + total_layer_time = {} + detailed_layer_time = {} + for li in result["layer_info"]: + if len(li["sub_layer_info"]) == 0: + process_line_item(total_layer_time, detailed_layer_time, li, True) + else: + for sli in li["sub_layer_info"]: + process_line_item(total_layer_time, detailed_layer_time, sli, False) + + summed_total_time = 0.0 + for k, v in total_layer_time.items(): + summed_total_time += v + + perc_str = "== Layer Breakdown ==\n" + perc_str += "Name | Summed Time | Percent Taken\n" + for name, val in total_layer_time.items(): + # Print summary for this type of layer + perc_str += "{:30} | {:8.3f} | 
{:4.2f}%\n".format( + name, val, (val / summed_total_time) * 100.0 + ) + + # Do the same for any sub-types recorded (there can be none) + sublayers = [ + (key.split("|", 1)[1], value) + for key, value in detailed_layer_time.items() + if name == key.split("|", 1)[0] + ] + for subname, subval in sublayers: + perc_str += " {:28} | {:8.3f} | {:4.2f}%\n".format( + subname, subval, (subval / summed_total_time) * 100.0 + ) + + batch_size = int(result["batch_size"]) + perc_str += "== Summed Total Time: {:.4f} ms\n".format(summed_total_time) + perc_str += "== Items per second: {:.4f}\n".format( + (1000.0 / summed_total_time) * batch_size + ) + + return perc_str + + +def main(): + args = parse_args() + + random_input = args.random_input + num_warmup_iterations = args.num_warmup_iterations + num_iterations = args.num_iterations + num_cores = args.num_cores + batch_size = args.batch_size + imposed_activation_sparsity = args.activation_sparsity + imposed_kernel_sparsity = args.kernel_sparsity + imposed_kernel_sparsity_file = args.kernel_sparsity_file + optimization_level = args.optimization + input_shapes = parse_input_shapes(args.input_shapes) + verbose = args.verbose + + # Imposed KS can take either a float or a file, so overwrite with file if we have it + if imposed_kernel_sparsity_file: + imposed_kernel_sparsity = imposed_kernel_sparsity_file + + orig_model_path = args.model_path + model_path = model_to_path(args.model_path) + + print("onnx_filename: " + str(model_path)) + + if input_shapes: + with override_onnx_input_shapes(model_path, input_shapes) as tmp_path: + input_list = generate_random_inputs(tmp_path, batch_size) + else: + input_list = generate_random_inputs(model_path, batch_size) + + result = analyze_model( + model_path, + input_list, + batch_size=batch_size, + num_cores=num_cores, + num_iterations=num_iterations, + num_warmup_iterations=num_warmup_iterations, + optimization_level=optimization_level, + imposed_as=imposed_activation_sparsity, + 
imposed_ks=imposed_kernel_sparsity, + input_shapes=input_shapes, + ) + + if verbose: + pprint.pprint(result) + + print(construct_layer_table(result)) + print(construct_layer_statistics(result)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/deepsparse/benchmark_model/benchmark_model.py b/src/deepsparse/benchmark_model/benchmark_model.py index 1780423545..4ff874b0c6 100644 --- a/src/deepsparse/benchmark_model/benchmark_model.py +++ b/src/deepsparse/benchmark_model/benchmark_model.py @@ -133,6 +133,7 @@ def parse_args(): help="The batch size to run the analysis for. Must be greater than 0", ) parser.add_argument( + "-i", "-shapes", "--input_shapes", type=str, From df2e1a3de7500c8c506aca4b558ad272a4eab214 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 30 Mar 2022 17:26:46 -0400 Subject: [PATCH 2/4] Comment --- src/deepsparse/analyze.py | 64 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py index d4d7147388..6bf0a751d2 100644 --- a/src/deepsparse/analyze.py +++ b/src/deepsparse/analyze.py @@ -1,8 +1,63 @@ - +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Analysis script for ONNX models with the DeepSparse engine. 
+ +########## +Command help: +usage: deepsparse.analyze [-h] [-wi NUM_WARMUP_ITERATIONS] + [-bi NUM_ITERATIONS] [-ncores NUM_CORES] + [-b BATCH_SIZE] [-v] [-ks KERNEL_SPARSITY] + [-ksf KERNEL_SPARSITY_FILE] + [--optimization OPTIMIZATION] [-i INPUT_SHAPES] + model_path + +Analyze ONNX models in the DeepSparse Engine + +positional arguments: + model_path Path to an ONNX model file or SparseZoo model stub + +optional arguments: + -h, --help show this help message and exit + -wi NUM_WARMUP_ITERATIONS, --num_warmup_iterations NUM_WARMUP_ITERATIONS + The number of warmup runs that will be executed before + the actual benchmarking + -bi NUM_ITERATIONS, --num_iterations NUM_ITERATIONS + The number of times the benchmark will be run + -ncores NUM_CORES, --num_cores NUM_CORES + The number of physical cores to run the analysis on, + defaults to all physical cores available on the system + -b BATCH_SIZE, --batch_size BATCH_SIZE + The number of inputs that will run through the model + at a time + -v, --verbose Print all of the benchmark info + -ks KERNEL_SPARSITY, --kernel_sparsity KERNEL_SPARSITY + Impose kernel sparsity for all convolutions. [0.0-1.0] + -ksf KERNEL_SPARSITY_FILE, --kernel_sparsity_file KERNEL_SPARSITY_FILE + Filepath to per-layer kernel sparsities JSON + --optimization OPTIMIZATION + To enable or disable optimizations (Tensor Columns) + -i INPUT_SHAPES, --input_shapes INPUT_SHAPES + Override the shapes of the inputs, i.e. 
-shapes + "[1,2,3],[4,5,6],[7,8,9]" results in input0=[1,2,3] + input1=[4,5,6] input2=[7,8,9] +""" import argparse -import sys import pprint +import sys from deepsparse import analyze_model from deepsparse.utils import ( @@ -12,6 +67,7 @@ parse_input_shapes, ) + def parse_args(): parser = argparse.ArgumentParser( description="Analyze ONNX models in the DeepSparse Engine" @@ -91,6 +147,7 @@ def parse_args(): return parser.parse_args() + def layer_info_to_string(li, format_str): if li["name"] == "sub_pyramid": return format_str.format(li["name"], "[]", "[]", "[]", 0, 0, 0, 0, "") @@ -254,5 +311,6 @@ def main(): print(construct_layer_table(result)) print(construct_layer_statistics(result)) + if __name__ == "__main__": - main() \ No newline at end of file + main() From f4f6169763420b129307ba94a9e83b7cedb141f2 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 31 Mar 2022 16:51:24 -0400 Subject: [PATCH 3/4] Style and quality --- src/deepsparse/analyze.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py index 6bf0a751d2..69cb58f7fa 100644 --- a/src/deepsparse/analyze.py +++ b/src/deepsparse/analyze.py @@ -57,7 +57,6 @@ import argparse import pprint -import sys from deepsparse import analyze_model from deepsparse.utils import ( @@ -167,18 +166,23 @@ def layer_info_to_string(li, format_str): def construct_layer_table(result): table_str = ( - "Name | OutDims | KerDims |" - + " Strides | ActSpars | Time(ms) | Util(%) | TFLOPS | Canonical Name\n" + "Name | OutDims | " + "KerDims | Strides | ActSpars | " + "Time(ms) | Util(%) | TFLOPS | Canonical Name\n" + ) + info_format_base = ( + "{:26} | {:26} | {:12} | {: >#08.4f} | " + "{: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}" ) for li in result["layer_info"]: table_str += layer_info_to_string( li, - "{:28}| {:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + "{:28}| " + info_format_base + "\n", ) 
for sub_li in li["sub_layer_info"]: table_str += layer_info_to_string( sub_li, - " {:26}| {:26} | {:26} | {:12} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}\n", + " {:26}| " + info_format_base + "\n", ) table_str += "Total Time(MS): {:05f}\n".format(result["average_total_time"]) @@ -265,7 +269,6 @@ def construct_layer_statistics(result): def main(): args = parse_args() - random_input = args.random_input num_warmup_iterations = args.num_warmup_iterations num_iterations = args.num_iterations num_cores = args.num_cores @@ -284,7 +287,7 @@ def main(): orig_model_path = args.model_path model_path = model_to_path(args.model_path) - print("onnx_filename: " + str(model_path)) + print("Analyzing model: {}".format(orig_model_path)) if input_shapes: with override_onnx_input_shapes(model_path, input_shapes) as tmp_path: From dd3df95188dc61e30a5b3dfff2406217bb503838 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 1 Apr 2022 10:54:39 -0400 Subject: [PATCH 4/4] Add default WAND_ENABLE_SP_BENCH, quiet flag, and JSON export --- src/deepsparse/analyze.py | 81 ++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/src/deepsparse/analyze.py b/src/deepsparse/analyze.py index 69cb58f7fa..86f907af0f 100644 --- a/src/deepsparse/analyze.py +++ b/src/deepsparse/analyze.py @@ -19,9 +19,10 @@ Command help: usage: deepsparse.analyze [-h] [-wi NUM_WARMUP_ITERATIONS] [-bi NUM_ITERATIONS] [-ncores NUM_CORES] - [-b BATCH_SIZE] [-v] [-ks KERNEL_SPARSITY] + [-b BATCH_SIZE] [-ks KERNEL_SPARSITY] [-ksf KERNEL_SPARSITY_FILE] - [--optimization OPTIMIZATION] [-i INPUT_SHAPES] + [--optimization OPTIMIZATION] [-i INPUT_SHAPES] [-q] + [-x EXPORT_PATH] model_path Analyze ONNX models in the DeepSparse Engine @@ -42,7 +43,6 @@ -b BATCH_SIZE, --batch_size BATCH_SIZE The number of inputs that will run through the model at a time - -v, --verbose Print all of the benchmark info -ks KERNEL_SPARSITY, --kernel_sparsity KERNEL_SPARSITY Impose 
kernel sparsity for all convolutions. [0.0-1.0] -ksf KERNEL_SPARSITY_FILE, --kernel_sparsity_file KERNEL_SPARSITY_FILE @@ -53,10 +53,14 @@ Override the shapes of the inputs, i.e. -shapes "[1,2,3],[4,5,6],[7,8,9]" results in input0=[1,2,3] input1=[4,5,6] input2=[7,8,9] + -q, --quiet Lower logging verbosity + -x EXPORT_PATH, --export_path EXPORT_PATH + Store results into a JSON file """ import argparse -import pprint +import json +import os from deepsparse import analyze_model from deepsparse.utils import ( @@ -83,14 +87,14 @@ def parse_args(): help="The number of warmup runs that will be executed before the \ actual benchmarking", type=int, - default=0, + default=5, ) parser.add_argument( "-bi", "--num_iterations", help="The number of times the benchmark will be run", type=int, - default=1, + default=5, ) parser.add_argument( "-ncores", @@ -109,13 +113,6 @@ def parse_args(): type=int, default=1, ) - parser.add_argument( - "-v", - "--verbose", - help="Print all of the benchmark info", - action="store_true", - default=False, - ) parser.add_argument( "-ks", "--kernel_sparsity", @@ -143,6 +140,20 @@ def parse_args(): type=str, default="", ) + parser.add_argument( + "-q", + "--quiet", + help="Lower logging verbosity", + action="store_true", + default=False, + ) + parser.add_argument( + "-x", + "--export_path", + help="Store results into a JSON file", + type=str, + default=None, + ) return parser.parse_args() @@ -269,20 +280,15 @@ def construct_layer_statistics(result): def main(): args = parse_args() - num_warmup_iterations = args.num_warmup_iterations - num_iterations = args.num_iterations - num_cores = args.num_cores - batch_size = args.batch_size - imposed_activation_sparsity = args.activation_sparsity - imposed_kernel_sparsity = args.kernel_sparsity - imposed_kernel_sparsity_file = args.kernel_sparsity_file - optimization_level = args.optimization input_shapes = parse_input_shapes(args.input_shapes) - verbose = args.verbose + + if args.optimization: + 
os.environ["WAND_ENABLE_SP_BENCH"] = "1" # Imposed KS can take either a float or a file, so overwrite with file if we have it - if imposed_kernel_sparsity_file: - imposed_kernel_sparsity = imposed_kernel_sparsity_file + imposed_kernel_sparsity = args.kernel_sparsity + if args.kernel_sparsity_file: + imposed_kernel_sparsity = args.kernel_sparsity_file orig_model_path = args.model_path model_path = model_to_path(args.model_path) @@ -291,29 +297,32 @@ def main(): if input_shapes: with override_onnx_input_shapes(model_path, input_shapes) as tmp_path: - input_list = generate_random_inputs(tmp_path, batch_size) + input_list = generate_random_inputs(tmp_path, args.batch_size) else: - input_list = generate_random_inputs(model_path, batch_size) + input_list = generate_random_inputs(model_path, args.batch_size) result = analyze_model( model_path, input_list, - batch_size=batch_size, - num_cores=num_cores, - num_iterations=num_iterations, - num_warmup_iterations=num_warmup_iterations, - optimization_level=optimization_level, - imposed_as=imposed_activation_sparsity, + batch_size=args.batch_size, + num_cores=args.num_cores, + num_iterations=args.num_iterations, + num_warmup_iterations=args.num_warmup_iterations, + optimization_level=args.optimization, imposed_ks=imposed_kernel_sparsity, input_shapes=input_shapes, ) - if verbose: - pprint.pprint(result) - - print(construct_layer_table(result)) + if not args.quiet: + print(construct_layer_table(result)) print(construct_layer_statistics(result)) + if args.export_path: + # Export results + print("Saving analysis results to JSON file at {}".format(args.export_path)) + with open(args.export_path, "w") as out: + json.dump(result, out, indent=2) + if __name__ == "__main__": main()