From e86c93f32598bdb4aa2a8aeea6d88ca18ddc37bb Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Sat, 18 Feb 2023 13:42:23 -0800 Subject: [PATCH 01/22] VIT test, saving as a branch, not for checkin --- .../tools/transformers/dev_benchmark.cmd | 14 +++--- .../tools/transformers/fusion_attention.py | 50 +++++++++++++------ .../tools/transformers/huggingface_models.py | 2 + .../python/tools/transformers/import.py | 20 ++++++++ 4 files changed, 65 insertions(+), 21 deletions(-) create mode 100644 onnxruntime/python/tools/transformers/import.py diff --git a/onnxruntime/python/tools/transformers/dev_benchmark.cmd b/onnxruntime/python/tools/transformers/dev_benchmark.cmd index 7a9b3254a1708..c3de6519bd197 100644 --- a/onnxruntime/python/tools/transformers/dev_benchmark.cmd +++ b/onnxruntime/python/tools/transformers/dev_benchmark.cmd @@ -21,25 +21,27 @@ set run_torchscript=false REM Devices to test. REM Attention: You cannot run both CPU and GPU at the same time: gpu need onnxruntime-gpu, and CPU need onnxruntime. -set run_gpu_fp32=false +set run_gpu_fp32=true set run_gpu_fp16=false -set run_cpu_fp32=true -set run_cpu_int8=true +set run_cpu_fp32=false +set run_cpu_int8=false set average_over=100 REM Enable optimizer (use script instead of OnnxRuntime for graph optimization) set use_optimizer=true -set batch_sizes=1 -set sequence_length=8 128 +set batch_sizes=1 4 +set sequence_length=32 64 REM Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model. REM Note that different input count might lead to different performance set input_counts=1 REM Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased -set models_to_test=bert-base-cased +REM set models_to_test=bert-base-cased +set models_to_test="google/vit-base-patch16-224" +REM set models_to_test="google/vit-base-patch32" REM If you have mutliple GPUs, you can choose one GPU for test. 
Here is an example to use the second GPU: REM set CUDA_VISIBLE_DEVICES=1 diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 342d43306e699..82d8095328765 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -419,18 +419,27 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): else: return - other_inputs = [] - for i, input in enumerate(start_node.input): - if input not in output_name_to_node: - continue + # Match Vit + vit_nodes = self.model.match_parent_path( + matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [None, None, None, None] + ) + if vit_nodes is not None: + root_input = vit_nodes[3].input[0] + else: + other_inputs = [] + for i, input in enumerate(start_node.input): + if input not in output_name_to_node: + continue + + if input == qkv_nodes[0].output[0]: + continue + other_inputs.append(input) + if len(other_inputs) != 1: + return + + root_input = other_inputs[0] - if input == qkv_nodes[0].output[0]: - continue - other_inputs.append(input) - if len(other_inputs) != 1: - return - root_input = other_inputs[0] """ Match flaubert Mask | @@ -471,11 +480,13 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): is_distill = False is_distill_add = False + is_no_add = False qk_paths = { "path1": (["Softmax", "Add", "Div", "MatMul"], [0, 0, None, 0]), "path2": (["Softmax", "Add", "Mul", "MatMul"], [0, 0, None, 0]), "path3": (["Softmax", "Where", "MatMul", "Div"], [0, 0, 2, 0]), "path4": (["Softmax", "Add", "Where", "MatMul"], [0, 0, 0, 2]), + "path5": (["Softmax", "Div", "MatMul"], [0, None, 0]), } qk_nodes = None @@ -487,6 +498,8 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): is_distill = True if k == "path4": is_distill_add = True + if k == "path5": + is_no_add = True break if qk_nodes is None: @@ -500,6 +513,8 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): (_, where_qk, matmul_qk, _) = qk_nodes elif is_distill_add: (_, add_qk, where_qk, matmul_qk) = qk_nodes + elif is_no_add: + (_, _, matmul_qk) = qk_nodes else: (_, add_qk, _, matmul_qk) = qk_nodes @@ -557,6 +572,8 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): if add_qk_str is None: logger.debug(f"fuse_attention: failed to verify shape inference of {add_qk}") return + elif is_no_add: + pass else: _, mask_nodes, _ = self.model.match_parent_paths( add_qk, @@ -569,17 +586,20 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): ], output_name_to_node, ) - if mask_nodes is None: - logger.debug("fuse_attention: failed to match mask path") - return +# if mask_nodes is None: +# logger.debug("fuse_attention: failed to match mask path") +# return - if len(mask_nodes) > 1 and mask_nodes[0].op_type == "Mul": + if mask_nodes is not None and len(mask_nodes) > 1 and mask_nodes[0].op_type == "Mul": _, mul_val = self.model.get_constant_input(mask_nodes[0]) if mul_val != -10000: self.mask_filter_value = mul_val if matmul_v.input[0] == root_input and matmul_q.input[0] == root_input and matmul_k.input[0] == root_input: - mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) + if mask_nodes is None: + mask_index = None + else: + mask_index = self.attention_mask.process_mask(mask_nodes[-1].input[0]) attention_last_node = reshape_qkv if einsum_node is None else transpose_qkv diff --git 
a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index cdf75efb1e62d..0aabcad3de2a2 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -16,6 +16,8 @@ # List of pretrained models: https://huggingface.co/transformers/pretrained_models.html # Pretrained model name to a tuple of input names, opset_version, use_external_data_format, optimization model type MODELS = { + "google/vit-base-patch16-224": (["input_ids"], 12, False, "bert"), + # BERT "bert-base-uncased": ( ["input_ids", "attention_mask", "token_type_ids"], diff --git a/onnxruntime/python/tools/transformers/import.py b/onnxruntime/python/tools/transformers/import.py new file mode 100644 index 0000000000000..9c734e459ae40 --- /dev/null +++ b/onnxruntime/python/tools/transformers/import.py @@ -0,0 +1,20 @@ +from transformers import AutoImageProcessor, ViTModel +import torch +from datasets import load_dataset + +dataset = load_dataset("huggingface/cats-image") +image = dataset["test"]["image"][0] + +image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k") +model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k") + +inputs = image_processor(image, return_tensors="pt") + +with torch.no_grad(): + outputs = model(**inputs) + +# last_hidden_states = outputs.last_hidden_state +# list(last_hidden_states.shape) + +# print(inputs) +torch.onnx.export(model, inputs['pixel_values'], "TestModel.onnx", verbose=True) From 11b1d5d1b193df9ee46a2daf36d1448aa4609429 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Wed, 8 Mar 2023 16:16:05 -0800 Subject: [PATCH 02/22] Push branch for testing on benchmark machine --- .../python/tools/transformers/benchmark.py | 6 ++- .../tools/transformers/onnx_exporter.py | 50 ++++++++++++++++--- .../tools/transformers/run_benchmark.sh | 8 +-- 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 23f1be3eeed2f..afc392402cd27 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -258,7 +258,8 @@ def run_onnxruntime( } logger.info( - "Run onnxruntime on {} with input shape {}".format(model_name, [batch_size, sequence_length]) + # "Run onnxruntime on {} with input shape {}".format(model_name, [batch_size, sequence_length]) + "Run onnxruntime on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224]) ) if disable_ort_io_binding: @@ -359,7 +360,8 @@ def run_pytorch( if max_input_size is not None and sequence_length > max_input_size: continue - logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) + # logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) + logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224])) input_ids = torch.randint( low=0, high=config.vocab_size - 1, diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index c4dda99496ebe..f947d04553c30 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -10,13 +10,23 @@ from pathlib import Path import numpy +import requests import torch from affinity_helper import AffinitySetting 
from benchmark_helper import OptimizerInfo, Precision, create_onnxruntime_session from huggingface_models import MODEL_CLASSES +from PIL import Image from quantize_helper import QuantizeHelper from torch_onnx_export_helper import torch_onnx_export -from transformers import AutoConfig, AutoTokenizer, LxmertConfig, TransfoXLConfig +from transformers import ( + AutoConfig, + AutoModelForImageClassification, + AutoTokenizer, + LxmertConfig, + TransfoXLConfig, + ViTForImageClassification, + ViTImageProcessor, +) sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState @@ -49,9 +59,11 @@ def restore_torch_functions(): def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names, config, data_type=numpy.int64): - input_ids = numpy.random.randint(low=0, high=vocab_size - 1, size=(batch_size, sequence_length), dtype=data_type) + # input_ids = numpy.random.randint(low=0, high=vocab_size - 1, size=(batch_size, sequence_length), dtype=data_type) + input_ids = numpy.random.rand(batch_size, 3, 224, 224).astype(numpy.float32) - inputs = {"input_ids": input_ids} + # inputs = {"input_ids": input_ids} + inputs = {"pixel_values": input_ids} if "attention_mask" in input_names: attention_mask = numpy.ones([batch_size, sequence_length], dtype=data_type) @@ -106,6 +118,15 @@ def build_dynamic_axes(example_inputs, outputs_flatten): dynamic_axes[output_name].update({j: "seq_len"}) return dynamic_axes, output_names +def build_dynamic_axes_vit(example_inputs, outputs_flatten): + # dynamic_axes={ + # 'pixel_values': {0: 'batch_size', 1: 'num_channels', 2: 'height', 3:'width'}, + # 'logits': {0: 'batch_size', 1: 'sequence_length'} + # } + + dynamic_axes = {key: {0: "pixel_values"} for key in example_inputs.keys()} + output_names = ["logits"] + return dynamic_axes, output_names def validate_onnx_model( onnx_model_path, @@ -439,7 +460,8 @@ def validate_and_optimize_onnx( model_fusion_statistics, ) - return onnx_model_path, is_valid_onnx_model, config.vocab_size + # return onnx_model_path, is_valid_onnx_model, config.vocab_size + return onnx_model_path, is_valid_onnx_model, config.num_labels def export_onnx_model_from_pt( @@ -466,6 +488,7 @@ def export_onnx_model_from_pt( # config, model = load_pt_model_from_tf(model_name) model.cpu() + """ tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) max_input_size = ( tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 @@ -474,13 +497,25 @@ def export_onnx_model_from_pt( example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt") example_inputs = filter_inputs(example_inputs, input_names) + """ + + # url = 'http://images.cocodataset.org/val2017/000000039769.jpg' # Egyptian cats + # image = Image.open(requests.get(url, stream=True).raw) + # processor = ViTImageProcessor.from_pretrained(model_name) + # model = ViTForImageClassification.from_pretrained(model_name) + model = AutoModelForImageClassification.from_pretrained(model_name) + + # example_inputs = processor(images=image, return_tensors="pt") + + max_input_size = 1024 # What to use for ViT? 
+ example_inputs = inputs = { 'pixel_values' : torch.rand(2,3,224,224) } example_outputs = model(**example_inputs) - assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" + # assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" # Flatten is needed for gpt2 and distilgpt2. - example_outputs_flatten = flatten(example_outputs) + example_outputs_flatten = flatten(example_outputs['logits']) example_outputs_flatten = update_flatten_list(example_outputs_flatten, []) onnx_model_path = get_onnx_file_path( @@ -498,7 +533,8 @@ def export_onnx_model_from_pt( logger.info("Exporting ONNX model to {}".format(onnx_model_path)) Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True) - dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) + # dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) + dynamic_axes, output_names = build_dynamic_axes_vit(example_inputs, example_outputs_flatten) replace_torch_functions() torch_onnx_export( diff --git a/onnxruntime/python/tools/transformers/run_benchmark.sh b/onnxruntime/python/tools/transformers/run_benchmark.sh index f0422839c11eb..ee33555b8e526 100644 --- a/onnxruntime/python/tools/transformers/run_benchmark.sh +++ b/onnxruntime/python/tools/transformers/run_benchmark.sh @@ -14,7 +14,7 @@ use_package=true # only need once -run_install=true +run_install=false # Engines to test. # To run ort_trt, you need to build and install the onnxruntime-gpu-tensorrt package on your own @@ -49,7 +49,8 @@ layer_number=16 # Batch Sizes and Sequence Lengths batch_sizes="1 4" -sequence_lengths="8 16 32 64 128 256 512 1024" +sequence_lengths="32 64" +# 8 16 32 64 128 256 512 1024" # Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model. # Not that different input count might lead to different performance @@ -57,7 +58,8 @@ sequence_lengths="8 16 32 64 128 256 512 1024" input_counts=1 # Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased -models_to_test="bert-base-cased roberta-base distilbert-base-uncased" +models_to_test="google/vit-base-patch16-224" +# bert-base-cased roberta-base distilbert-base-uncased" # If you have mutliple GPUs, you can choose one GPU for test. Here is an example to use the second GPU: # export CUDA_VISIBLE_DEVICES=1 From e27e1fd73b656bf8489b8e0f89f4c86d40c16c15 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Wed, 8 Mar 2023 20:27:50 -0800 Subject: [PATCH 03/22] Fix pytorch for VIT --- onnxruntime/python/tools/transformers/benchmark.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index afc392402cd27..7de3e881a621c 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -334,11 +334,15 @@ def run_pytorch( cache_dir=cache_dir, custom_model_class=model_class, ) + """ tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) max_input_size = ( tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 ) + """ + + max_input_size = 1024 # What to use for ViT? 
logger.debug(f"Model {model}") logger.debug(f"Number of parameters {model.num_parameters()}") @@ -362,6 +366,8 @@ def run_pytorch( # logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224])) + input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float, device=device) + """ input_ids = torch.randint( low=0, high=config.vocab_size - 1, @@ -369,6 +375,7 @@ def run_pytorch( dtype=torch.long, device=device, ) + """ try: inference = ( torch.jit.trace(model, input_ids) if torchscript else torch.compile(model) if torch2 else model From b6b313f54099000fb5fe29cdcabc67020ac606dc Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Tue, 14 Mar 2023 13:51:10 -0700 Subject: [PATCH 04/22] Update --- .../python/tools/transformers/benchmark.py | 39 ++++++++------- .../tools/transformers/onnx_exporter.py | 50 ++++++++----------- 2 files changed, 41 insertions(+), 48 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 7de3e881a621c..261e42e2a799f 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -334,15 +334,15 @@ def run_pytorch( cache_dir=cache_dir, custom_model_class=model_class, ) - """ - tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) - max_input_size = ( - tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 - ) - """ + if config.model_type == "vit": + max_input_size = 1024 # What to use for ViT? + else: + tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) - max_input_size = 1024 # What to use for ViT? 
+ max_input_size = ( + tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 + ) logger.debug(f"Model {model}") logger.debug(f"Number of parameters {model.num_parameters()}") @@ -364,18 +364,19 @@ def run_pytorch( if max_input_size is not None and sequence_length > max_input_size: continue - # logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) - logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224])) - input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float, device=device) - """ - input_ids = torch.randint( - low=0, - high=config.vocab_size - 1, - size=(batch_size, sequence_length), - dtype=torch.long, - device=device, - ) - """ + if config.model_type == "vit": + logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224])) + input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float, device=device) + else: + logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) + input_ids = torch.randint( + low=0, + high=config.vocab_size - 1, + size=(batch_size, sequence_length), + dtype=torch.long, + device=device, + ) + try: inference = ( torch.jit.trace(model, input_ids) if torchscript else torch.compile(model) if torch2 else model diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index f947d04553c30..d2bff922a1b77 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -119,11 +119,6 @@ def build_dynamic_axes(example_inputs, outputs_flatten): return dynamic_axes, output_names def build_dynamic_axes_vit(example_inputs, outputs_flatten): - # dynamic_axes={ - # 'pixel_values': {0: 'batch_size', 1: 'num_channels', 2: 'height', 3:'width'}, - # 'logits': {0: 'batch_size', 1: 'sequence_length'} - # } - dynamic_axes = {key: {0: "pixel_values"} for key in example_inputs.keys()} output_names = ["logits"] return dynamic_axes, output_names @@ -311,6 +306,9 @@ def modelclass_dispatcher(model_name, custom_model_class): def load_pretrained_model(model_name, config, cache_dir, custom_model_class, is_tf_model=False): + if config.model_type=="vit": + return AutoModelForImageClassification.from_pretrained(model_name, config=config, cache_dir=cache_dir) + model_class_name = modelclass_dispatcher(model_name, custom_model_class) if model_class_name == "GPT2ModelNoPastState": @@ -488,34 +486,26 @@ def export_onnx_model_from_pt( # config, model = load_pt_model_from_tf(model_name) model.cpu() - """ - tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) - max_input_size = ( - tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 - ) - - example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt") + if config.model_type == "vit": + max_input_size = 1024 # What to use for ViT? 
- example_inputs = filter_inputs(example_inputs, input_names) - """ - - # url = 'http://images.cocodataset.org/val2017/000000039769.jpg' # Egyptian cats - # image = Image.open(requests.get(url, stream=True).raw) - # processor = ViTImageProcessor.from_pretrained(model_name) - # model = ViTForImageClassification.from_pretrained(model_name) - model = AutoModelForImageClassification.from_pretrained(model_name) - - # example_inputs = processor(images=image, return_tensors="pt") + example_inputs = inputs = { 'pixel_values' : torch.rand(2,3,224,224) } + example_outputs = model(**example_inputs) + else: + tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) + max_input_size = ( + tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 + ) - max_input_size = 1024 # What to use for ViT? + example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt") - example_inputs = inputs = { 'pixel_values' : torch.rand(2,3,224,224) } - example_outputs = model(**example_inputs) + example_inputs = filter_inputs(example_inputs, input_names) - # assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" + example_outputs = model(**example_inputs) + assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" # Flatten is needed for gpt2 and distilgpt2. - example_outputs_flatten = flatten(example_outputs['logits']) + example_outputs_flatten = flatten(example_outputs) example_outputs_flatten = update_flatten_list(example_outputs_flatten, []) onnx_model_path = get_onnx_file_path( @@ -533,8 +523,10 @@ def export_onnx_model_from_pt( logger.info("Exporting ONNX model to {}".format(onnx_model_path)) Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True) - # dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) - dynamic_axes, output_names = build_dynamic_axes_vit(example_inputs, example_outputs_flatten) + if config.model_type == "vit": + dynamic_axes, output_names = build_dynamic_axes_vit(example_inputs, example_outputs_flatten) + else: + dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) replace_torch_functions() torch_onnx_export( From 807afeda28afca9098430ecbab8a986674c8e3c7 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Tue, 14 Mar 2023 15:37:52 -0700 Subject: [PATCH 05/22] Simplifiy & cleanup --- .../python/tools/transformers/benchmark.py | 44 ++++++++++++------ .../tools/transformers/dev_benchmark.cmd | 14 +++--- .../tools/transformers/fusion_attention.py | 4 +- .../python/tools/transformers/import.py | 20 -------- .../tools/transformers/onnx_exporter.py | 46 ++++++++++++++----- 5 files changed, 70 insertions(+), 58 deletions(-) delete mode 100644 onnxruntime/python/tools/transformers/import.py diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 23f1be3eeed2f..804ae83dcc12e 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -257,9 +257,14 @@ def run_onnxruntime( "datetime": str(datetime.now()), } - logger.info( - "Run onnxruntime on {} with input shape {}".format(model_name, [batch_size, sequence_length]) - ) + if config.model_type == "vit": + logger.info( + "Run onnxruntime on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224]) + ) + else: + logger.info( + "Run onnxruntime on 
{} with input shape {}".format(model_name, [batch_size, sequence_length]) + ) if disable_ort_io_binding: result = inference_ort( @@ -333,11 +338,15 @@ def run_pytorch( cache_dir=cache_dir, custom_model_class=model_class, ) - tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) - max_input_size = ( - tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 - ) + if config.model_type == "vit": + max_input_size = 1024 # What to use for ViT? + else: + tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) + + max_input_size = ( + tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 + ) logger.debug(f"Model {model}") logger.debug(f"Number of parameters {model.num_parameters()}") @@ -359,14 +368,19 @@ def run_pytorch( if max_input_size is not None and sequence_length > max_input_size: continue - logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) - input_ids = torch.randint( - low=0, - high=config.vocab_size - 1, - size=(batch_size, sequence_length), - dtype=torch.long, - device=device, - ) + if config.model_type == "vit": + logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224])) + input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float, device=device) + else: + logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) + input_ids = torch.randint( + low=0, + high=config.vocab_size - 1, + size=(batch_size, sequence_length), + dtype=torch.long, + device=device, + ) + try: inference = ( torch.jit.trace(model, input_ids) if torchscript else torch.compile(model) if torch2 else model diff --git a/onnxruntime/python/tools/transformers/dev_benchmark.cmd b/onnxruntime/python/tools/transformers/dev_benchmark.cmd index c3de6519bd197..7a9b3254a1708 100644 --- a/onnxruntime/python/tools/transformers/dev_benchmark.cmd +++ b/onnxruntime/python/tools/transformers/dev_benchmark.cmd @@ -21,27 +21,25 @@ set run_torchscript=false REM Devices to test. REM Attention: You cannot run both CPU and GPU at the same time: gpu need onnxruntime-gpu, and CPU need onnxruntime. -set run_gpu_fp32=true +set run_gpu_fp32=false set run_gpu_fp16=false -set run_cpu_fp32=false -set run_cpu_int8=false +set run_cpu_fp32=true +set run_cpu_int8=true set average_over=100 REM Enable optimizer (use script instead of OnnxRuntime for graph optimization) set use_optimizer=true -set batch_sizes=1 4 -set sequence_length=32 64 +set batch_sizes=1 +set sequence_length=8 128 REM Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model. REM Note that different input count might lead to different performance set input_counts=1 REM Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased -REM set models_to_test=bert-base-cased -set models_to_test="google/vit-base-patch16-224" -REM set models_to_test="google/vit-base-patch32" +set models_to_test=bert-base-cased REM If you have mutliple GPUs, you can choose one GPU for test. 
Here is an example to use the second GPU: REM set CUDA_VISIBLE_DEVICES=1 diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 82d8095328765..c834862f062ca 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -586,10 +586,8 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): ], output_name_to_node, ) -# if mask_nodes is None: -# logger.debug("fuse_attention: failed to match mask path") -# return + # ViT models have no mask nodes, so only do them for models that have them if mask_nodes is not None and len(mask_nodes) > 1 and mask_nodes[0].op_type == "Mul": _, mul_val = self.model.get_constant_input(mask_nodes[0]) if mul_val != -10000: diff --git a/onnxruntime/python/tools/transformers/import.py b/onnxruntime/python/tools/transformers/import.py deleted file mode 100644 index 9c734e459ae40..0000000000000 --- a/onnxruntime/python/tools/transformers/import.py +++ /dev/null @@ -1,20 +0,0 @@ -from transformers import AutoImageProcessor, ViTModel -import torch -from datasets import load_dataset - -dataset = load_dataset("huggingface/cats-image") -image = dataset["test"]["image"][0] - -image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k") -model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k") - -inputs = image_processor(image, return_tensors="pt") - -with torch.no_grad(): - outputs = model(**inputs) - -# last_hidden_states = outputs.last_hidden_state -# list(last_hidden_states.shape) - -# print(inputs) -torch.onnx.export(model, inputs['pixel_values'], "TestModel.onnx", verbose=True) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index c4dda99496ebe..c490be2499826 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -16,7 +16,7 @@ from huggingface_models import MODEL_CLASSES from quantize_helper import QuantizeHelper from torch_onnx_export_helper import torch_onnx_export -from transformers import AutoConfig, AutoTokenizer, LxmertConfig, TransfoXLConfig +from transformers import AutoConfig, AutoModelForImageClassification, AutoTokenizer, LxmertConfig, TransfoXLConfig sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState @@ -49,8 +49,12 @@ def restore_torch_functions(): def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names, config, data_type=numpy.int64): - input_ids = numpy.random.randint(low=0, high=vocab_size - 1, size=(batch_size, sequence_length), dtype=data_type) + if config.model_type=="vit": + input_ids = numpy.random.rand(batch_size, 3, 224, 224).astype(numpy.float32) + inputs = {"pixel_values": input_ids} + return inputs + input_ids = numpy.random.randint(low=0, high=vocab_size - 1, size=(batch_size, sequence_length), dtype=data_type) inputs = {"input_ids": input_ids} if "attention_mask" in input_names: @@ -106,6 +110,10 @@ def build_dynamic_axes(example_inputs, outputs_flatten): dynamic_axes[output_name].update({j: "seq_len"}) return dynamic_axes, output_names +def build_dynamic_axes_vit(example_inputs, outputs_flatten): + dynamic_axes = {key: {0: "pixel_values"} for key in example_inputs.keys()} + output_names = ["logits"] + return dynamic_axes, 
output_names def validate_onnx_model( onnx_model_path, @@ -290,6 +298,9 @@ def modelclass_dispatcher(model_name, custom_model_class): def load_pretrained_model(model_name, config, cache_dir, custom_model_class, is_tf_model=False): + if config.model_type=="vit": + return AutoModelForImageClassification.from_pretrained(model_name, config=config, cache_dir=cache_dir) + model_class_name = modelclass_dispatcher(model_name, custom_model_class) if model_class_name == "GPT2ModelNoPastState": @@ -439,7 +450,10 @@ def validate_and_optimize_onnx( model_fusion_statistics, ) - return onnx_model_path, is_valid_onnx_model, config.vocab_size + if config.model_type == "vit": + return onnx_model_path, is_valid_onnx_model, config.num_labels + else: + return onnx_model_path, is_valid_onnx_model, config.vocab_size def export_onnx_model_from_pt( @@ -466,18 +480,23 @@ def export_onnx_model_from_pt( # config, model = load_pt_model_from_tf(model_name) model.cpu() - tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) - max_input_size = ( - tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 - ) + if config.model_type == "vit": + max_input_size = 1024 # What to use for ViT? - example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt") + example_inputs = inputs = { 'pixel_values' : torch.rand(2,3,224,224) } + example_outputs = model(**example_inputs) + else: + tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) + max_input_size = ( + tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024 + ) - example_inputs = filter_inputs(example_inputs, input_names) + example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt") - example_outputs = model(**example_inputs) + example_inputs = filter_inputs(example_inputs, input_names) - assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" + example_outputs = model(**example_inputs) + assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" # Flatten is needed for gpt2 and distilgpt2. 
example_outputs_flatten = flatten(example_outputs) @@ -498,7 +517,10 @@ def export_onnx_model_from_pt( logger.info("Exporting ONNX model to {}".format(onnx_model_path)) Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True) - dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) + if config.model_type == "vit": + dynamic_axes, output_names = build_dynamic_axes_vit(example_inputs, example_outputs_flatten) + else: + dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) replace_torch_functions() torch_onnx_export( From 6054a5d89268260b3a23c4756958a2c5bbf5d57c Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Tue, 14 Mar 2023 15:40:36 -0700 Subject: [PATCH 06/22] Fix --- .../python/tools/transformers/huggingface_models.py | 5 +++-- onnxruntime/python/tools/transformers/onnx_exporter.py | 2 -- onnxruntime/python/tools/transformers/run_benchmark.sh | 8 +++----- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index 0aabcad3de2a2..47595db17d3a6 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -16,8 +16,6 @@ # List of pretrained models: https://huggingface.co/transformers/pretrained_models.html # Pretrained model name to a tuple of input names, opset_version, use_external_data_format, optimization model type MODELS = { - "google/vit-base-patch16-224": (["input_ids"], 12, False, "bert"), - # BERT "bert-base-uncased": ( ["input_ids", "attention_mask", "token_type_ids"], @@ -160,4 +158,7 @@ ), # "google/pegasus-xsum": (["input_ids"], 11, False, "bert"), # "google/pegasus-large": (["input_ids"], 11, False, "bert"), + + # ViT + "google/vit-base-patch16-224": (["input_ids"], 12, False, "bert"), } diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index e7faca6699747..c490be2499826 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -10,12 +10,10 @@ from pathlib import Path import numpy -import requests import torch from affinity_helper import AffinitySetting from benchmark_helper import OptimizerInfo, Precision, create_onnxruntime_session from huggingface_models import MODEL_CLASSES -from PIL import Image from quantize_helper import QuantizeHelper from torch_onnx_export_helper import torch_onnx_export from transformers import AutoConfig, AutoModelForImageClassification, AutoTokenizer, LxmertConfig, TransfoXLConfig diff --git a/onnxruntime/python/tools/transformers/run_benchmark.sh b/onnxruntime/python/tools/transformers/run_benchmark.sh index ee33555b8e526..f0422839c11eb 100644 --- a/onnxruntime/python/tools/transformers/run_benchmark.sh +++ b/onnxruntime/python/tools/transformers/run_benchmark.sh @@ -14,7 +14,7 @@ use_package=true # only need once -run_install=false +run_install=true # Engines to test. # To run ort_trt, you need to build and install the onnxruntime-gpu-tensorrt package on your own @@ -49,8 +49,7 @@ layer_number=16 # Batch Sizes and Sequence Lengths batch_sizes="1 4" -sequence_lengths="32 64" -# 8 16 32 64 128 256 512 1024" +sequence_lengths="8 16 32 64 128 256 512 1024" # Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model. 
# Not that different input count might lead to different performance @@ -58,8 +57,7 @@ sequence_lengths="32 64" input_counts=1 # Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased -models_to_test="google/vit-base-patch16-224" -# bert-base-cased roberta-base distilbert-base-uncased" +models_to_test="bert-base-cased roberta-base distilbert-base-uncased" # If you have mutliple GPUs, you can choose one GPU for test. Here is an example to use the second GPU: # export CUDA_VISIBLE_DEVICES=1 From 0d8b4e939d7a4afd7afa17f64622c8eebffbe48b Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Wed, 22 Mar 2023 20:16:50 -0700 Subject: [PATCH 07/22] Stash changes --- onnxruntime/python/tools/transformers/benchmark.py | 4 ++-- onnxruntime/python/tools/transformers/onnx_exporter.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 804ae83dcc12e..3c8114a04247c 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -340,7 +340,7 @@ def run_pytorch( ) if config.model_type == "vit": - max_input_size = 1024 # What to use for ViT? + max_input_size = 1024 # Just needs to be greater than sequence_length else: tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) @@ -370,7 +370,7 @@ def run_pytorch( if config.model_type == "vit": logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, 3, 224, 224])) - input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float, device=device) + input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, device=device) else: logger.info("Run PyTorch on {} with input shape {}".format(model_name, [batch_size, sequence_length])) input_ids = torch.randint( diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index c490be2499826..41159b8189296 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -481,7 +481,7 @@ def export_onnx_model_from_pt( model.cpu() if config.model_type == "vit": - max_input_size = 1024 # What to use for ViT? 
+ max_input_size = 1024 # Just needs to be greater than sequence_length example_inputs = inputs = { 'pixel_values' : torch.rand(2,3,224,224) } example_outputs = model(**example_inputs) From 441b555d2355476a63ebabf85e299f41eb314cb5 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Tue, 4 Apr 2023 15:16:00 -0700 Subject: [PATCH 08/22] Stash changes --- onnxruntime/python/tools/transformers/huggingface_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index 47595db17d3a6..cf868a55f6433 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -160,5 +160,5 @@ # "google/pegasus-large": (["input_ids"], 11, False, "bert"), # ViT - "google/vit-base-patch16-224": (["input_ids"], 12, False, "bert"), + "google/vit-base-patch16-224": (["input_ids"], 12, False, "vit"), } From fe934a102d7956f6983b69dd01c233d30cad1b3e Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Thu, 6 Apr 2023 14:22:16 -0700 Subject: [PATCH 09/22] Swin benchmarking --- .../python/tools/transformers/benchmark.py | 6 ++-- .../tools/transformers/fusion_attention.py | 12 ++++---- .../tools/transformers/huggingface_models.py | 5 ++-- .../tools/transformers/onnx_exporter.py | 29 ++++++++----------- .../python/tools/transformers/optimizer.py | 3 +- 5 files changed, 26 insertions(+), 29 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 992a70fecc601..376cbc22dbe40 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -262,7 +262,7 @@ def run_onnxruntime( "datetime": str(datetime.now()), } - if config.model_type == "vit": + if config.model_type == "vit" or config.model_type == "swin": logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, 3, 224, 224]}") else: logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, sequence_length]}") @@ -340,7 +340,7 @@ def run_pytorch( custom_model_class=model_class, ) - if config.model_type == "vit": + if config.model_type == "vit" or config.model_type == "swin": max_input_size = 1024 # Just needs to be greater than sequence_length else: tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) @@ -369,7 +369,7 @@ def run_pytorch( if max_input_size is not None and sequence_length > max_input_size: continue - if config.model_type == "vit": + if config.model_type == "vit" or config.model_type == "swin": logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, 3, 224, 224]}") input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, device=device) else: diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index 79cbf83e1c214..c477ce08db73a 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -458,13 +458,13 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): if input not in output_name_to_node: continue - if input == qkv_nodes[0].output[0]: - continue - other_inputs.append(input) - if len(other_inputs) != 1: - return + if input == qkv_nodes[0].output[0]: + continue + other_inputs.append(input) + if 
len(other_inputs) != 1: + return - root_input = other_inputs[0] + root_input = other_inputs[0] """ diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index d00d5cabf33f7..f6899a6527d6a 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -156,10 +156,11 @@ False, "bert", ), - "google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"), # "google/pegasus-xsum": (["input_ids"], 11, False, "bert"), # "google/pegasus-large": (["input_ids"], 11, False, "bert"), # ViT - "google/vit-base-patch16-224": (["input_ids"], 12, False, "vit"), + "google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"), + # Swin + "microsoft/swin-base-patch4-window7-224": (["pixel_values"], 12, False, "swin"), } diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index bb3305ea25b41..b7ad1d40d98a6 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -16,7 +16,7 @@ from huggingface_models import MODEL_CLASSES from quantize_helper import QuantizeHelper from torch_onnx_export_helper import torch_onnx_export -from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer, LxmertConfig, TransfoXLConfig +from transformers import AutoConfig, AutoModelForImageClassification, AutoFeatureExtractor, AutoTokenizer, LxmertConfig, TransfoXLConfig sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState # noqa: E402 @@ -110,11 +110,6 @@ def build_dynamic_axes(example_inputs, outputs_flatten): dynamic_axes[output_name].update({j: "seq_len"}) return dynamic_axes, output_names -def build_dynamic_axes_vit(example_inputs, outputs_flatten): - dynamic_axes = {key: {0: "pixel_values"} for key in example_inputs.keys()} - output_names = ["logits"] - return dynamic_axes, output_names - def validate_onnx_model( onnx_model_path, example_inputs, @@ -255,8 +250,8 @@ def optimize_onnx_model( opt_model = optimize_model( onnx_model_path, model_type, - num_heads=num_attention_heads, - hidden_size=hidden_size, + num_heads=num_attention_heads if model_type != "swin" else 0, + hidden_size=hidden_size if model_type != "swin" else 0, opt_level=0, optimization_options=optimization_options, use_gpu=use_gpu, @@ -298,9 +293,6 @@ def modelclass_dispatcher(model_name, custom_model_class): def load_pretrained_model(model_name, config, cache_dir, custom_model_class, is_tf_model=False): - if config.model_type=="vit": - return AutoModelForImageClassification.from_pretrained(model_name, config=config, cache_dir=cache_dir) - model_class_name = modelclass_dispatcher(model_name, custom_model_class) if model_class_name == "GPT2ModelNoPastState": @@ -479,8 +471,11 @@ def export_onnx_model_from_pt( example_inputs = None max_input_size = None - if model_type == "vit": - example_inputs = inputs = { 'pixel_values' : torch.rand(2,3,224,224) } + if model_type == "vit" or model_type == "swin": + image_processor = AutoFeatureExtractor.from_pretrained(model_name, cache_dir=cache_dir) + data = numpy.random.randint(low=0, high=256, size=224 * 224 * 3, dtype=numpy.uint8).reshape(224, 224, 3) + + example_inputs = image_processor(data, return_tensors="pt") else: tokenizer = AutoTokenizer.from_pretrained(model_name, 
cache_dir=cache_dir) max_input_size = ( @@ -489,10 +484,10 @@ def export_onnx_model_from_pt( example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt") - example_inputs = filter_inputs(example_inputs, input_names) + example_inputs = filter_inputs(example_inputs, input_names) - example_outputs = model(**example_inputs) - assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" + example_outputs = model(**example_inputs) + assert isinstance(example_outputs, (list, tuple)), f"type of output is not list or tuple: {type(example_outputs)}" # Flatten is needed for gpt2 and distilgpt2. example_outputs_flatten = flatten(example_outputs) @@ -516,7 +511,7 @@ def export_onnx_model_from_pt( dynamic_axes = None output_names = None - if model_type == "vit": + if model_type == "vit" or model_type == "swin": dynamic_axes, output_names = {key: {0: "pixel_values"} for key in example_inputs}, ["logits"] else: dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py index 99ef58841d6cd..c73de6c52ad09 100644 --- a/onnxruntime/python/tools/transformers/optimizer.py +++ b/onnxruntime/python/tools/transformers/optimizer.py @@ -57,6 +57,7 @@ "vae": (VaeOnnxModel, "pytorch", 1), "clip": (ClipOnnxModel, "pytorch", 1), "vit": (BertOnnxModel, "pytorch", 1), + "swin": (BertOnnxModel, "pytorch", 1), } @@ -160,7 +161,7 @@ def optimize_by_fusion( Returns: object of an optimizer class. """ - if model_type not in ["bert", "unet", "vae", "clip"] and (num_heads == 0 or hidden_size == 0): + if model_type not in ["bert", "vit", "swin", "unet", "vae", "clip"] and (num_heads == 0 or hidden_size == 0): logger.warning(f"Please specify parameters of num_heads and hidden_size for model_type {model_type}") (optimizer_class, producer, _) = MODEL_TYPES[model_type] From dc650905eb87fc32bab1bc857577d7ca01ed2cda Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Thu, 6 Apr 2023 14:44:22 -0700 Subject: [PATCH 10/22] Add more models, fix one bug --- onnxruntime/python/tools/transformers/fusion_attention.py | 2 -- onnxruntime/python/tools/transformers/huggingface_models.py | 2 ++ onnxruntime/python/tools/transformers/onnx_exporter.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/tools/transformers/fusion_attention.py b/onnxruntime/python/tools/transformers/fusion_attention.py index c477ce08db73a..47af94550910e 100644 --- a/onnxruntime/python/tools/transformers/fusion_attention.py +++ b/onnxruntime/python/tools/transformers/fusion_attention.py @@ -465,8 +465,6 @@ def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node): return root_input = other_inputs[0] - - """ Match flaubert Mask | diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index f6899a6527d6a..408b7b41f809d 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -163,4 +163,6 @@ "google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"), # Swin "microsoft/swin-base-patch4-window7-224": (["pixel_values"], 12, False, "swin"), + "microsoft/swin-small-patch4-window7-224": (["pixel_values"], 12, False, "swin"), + "microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 12, False, "swin"), } diff --git 
a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index b7ad1d40d98a6..21895c3e97b1a 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -442,7 +442,7 @@ def validate_and_optimize_onnx( model_fusion_statistics, ) - return onnx_model_path, is_valid_onnx_model, config.num_labels if model_type == "vit" else config.vocab_size + return onnx_model_path, is_valid_onnx_model, config.num_labels if model_type == "vit" or model_type == "swin" else config.vocab_size def export_onnx_model_from_pt( From c2a48eb9a85ca69aa671176f6c04f236595d8e5c Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Thu, 6 Apr 2023 15:07:00 -0700 Subject: [PATCH 11/22] Fix swin for benchmark --- onnxruntime/python/tools/transformers/onnx_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 21895c3e97b1a..5d1b7555bac38 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -49,7 +49,7 @@ def restore_torch_functions(): def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names, config, data_type=numpy.int64): - if config.model_type=="vit": + if config.model_type=="vit" or config.model_type=="swin": input_ids = numpy.random.rand(batch_size, 3, 224, 224).astype(numpy.float32) inputs = {"pixel_values": input_ids} return inputs From f40e4db701b791191af87bef6352d1609f9f9db0 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Thu, 6 Apr 2023 20:21:01 -0700 Subject: [PATCH 12/22] Remove hardcoded 224 for vit/swin --- onnxruntime/python/tools/transformers/benchmark.py | 6 +++--- onnxruntime/python/tools/transformers/onnx_exporter.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 376cbc22dbe40..197cc73a4288f 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -263,7 +263,7 @@ def run_onnxruntime( } if config.model_type == "vit" or config.model_type == "swin": - logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, 3, 224, 224]}") + logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}") else: logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, sequence_length]}") @@ -370,8 +370,8 @@ def run_pytorch( continue if config.model_type == "vit" or config.model_type == "swin": - logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, 3, 224, 224]}") - input_ids = torch.randn(size=(batch_size, 3, 224, 224),dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, device=device) + logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}") + input_ids = torch.randn(size=(batch_size, 3, config.image_size, config.image_size),dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, device=device) else: logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, sequence_length]}") input_ids = torch.randint( diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 
5d1b7555bac38..33e443ff5be0e 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -50,7 +50,7 @@ def restore_torch_functions(): def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names, config, data_type=numpy.int64): if config.model_type=="vit" or config.model_type=="swin": - input_ids = numpy.random.rand(batch_size, 3, 224, 224).astype(numpy.float32) + input_ids = numpy.random.rand(batch_size, 3, config.image_size, config.image_size).astype(numpy.float32) inputs = {"pixel_values": input_ids} return inputs @@ -473,7 +473,7 @@ def export_onnx_model_from_pt( if model_type == "vit" or model_type == "swin": image_processor = AutoFeatureExtractor.from_pretrained(model_name, cache_dir=cache_dir) - data = numpy.random.randint(low=0, high=256, size=224 * 224 * 3, dtype=numpy.uint8).reshape(224, 224, 3) + data = numpy.random.randint(low=0, high=256, size=config.image_size * config.image_size * 3, dtype=numpy.uint8).reshape(config.image_size, config.image_size, 3) example_inputs = image_processor(data, return_tensors="pt") else: From 8ab290a9070889d023b26ad5d768816ca63d41e2 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Thu, 6 Apr 2023 20:24:28 -0700 Subject: [PATCH 13/22] Remove unused AutoModelForImageClassification --- onnxruntime/python/tools/transformers/onnx_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 33e443ff5be0e..06e8330842e03 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -16,7 +16,7 @@ from huggingface_models import MODEL_CLASSES from quantize_helper import QuantizeHelper from torch_onnx_export_helper import torch_onnx_export -from transformers import AutoConfig, AutoModelForImageClassification, AutoFeatureExtractor, AutoTokenizer, LxmertConfig, TransfoXLConfig +from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer, LxmertConfig, TransfoXLConfig sys.path.append(os.path.join(os.path.dirname(__file__), "models", "gpt2")) from gpt2_helper import PRETRAINED_GPT2_MODELS, GPT2ModelNoPastState, TFGPT2ModelNoPastState # noqa: E402 From 456d95b7c402d42936e2793e8d57982686d69473 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Thu, 6 Apr 2023 20:30:35 -0700 Subject: [PATCH 14/22] Add comments, and simplify --- onnxruntime/python/tools/transformers/onnx_exporter.py | 3 ++- onnxruntime/python/tools/transformers/optimizer.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 06e8330842e03..2894d2b60af05 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -250,7 +250,7 @@ def optimize_onnx_model( opt_model = optimize_model( onnx_model_path, model_type, - num_heads=num_attention_heads if model_type != "swin" else 0, + num_heads=num_attention_heads if model_type != "swin" else 0, # For Swin, num_attention_heads is a list, so use 0 for now hidden_size=hidden_size if model_type != "swin" else 0, opt_level=0, optimization_options=optimization_options, @@ -487,6 +487,7 @@ def export_onnx_model_from_pt( example_inputs = filter_inputs(example_inputs, input_names) example_outputs = model(**example_inputs) + assert isinstance(example_outputs, (list, 
tuple)), f"type of output is not list or tuple: {type(example_outputs)}" # Flatten is needed for gpt2 and distilgpt2. diff --git a/onnxruntime/python/tools/transformers/optimizer.py b/onnxruntime/python/tools/transformers/optimizer.py index c73de6c52ad09..de12624821d38 100644 --- a/onnxruntime/python/tools/transformers/optimizer.py +++ b/onnxruntime/python/tools/transformers/optimizer.py @@ -161,7 +161,7 @@ def optimize_by_fusion( Returns: object of an optimizer class. """ - if model_type not in ["bert", "vit", "swin", "unet", "vae", "clip"] and (num_heads == 0 or hidden_size == 0): + if model_type not in ["bert", "swin", "unet", "vae", "clip"] and (num_heads == 0 or hidden_size == 0): logger.warning(f"Please specify parameters of num_heads and hidden_size for model_type {model_type}") (optimizer_class, producer, _) = MODEL_TYPES[model_type] From 50fa2f9f63fc6d40965223ffcae6a59e0fadf894 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 11:10:36 -0700 Subject: [PATCH 15/22] Fix python formatting --- .../python/tools/transformers/benchmark.py | 16 ++++++++++---- .../tools/transformers/onnx_exporter.py | 21 ++++++++++++++----- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 197cc73a4288f..60b841685da77 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -263,7 +263,9 @@ def run_onnxruntime( } if config.model_type == "vit" or config.model_type == "swin": - logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}") + logger.info( + f"Run onnxruntime on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}" + ) else: logger.info(f"Run onnxruntime on {model_name} with input shape {[batch_size, sequence_length]}") @@ -341,7 +343,7 @@ def run_pytorch( ) if config.model_type == "vit" or config.model_type == "swin": - max_input_size = 1024 # Just needs to be greater than sequence_length + max_input_size = 1024 # Just needs to be greater than sequence_length else: tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) @@ -370,8 +372,14 @@ def run_pytorch( continue if config.model_type == "vit" or config.model_type == "swin": - logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}") - input_ids = torch.randn(size=(batch_size, 3, config.image_size, config.image_size),dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, device=device) + logger.info( + f"Run PyTorch on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}" + ) + input_ids = torch.randn( + size=(batch_size, 3, config.image_size, config.image_size), + dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, + device=device + ) else: logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, sequence_length]}") input_ids = torch.randint( diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 2894d2b60af05..0f237c955f4d5 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -49,7 +49,7 @@ def restore_torch_functions(): def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names, config, data_type=numpy.int64): 
- if config.model_type=="vit" or config.model_type=="swin": + if config.model_type == "vit" or config.model_type == "swin": input_ids = numpy.random.rand(batch_size, 3, config.image_size, config.image_size).astype(numpy.float32) inputs = {"pixel_values": input_ids} return inputs @@ -244,14 +244,19 @@ def optimize_onnx_model( if precision == Precision.INT8: optimization_options.enable_embed_layer_norm = False + # For swin models, the num_attention_heads is a list, which isn't supported yet, so set to 0 for now + if model_type == "swin": + num_attention_heads = 0 + hidden_size = 0 + # Use script to optimize model. # Use opt_level <= 1 for models to be converted to fp16, because some fused op (like FusedGemm) has only fp32 and no fp16. # It is better to be conservative so we use opt_level=0 here, in case MemcpyFromHost is added to the graph by OnnxRuntime. opt_model = optimize_model( onnx_model_path, model_type, - num_heads=num_attention_heads if model_type != "swin" else 0, # For Swin, num_attention_heads is a list, so use 0 for now - hidden_size=hidden_size if model_type != "swin" else 0, + num_heads=num_attention_heads, + hidden_size=hidden_size, opt_level=0, optimization_options=optimization_options, use_gpu=use_gpu, @@ -442,7 +447,11 @@ def validate_and_optimize_onnx( model_fusion_statistics, ) - return onnx_model_path, is_valid_onnx_model, config.num_labels if model_type == "vit" or model_type == "swin" else config.vocab_size + return ( + onnx_model_path, + is_valid_onnx_model, + config.num_labels if model_type == "vit" or model_type == "swin" else config.vocab_size + ) def export_onnx_model_from_pt( @@ -473,7 +482,9 @@ def export_onnx_model_from_pt( if model_type == "vit" or model_type == "swin": image_processor = AutoFeatureExtractor.from_pretrained(model_name, cache_dir=cache_dir) - data = numpy.random.randint(low=0, high=256, size=config.image_size * config.image_size * 3, dtype=numpy.uint8).reshape(config.image_size, config.image_size, 3) + data = numpy.random.randint( + low=0, high=256, size=config.image_size * config.image_size * 3, dtype=numpy.uint8 + ).reshape(config.image_size, config.image_size, 3) example_inputs = image_processor(data, return_tensors="pt") else: From ecd65f47e247e4e59ced64e05bdebc3664ff0d8a Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 12:51:03 -0700 Subject: [PATCH 16/22] Fix formatting --- onnxruntime/python/tools/transformers/benchmark.py | 4 ++-- onnxruntime/python/tools/transformers/huggingface_models.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 60b841685da77..6909697848505 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -343,7 +343,7 @@ def run_pytorch( ) if config.model_type == "vit" or config.model_type == "swin": - max_input_size = 1024 # Just needs to be greater than sequence_length + max_input_size = 1024 # Just needs to be greater than sequence_length else: tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) @@ -378,7 +378,7 @@ def run_pytorch( input_ids = torch.randn( size=(batch_size, 3, config.image_size, config.image_size), dtype=torch.float16 if precision == Precision.FLOAT16 else torch.float32, - device=device + device=device, ) else: logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, sequence_length]}") diff --git 
a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py index 408b7b41f809d..dcfe4a28ad9af 100644 --- a/onnxruntime/python/tools/transformers/huggingface_models.py +++ b/onnxruntime/python/tools/transformers/huggingface_models.py @@ -158,7 +158,6 @@ ), # "google/pegasus-xsum": (["input_ids"], 11, False, "bert"), # "google/pegasus-large": (["input_ids"], 11, False, "bert"), - # ViT "google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"), # Swin From 2a336bd3610aec8ddcecf94dd5c149a9ae75eaf6 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 13:03:11 -0700 Subject: [PATCH 17/22] Formatting --- onnxruntime/python/tools/transformers/onnx_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 0f237c955f4d5..309ad87045d05 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -450,7 +450,7 @@ def validate_and_optimize_onnx( return ( onnx_model_path, is_valid_onnx_model, - config.num_labels if model_type == "vit" or model_type == "swin" else config.vocab_size + config.num_labels if model_type == "vit" or model_type == "swin" else config.vocab_size, ) From dbd9e0ad15a6a911afff02b655dfd22de9b015b1 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 13:09:59 -0700 Subject: [PATCH 18/22] Formatting --- onnxruntime/python/tools/transformers/onnx_exporter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 309ad87045d05..1189846cd4fae 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -110,6 +110,7 @@ def build_dynamic_axes(example_inputs, outputs_flatten): dynamic_axes[output_name].update({j: "seq_len"}) return dynamic_axes, output_names + def validate_onnx_model( onnx_model_path, example_inputs, From 43df1ed98764d07cb693237aa4e769fc074bdfc2 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 16:04:35 -0700 Subject: [PATCH 19/22] Code review feedback --- onnxruntime/python/tools/transformers/benchmark.py | 14 +++++++------- .../python/tools/transformers/onnx_exporter.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index 6909697848505..b66bb70cce549 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -262,7 +262,7 @@ def run_onnxruntime( "datetime": str(datetime.now()), } - if config.model_type == "vit" or config.model_type == "swin": + if config.model_type in ["vit", "swin"]: logger.info( f"Run onnxruntime on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}" ) @@ -342,8 +342,8 @@ def run_pytorch( custom_model_class=model_class, ) - if config.model_type == "vit" or config.model_type == "swin": - max_input_size = 1024 # Just needs to be greater than sequence_length + if config.model_type in ["vit", "swin"]: + sequence_lengths = [1] # Set array to one entry so we iterate once, and ignore any extra lengths else: tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir) @@ -368,10 +368,7 @@ def run_pytorch( continue for 
sequence_length in sequence_lengths: - if max_input_size is not None and sequence_length > max_input_size: - continue - - if config.model_type == "vit" or config.model_type == "swin": + if config.model_type in ["vit", "swin"]: logger.info( f"Run PyTorch on {model_name} with input shape {[batch_size, 3, config.image_size, config.image_size]}" ) @@ -381,6 +378,9 @@ def run_pytorch( device=device, ) else: + if sequence_length > max_input_size: + continue + logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, sequence_length]}") input_ids = torch.randint( low=0, diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py index 1189846cd4fae..38f7f8cd05f1d 100644 --- a/onnxruntime/python/tools/transformers/onnx_exporter.py +++ b/onnxruntime/python/tools/transformers/onnx_exporter.py @@ -49,7 +49,7 @@ def restore_torch_functions(): def create_onnxruntime_input(vocab_size, batch_size, sequence_length, input_names, config, data_type=numpy.int64): - if config.model_type == "vit" or config.model_type == "swin": + if config.model_type in ["vit", "swin"]: input_ids = numpy.random.rand(batch_size, 3, config.image_size, config.image_size).astype(numpy.float32) inputs = {"pixel_values": input_ids} return inputs @@ -451,7 +451,7 @@ def validate_and_optimize_onnx( return ( onnx_model_path, is_valid_onnx_model, - config.num_labels if model_type == "vit" or model_type == "swin" else config.vocab_size, + config.num_labels if model_type in ["vit", "swin"] else config.vocab_size, ) @@ -481,7 +481,7 @@ def export_onnx_model_from_pt( example_inputs = None max_input_size = None - if model_type == "vit" or model_type == "swin": + if model_type in ["vit", "swin"]: image_processor = AutoFeatureExtractor.from_pretrained(model_name, cache_dir=cache_dir) data = numpy.random.randint( low=0, high=256, size=config.image_size * config.image_size * 3, dtype=numpy.uint8 @@ -524,7 +524,7 @@ def export_onnx_model_from_pt( dynamic_axes = None output_names = None - if model_type == "vit" or model_type == "swin": + if model_type in ["vit", "swin"]: dynamic_axes, output_names = {key: {0: "pixel_values"} for key in example_inputs}, ["logits"] else: dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten) From a2fc5c5f265c774866c821f80c9aeb8f197e8922 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 16:06:16 -0700 Subject: [PATCH 20/22] Minor fix --- onnxruntime/python/tools/transformers/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py index b66bb70cce549..c194b4b6414f5 100644 --- a/onnxruntime/python/tools/transformers/benchmark.py +++ b/onnxruntime/python/tools/transformers/benchmark.py @@ -378,7 +378,7 @@ def run_pytorch( device=device, ) else: - if sequence_length > max_input_size: + if max_input_size is not None and sequence_length > max_input_size: continue logger.info(f"Run PyTorch on {model_name} with input shape {[batch_size, sequence_length]}") From 7444ed5982f8a2da9465dfda76d6212755ddc960 Mon Sep 17 00:00:00 2001 From: Ryan Hill Date: Fri, 7 Apr 2023 16:57:34 -0700 Subject: [PATCH 21/22] Improve formatting --- onnxruntime/python/tools/transformers/benchmark.py | 6 +++++- .../python/tools/transformers/benchmark_helper.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/onnxruntime/python/tools/transformers/benchmark.py 
b/onnxruntime/python/tools/transformers/benchmark.py
index c194b4b6414f5..a47e1a2f8452f 100644
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@@ -343,7 +343,8 @@ def run_pytorch(
     )

     if config.model_type in ["vit", "swin"]:
-        sequence_lengths = [1]  # Set array to one entry so we iterate once, and ignore any extra lengths
+        # These models don't use sequence lengths, so just pick the first sequence length so that the summary still works
+        sequence_lengths = [ sequence_lengths[0] ]
     else:
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

@@ -786,6 +787,9 @@ def main():
         logger.error("int8 is for CPU only")
         return

+    if len(args.models)==1 and MODELS[args.models[0]][3] in ["vit", "swin"]:
+        args.sequence_lengths = [""]
+
     args.num_threads = sorted({cpu_count if x <= 0 else x for x in args.num_threads})

     logger.info(f"Arguments: {args}")
diff --git a/onnxruntime/python/tools/transformers/benchmark_helper.py b/onnxruntime/python/tools/transformers/benchmark_helper.py
index fc2ec8ad8fd56..eb1a79d2e9d44 100644
--- a/onnxruntime/python/tools/transformers/benchmark_helper.py
+++ b/onnxruntime/python/tools/transformers/benchmark_helper.py
@@ -245,8 +245,11 @@ def output_summary(results, csv_filename, args):
         ]
         data_names = []
         for batch_size in args.batch_sizes:
-            for sequence_length in args.sequence_lengths:
-                data_names.append(f"b{batch_size}_s{sequence_length}")
+            if args.sequence_lengths == [""]:
+                data_names.append(f"b{batch_size}")
+            else:
+                for sequence_length in args.sequence_lengths:
+                    data_names.append(f"b{batch_size}_s{sequence_length}")

         csv_writer = csv.DictWriter(csv_file, fieldnames=header_names + data_names)
         csv_writer.writeheader()
@@ -273,7 +276,10 @@ def output_summary(results, csv_filename, args):
                             assert row[k] == headers[k]
                     b = result["batch_size"]
                     s = result["sequence_length"]
-                    row[f"b{b}_s{s}"] = result["average_latency_ms"]
+                    if s != "":
+                        row[f"b{b}_s{s}"] = result["average_latency_ms"]
+                    else:
+                        row[f"b{b}"] = result["average_latency_ms"]

             if row:
                 csv_writer.writerow(row)

From 5cd613fec5f0c933cf87252a7a3628234a042d9e Mon Sep 17 00:00:00 2001
From: Ryan Hill
Date: Fri, 7 Apr 2023 17:10:08 -0700
Subject: [PATCH 22/22] Python format

---
 onnxruntime/python/tools/transformers/benchmark.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py
index a47e1a2f8452f..bd9a649ae74fd 100644
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@@ -344,7 +344,7 @@ def run_pytorch(

     if config.model_type in ["vit", "swin"]:
         # These models don't use sequence lengths, so just pick the first sequence length so that the summary still works
-        sequence_lengths = [ sequence_lengths[0] ]
+        sequence_lengths = [sequence_lengths[0]]
     else:
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

@@ -787,7 +787,7 @@ def main():
         logger.error("int8 is for CPU only")
         return

-    if len(args.models)==1 and MODELS[args.models[0]][3] in ["vit", "swin"]:
+    if len(args.models) == 1 and MODELS[args.models[0]][3] in ["vit", "swin"]:
         args.sequence_lengths = [""]

     args.num_threads = sorted({cpu_count if x <= 0 else x for x in args.num_threads})
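
Editor's note for reviewers: as a quick reference, the net effect of the ViT/Swin handling in this branch can be summarized with the short standalone Python sketch below. It is illustration only, not code from the patch: the helper names make_image_inputs and column_name are hypothetical; the shapes and column naming mirror create_onnxruntime_input in onnx_exporter.py and output_summary in benchmark_helper.py as changed above, and image_size 224 is assumed from the google/vit-base-patch16-224 entry in huggingface_models.py.

import numpy

def make_image_inputs(batch_size, image_size):
    # Image models take float32 "pixel_values" of shape [batch, 3, H, W]
    # instead of int64 "input_ids" of shape [batch, sequence_length].
    pixel_values = numpy.random.rand(batch_size, 3, image_size, image_size).astype(numpy.float32)
    return {"pixel_values": pixel_values}

def column_name(batch_size, sequence_length):
    # Summary CSV column naming: image models pass sequence_lengths == [""],
    # so the "_s{seq}" suffix is dropped from the per-batch latency column.
    return f"b{batch_size}" if sequence_length == "" else f"b{batch_size}_s{sequence_length}"

print(make_image_inputs(1, 224)["pixel_values"].shape)  # (1, 3, 224, 224)
print(column_name(1, ""), column_name(1, 128))          # b1 b1_s128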