From 83dee8ee55b4a90eadf53cffc29b655880e225f4 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Tue, 27 Apr 2021 16:30:52 -0400 Subject: [PATCH 1/5] Update annotate.py minor edits, thx --- .../ultralytics/deepsparse/annotate.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/integrations/ultralytics/deepsparse/annotate.py b/integrations/ultralytics/deepsparse/annotate.py index d9c12caf328..a7d4c502cfd 100644 --- a/integrations/ultralytics/deepsparse/annotate.py +++ b/integrations/ultralytics/deepsparse/annotate.py @@ -52,7 +52,7 @@ benchmarks, this value is the number of cores per socket -s NUM_SOCKETS, --num-sockets NUM_SOCKETS - For DeepSparse engine only. The number of physical + For DeepSparse Engine only. The number of physical cores to run the annotations. Defaults to None where it uses all sockets available on the system -q, --quantized-inputs @@ -73,10 +73,10 @@ target FPS when writing video files. Frames will be dropped to closely match target FPS. --source must be a video file and if target-fps is greater than the - source video fps then it will be ignored. Defualt is + source video fps then it will be ignored. Default is None --no-save set flag when source is from webcam to not save - results. not supported for non webcam sources + results. not supported for non-webcam sources ########## Example command for running webcam annotations with pruned quantized YOLOv3: @@ -142,8 +142,8 @@ def parse_args(arguments=None): "model_filepath", type=str, help=( - "The full filepath of the ONNX model file or SparseZoo stub to the model " - "for deepsparse and onnxruntime engines. Path to a .pt loadable PyTorch " + "The full file path of the ONNX model file or SparseZoo stub to the model " + "for DeepSparse and ONNX Runtime Engines. Path to a .pt loadable PyTorch " "Module for torch - the Module can be the top-level object " "loaded or loaded into 'model' in a state dict" ), @@ -192,7 +192,7 @@ def parse_args(arguments=None): type=int, default=None, help=( - "For DeepSparse engine only. The number of physical cores to run the " + "For DeepSparse Engine only. The number of physical cores to run the " "annotations. Defaults to None where it uses all sockets available on the " "system" ), @@ -213,9 +213,9 @@ def parse_args(arguments=None): type=_parse_device, default=None, help=( - "Torch device id to run the model with. Default is cpu. Non cpu " + "Torch device id to run the model with. Default is cpu. Non-cpu " " only supported for torch benchmarking. Default is 'cpu' " - "unless running with torch and cuda is available, then cuda on " + "unless running with Torch and CUDA is available, then CUDA on " "device 0. i.e. 'cuda', 'cpu', 0, 'cuda:1'" ), ) @@ -241,7 +241,7 @@ def parse_args(arguments=None): help=( "target FPS when writing video files. Frames will be dropped to " "closely match target FPS. --source must be a video file and if target-fps " - "is greater than the source video fps then it will be ignored. Defualt is " + "is greater than the source video fps then it will be ignored. Default is " "None" ), ) @@ -250,7 +250,7 @@ def parse_args(arguments=None): action="store_true", help=( "set flag when source is from webcam to not save results. 
not supported " - "for non webcam sources" + "for non-webcam sources" ), ) From f824c72eeb406634d06c19dde56da0e5095a6ca4 Mon Sep 17 00:00:00 2001 From: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Date: Tue, 27 Apr 2021 18:27:38 -0400 Subject: [PATCH 2/5] Update annotate.py Getting additional edits at top of file to match ones below --- integrations/ultralytics/deepsparse/annotate.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/integrations/ultralytics/deepsparse/annotate.py b/integrations/ultralytics/deepsparse/annotate.py index a7d4c502cfd..4d315ebe731 100644 --- a/integrations/ultralytics/deepsparse/annotate.py +++ b/integrations/ultralytics/deepsparse/annotate.py @@ -28,16 +28,16 @@ Annotate images, videos, and streams with sparsified YOLOv3 models positional arguments: - model_filepath The full filepath of the ONNX model file or SparseZoo - stub to the model for deepsparse and onnxruntime - engines. Path to a .pt loadable PyTorch Module for + model_filepath The full file path of the ONNX model file or SparseZoo + stub to the model for DeepSparse and ONNX Runtime + Engines. Path to a .pt loadable PyTorch Module for torch - the Module can be the top-level object loaded or loaded into 'model' in a state dict optional arguments: -h, --help show this help message and exit --source SOURCE File path to image or directory of .jpg files, a .mp4 - video, or an integer (i.e. 0) for web-cam + video, or an integer (i.e. 0) for webcam -e {deepsparse,onnxruntime,torch}, --engine {deepsparse,onnxruntime,torch} Inference engine backend to run on. Choices are 'deepsparse', 'onnxruntime', and 'torch'. Default is @@ -61,8 +61,8 @@ --fp16 Set flag to execute with torch in half precision (fp16) --device DEVICE Torch device id to run the model with. Default is cpu. - Non cpu only supported for torch benchmarking. Default - is 'cpu' unless running with torch and cuda is + Non-cpu only supported for Torch benchmarking. Default + is 'cpu' unless running with Torch and CUDA is available, then cuda on device 0. i.e. 'cuda', 'cpu', 0, 'cuda:1' --save-dir SAVE_DIR directory to save all results to. defaults to @@ -214,7 +214,7 @@ def parse_args(arguments=None): default=None, help=( "Torch device id to run the model with. Default is cpu. Non-cpu " - " only supported for torch benchmarking. Default is 'cpu' " + " only supported for Torch benchmarking. Default is 'cpu' " "unless running with Torch and CUDA is available, then CUDA on " "device 0. i.e. 
'cuda', 'cpu', 0, 'cuda:1'" ), From 52ab9e6a60e9e8fecb59317c1e85ed9ec07917d0 Mon Sep 17 00:00:00 2001 From: Eldar Kurtic Date: Wed, 28 Apr 2021 17:10:46 +0200 Subject: [PATCH 3/5] Fix for integrations/timm checkpoint path (#198) This PR fixes issue https://github.com/neuralmagic/sparseml/issues/197 Co-authored-by: Benjamin Fineran --- integrations/timm/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/timm/train.py b/integrations/timm/train.py index f4c155aab1d..4e9c8c5c028 100755 --- a/integrations/timm/train.py +++ b/integrations/timm/train.py @@ -389,7 +389,7 @@ def main(): elif args.initial_checkpoint.startswith("zoo:"): # Load weights from a SparseZoo model stub zoo_model = Zoo.load_model_from_stub(args.initial_checkpoint) - args.initial_checkpoint = zoo_model.download_framework_files(extensions=[".pth"]) + args.initial_checkpoint = zoo_model.download_framework_files(extensions=[".pth"])[0] #################################################################################### # End - SparseML optional load weights from SparseZoo #################################################################################### From 3a2b96df2ac340618b0422e0469f1d095c256fab Mon Sep 17 00:00:00 2001 From: Eldar Kurtic Date: Thu, 29 Apr 2021 13:37:41 +0200 Subject: [PATCH 4/5] Fix steps_per_epoch calculation (#201) Co-authored-by: Benjamin Fineran --- integrations/transformers/run_distill_qa.py | 47 +++++++++++---------- integrations/transformers/run_qa.py | 45 ++++++++++---------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/integrations/transformers/run_distill_qa.py b/integrations/transformers/run_distill_qa.py index ddaeca51dbd..4c9fc69e359 100644 --- a/integrations/transformers/run_distill_qa.py +++ b/integrations/transformers/run_distill_qa.py @@ -19,8 +19,8 @@ # limitations under the License. """ -Example script for integrating spaseml with the transformers library to perform model distillation. -This script is addopted from hugging face's implementation for Question Answering on the SQUAD Dataset. +Example script for integrating spaseml with the transformers library to perform model distillation. +This script is addopted from hugging face's implementation for Question Answering on the SQUAD Dataset. Hugging Face's original implementation is regularly updated and can be found at https://github.com/huggingface/transformers/blob/master/examples/question-answering/run_qa.py This script will: - Load transformer based models @@ -54,12 +54,12 @@ [--onnx_export_path] \ [--layers_to_keep] \ -Train, prune, and evaluate a transformer base question answering model on squad. +Train, prune, and evaluate a transformer base question answering model on squad. -h, --help show this help message and exit --teacher_model_name_or_path The name or path of model which will be used for distilation. Note, this model needs to be trained for QA task already. --student_model_name_or_path The name or path of the model wich will be trained using distilation. - --temperature Hyperparameter which controls model distilation + --temperature Hyperparameter which controls model distilation --distill_hardness Hyperparameter which controls how much of the loss comes from teacher vs training labels --model_name_or_path The path to the transformers model you wish to train or the name of the pretrained language model you wish @@ -72,21 +72,21 @@ or not. Default is false. --do_eval Boolean denoting if the model should be evaluated or not. Default is false. 
- --per_device_train_batch_size Size of each training batch based on samples per GPU. + --per_device_train_batch_size Size of each training batch based on samples per GPU. 12 will fit in a 11gb GPU, 16 in a 16gb. - --per_device_eval_batch_size Size of each training batch based on samples per GPU. + --per_device_eval_batch_size Size of each training batch based on samples per GPU. 12 will fit in a 11gb GPU, 16 in a 16gb. --learning_rate Learning rate initial float value. ex: 3e-5. - --max_seq_length Int for the max sequence length to be parsed as a context + --max_seq_length Int for the max sequence length to be parsed as a context window. ex: 384 tokens. --output_dir Path which model checkpoints and paths should be saved. - --overwrite_output_dir Boolean to define if the + --overwrite_output_dir Boolean to define if the --cache_dir Directiory which cached transformer files(datasets, models - , tokenizers) are saved for fast loading. + , tokenizers) are saved for fast loading. --preprocessing_num_workers The amount of cpu workers which are used to process datasets --seed Int which determines what random seed is for training/shuffling --nm_prune_config Path to the neural magic prune configuration file. examples can - be found in prune_config_files but are customized for bert-base-uncased. + be found in prune_config_files but are customized for bert-base-uncased. --do_onnx_export Boolean denoting if the model should be exported to onnx --onnx_export_path Path where onnx model path will be exported. ex: onnx-export --layers_to_keep Number of layers to keep from original model. Layers are dropped before training @@ -611,7 +611,7 @@ def prepare_validation_features(examples): ] return tokenized_examples - transformers.utils.logging.set_verbosity_info() + transformers.utils.logging.set_verbosity_info() parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): # If we pass only one argument to the script and it's the path to a json file, @@ -639,7 +639,7 @@ def prepare_validation_features(examples): ) logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) @@ -690,10 +690,10 @@ def prepare_validation_features(examples): student_model_parameters = filter(lambda p: p.requires_grad, student_model.parameters()) params = sum([np.prod(p.size()) for p in student_model_parameters]) - logger.info("Student Model has %s parameters", params) + logger.info("Student Model has %s parameters", params) teacher_model_parameters = filter(lambda p: p.requires_grad, teacher_model.parameters()) params = sum([np.prod(p.size()) for p in teacher_model_parameters]) - logger.info("Teacher Model has %s parameters", params) + logger.info("Teacher Model has %s parameters", params) # Tokenizer check: this script requires a fast tokenizer. 
if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( @@ -710,7 +710,7 @@ def prepare_validation_features(examples): context_column_name = "context" if "context" in column_names else column_names[1] answer_column_name = "answers" if "answers" in column_names else column_names[2] - pad_on_right = tokenizer.padding_side == "right" + pad_on_right = tokenizer.padding_side == "right" data_collator = ( default_data_collator @@ -744,15 +744,16 @@ def prepare_validation_features(examples): ) #################################################################################### # Start SparseML Integration - #################################################################################### - optim = load_optimizer(student_model, TrainingArguments) - steps_per_epoch = math.ceil(len(datasets["train"]) / (training_args.per_device_train_batch_size*training_args._n_gpu)) - manager = ScheduledModifierManager.from_yaml(data_args.nm_prune_config) - training_args.num_train_epochs = float(manager.modifiers[0].end_epoch) - optim = ScheduledOptimizer(optim, student_model, manager, steps_per_epoch=steps_per_epoch, loggers=None) + #################################################################################### + if training_args.do_train: + optim = load_optimizer(student_model, TrainingArguments) + steps_per_epoch = math.ceil(len(train_dataset) / (training_args.per_device_train_batch_size * training_args._n_gpu)) + manager = ScheduledModifierManager.from_yaml(data_args.nm_prune_config) + training_args.num_train_epochs = float(manager.modifiers[0].end_epoch) + optim = ScheduledOptimizer(optim, student_model, manager, steps_per_epoch=steps_per_epoch, loggers=None) #################################################################################### # End SparseML Integration - #################################################################################### + #################################################################################### # Initialize our Trainer trainer = DistillQuestionAnsweringTrainer( model=student_model, @@ -764,7 +765,7 @@ def prepare_validation_features(examples): data_collator=data_collator, post_process_function=post_processing_function, compute_metrics=compute_metrics, - optimizers=(optim, None), + optimizers=(optim, None) if training_args.do_train else (None, None), teacher=teacher_model, distill_hardness = model_args.distill_hardness, temperature = model_args.temperature, diff --git a/integrations/transformers/run_qa.py b/integrations/transformers/run_qa.py index b6f683bc13d..a9ad798ac89 100644 --- a/integrations/transformers/run_qa.py +++ b/integrations/transformers/run_qa.py @@ -19,8 +19,8 @@ # limitations under the License. """ -Example script for integrating spaseml with the transformers library. -This script is addopted from hugging face's implementation for Question Answering on the SQUAD Dataset. +Example script for integrating spaseml with the transformers library. +This script is addopted from hugging face's implementation for Question Answering on the SQUAD Dataset. Hugging Face's original implementation is regularly updated and can be found at https://github.com/huggingface/transformers/blob/master/examples/question-answering/run_qa.py This script will: - Load transformer based modesl @@ -50,7 +50,7 @@ [--do_onnx_export] [--onnx_export_path] -Train, prune, and evaluate a transformer base question answering model on squad. +Train, prune, and evaluate a transformer base question answering model on squad. 
-h, --help show this help message and exit --model_name_or_path MODEL The path to the transformers model you wish to train or the name of the pretrained language model you wish @@ -63,21 +63,21 @@ or not. Default is false. --do_eval Boolean denoting if the model should be evaluated or not. Default is false. - --per_device_train_batch_size Size of each training batch based on samples per GPU. + --per_device_train_batch_size Size of each training batch based on samples per GPU. 12 will fit in a 11gb GPU, 16 in a 16gb. - --per_device_eval_batch_size Size of each training batch based on samples per GPU. + --per_device_eval_batch_size Size of each training batch based on samples per GPU. 12 will fit in a 11gb GPU, 16 in a 16gb. --learning_rate Learning rate initial float value. ex: 3e-5. - --max_seq_length Int for the max sequence length to be parsed as a context + --max_seq_length Int for the max sequence length to be parsed as a context window. ex: 384 tokens. --output_dir Path which model checkpoints and paths should be saved. - --overwrite_output_dir Boolean to define if the + --overwrite_output_dir Boolean to define if the --cache_dir Directiory which cached transformer files(datasets, models - , tokenizers) are saved for fast loading. + , tokenizers) are saved for fast loading. --preprocessing_num_workers The amount of cpu workers which are used to process datasets --seed Int which determines what random seed is for training/shuffling --nm_prune_config Path to the neural magic prune configuration file. examples can - be found in prune_config_files but are customized for bert-base-uncased. + be found in prune_config_files but are customized for bert-base-uncased. --do_onnx_export Boolean denoting if the model should be exported to onnx --onnx_export_path Path where onnx model path will be exported. ex: onnx-export @@ -101,7 +101,7 @@ --seed 42 \ --nm_prune_config prune_config_files/95sparsity1epoch.yaml \ --do_onnx_export \ - --onnx_export_path 95sparsity1epoch/ + --onnx_export_path 95sparsity1epoch/ """ import collections import json @@ -590,7 +590,7 @@ def prepare_validation_features(examples): return tokenized_examples - transformers.utils.logging.set_verbosity_info() + transformers.utils.logging.set_verbosity_info() parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): # If we pass only one argument to the script and it's the path to a json file, @@ -618,7 +618,7 @@ def prepare_validation_features(examples): ) logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" ) @@ -663,7 +663,7 @@ def prepare_validation_features(examples): model_parameters = filter(lambda p: p.requires_grad, model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) - logger.info("Model has %s parameters", params) + logger.info("Model has %s parameters", params) # Tokenizer check: this script requires a fast tokenizer. 
if not isinstance(tokenizer, PreTrainedTokenizerFast): raise ValueError( @@ -679,7 +679,7 @@ def prepare_validation_features(examples): question_column_name = "question" if "question" in column_names else column_names[0] context_column_name = "context" if "context" in column_names else column_names[1] answer_column_name = "answers" if "answers" in column_names else column_names[2] - pad_on_right = tokenizer.padding_side == "right" + pad_on_right = tokenizer.padding_side == "right" if training_args.do_train: train_dataset = datasets["train"].map( @@ -714,12 +714,13 @@ def prepare_validation_features(examples): #################################################################################### # Start SparseML Integration - #################################################################################### - optim = load_optimizer(model, TrainingArguments) - steps_per_epoch = math.ceil(len(datasets["train"]) / (training_args.per_device_train_batch_size*training_args._n_gpu)) - manager = ScheduledModifierManager.from_yaml(data_args.nm_prune_config) - training_args.num_train_epochs = float(manager.max_epochs) - optim = ScheduledOptimizer(optim, model, manager, steps_per_epoch=steps_per_epoch, loggers=None) + #################################################################################### + if training_args.do_train: + optim = load_optimizer(model, TrainingArguments) + steps_per_epoch = math.ceil(len(train_dataset) / (training_args.per_device_train_batch_size * training_args._n_gpu)) + manager = ScheduledModifierManager.from_yaml(data_args.nm_prune_config) + training_args.num_train_epochs = float(manager.max_epochs) + optim = ScheduledOptimizer(optim, model, manager, steps_per_epoch=steps_per_epoch, loggers=None) #################################################################################### # End SparseML Integration #################################################################################### @@ -734,7 +735,7 @@ def prepare_validation_features(examples): data_collator=data_collator, post_process_function=post_processing_function, compute_metrics=compute_metrics, - optimizers=(optim, None), + optimizers=(optim, None) if training_args.do_train else (None, None), ) # Training @@ -765,7 +766,7 @@ def prepare_validation_features(examples): #################################################################################### if data_args.do_onnx_export: logger.info("*** Export to ONNX ***") - print("Exporting onnx model") + print("Exporting onnx model") os.environ["TOKENIZERS_PARALLELISM"] = "false" exporter = ModuleExporter( model, output_dir='onnx-export' From 29a96e0bed90c24d2bfa048b95632f4a6a50c590 Mon Sep 17 00:00:00 2001 From: Benjamin Fineran Date: Thu, 29 Apr 2021 10:01:34 -0400 Subject: [PATCH 5/5] YOLO webcam example - add assert for webcam load (#202) * YOLO webcam example - add assert for webcam load * update readme to note other options for annotate * formatting --- integrations/ultralytics/README.md | 7 ++++++- integrations/ultralytics/deepsparse/deepsparse_utils.py | 5 ++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/integrations/ultralytics/README.md b/integrations/ultralytics/README.md index 785d85f5b37..2958392d663 100644 --- a/integrations/ultralytics/README.md +++ b/integrations/ultralytics/README.md @@ -151,9 +151,14 @@ python annotate.py \ zoo:cv/detection/yolo_v3-spp/pytorch/ultralytics/coco/pruned_quant-aggressive_94 \ --source 0 \ --quantized-inputs \ - --image-shape 416 416 + --image-shape 416 416 \ + --no-save # webcam only ``` +In 
addition to webcam, `--source` can take a path to a `.jpg` file, directory or glob path
of `.jpg` files, or path to a `.mp4` video file. If source is an integer and no
corresponding webcam is available, an exception will be raised.

### Benchmarking

`benchmarking.py` is a script for benchmarking sparsified and quantized YOLOv3

diff --git a/integrations/ultralytics/deepsparse/deepsparse_utils.py b/integrations/ultralytics/deepsparse/deepsparse_utils.py
index 0d8319e69f2..dc7cd1a4034 100644
--- a/integrations/ultralytics/deepsparse/deepsparse_utils.py
+++ b/integrations/ultralytics/deepsparse/deepsparse_utils.py
@@ -199,7 +199,10 @@ def __iter__(self) -> Iterator[Tuple[numpy.ndarray, numpy.ndarray]]:
             self._stream.release()
             cv2.destroyAllWindows()
             break
-        _, frame = self._stream.read()
+        loaded, frame = self._stream.read()
+
+        assert loaded, f"Could not load image from webcam {self._camera}"
+
        frame = cv2.flip(frame, 1)  # flip left-right
        yield load_image(frame, image_size=self._image_size)
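
PATCH 4/5 computes `steps_per_epoch` from the length of the tokenized training set divided by the effective global batch size (per-device batch size times the number of GPUs), rounded up, and only builds the SparseML optimizer wrapper when `--do_train` is set. Below is a minimal standalone sketch of that arithmetic; the sample count, batch size, and GPU count are made-up illustration values, not taken from SQuAD or the scripts above.

```python
import math

# Hypothetical illustration values -- not taken from the patches above.
num_train_samples = 88_524            # stands in for len(train_dataset) after tokenization
per_device_train_batch_size = 12      # --per_device_train_batch_size
n_gpu = 2                             # stands in for training_args._n_gpu

# One optimizer step consumes (per-device batch size * number of GPUs) samples,
# so steps per epoch is the sample count divided by that product, rounded up.
steps_per_epoch = math.ceil(num_train_samples / (per_device_train_batch_size * n_gpu))

print(steps_per_epoch)  # 3689
```

In the patched scripts this value is passed as `steps_per_epoch` to SparseML's `ScheduledOptimizer`, which uses it to convert the epoch-based pruning schedule in the `--nm_prune_config` recipe into per-step updates.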