From 40fe31090efb9c12c6ce90ac46948eeb4eb926a3 Mon Sep 17 00:00:00 2001
From: Mark Kurtz
Date: Thu, 24 Jun 2021 12:44:39 -0400
Subject: [PATCH 1/3] Update example commands for hugging face integration

---
 integrations/huggingface-transformers/README.md        | 7 +++----
 .../tutorials/sparsifying_bert_using_recipes.md         | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/integrations/huggingface-transformers/README.md b/integrations/huggingface-transformers/README.md
index 821957f62cf..8936599e38f 100644
--- a/integrations/huggingface-transformers/README.md
+++ b/integrations/huggingface-transformers/README.md
@@ -63,9 +63,6 @@ For example, pruning and quantizing a model on the SQuAD dataset can be done by
 ```bash
 python transformers/examples/pytorch/question-answering/run_qa.py \
   --model_name_or_path bert-base-uncased \
-  --distill_teacher MODELS_DIR/bert-base-12layers \
-  --distill_hardness 1.0 \
-  --distill_temperature 2.0 \
   --dataset_name squad \
   --do_train \
   --do_eval \
@@ -80,7 +77,9 @@ python transformers/examples/pytorch/question-answering/run_qa.py \
   --fp16 \
   --num_train_epochs 30 \
   --recipe recipes/bert-base-12layers_prune80.md \
-  --onnx_export_path MODELS_DIR/bert-base-12layers_prune80/onnx
+  --onnx_export_path MODELS_DIR/bert-base-12layers_prune80/onnx \
+  --save_strategy epoch \
+  --save_total_limit 2
 ```
 
 ### Structure
diff --git a/integrations/huggingface-transformers/tutorials/sparsifying_bert_using_recipes.md b/integrations/huggingface-transformers/tutorials/sparsifying_bert_using_recipes.md
index eda3b4d016f..75fca317925 100644
--- a/integrations/huggingface-transformers/tutorials/sparsifying_bert_using_recipes.md
+++ b/integrations/huggingface-transformers/tutorials/sparsifying_bert_using_recipes.md
@@ -145,7 +145,7 @@ python transformers/examples/pytorch/question-answering/run_qa.py \
   --output_dir MODELS_DIR/bert-base-12layers_prune80/eval \
   --cache_dir cache \
   --preprocessing_num_workers 6 \
-  --onnx_export_path MODELS_DIR/bert-base-12layers_prune80/onnx \
+  --onnx_export_path MODELS_DIR/bert-base-12layers_prune80/onnx
 ```
 
 If it runs successfully, you will have the converted `model.onnx` in `MODELS_DIR/bert-base-12layers_prune80/onnx`. You can now run it in ONNX-compatible inference engines such as [DeepSparse](https://github.com/neuralmagic/deepsparse). The `DeepSparse Engine` is explicitly coded to support running sparsified models for significant improvements in inference performance.

From 3e24a270695cec62f31ccadc000f5aff29e08e7f Mon Sep 17 00:00:00 2001
From: Mark Kurtz
Date: Sun, 27 Jun 2021 14:54:38 -0400
Subject: [PATCH 2/3] Phased pruning implementation

---
 .../pytorch/optim/modifier_pruning.py          | 53 +++++++++++++++++++
 .../pytorch/optim/test_modifier_pruning.py     | 22 +++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/src/sparseml/pytorch/optim/modifier_pruning.py b/src/sparseml/pytorch/optim/modifier_pruning.py
index 0b1bd58684e..6fb55a6467c 100644
--- a/src/sparseml/pytorch/optim/modifier_pruning.py
+++ b/src/sparseml/pytorch/optim/modifier_pruning.py
@@ -493,6 +493,9 @@ class GMPruningModifier(_PruningParamsModifier):
         immediately after or doing some other prune
     :param inter_func: the type of interpolation function to use:
         [linear, cubic, inverse_cubic]
+    :param phased: True to enable a phased approach where pruning will
+        turn on and off with the update_frequency. Starts with pruning on
+        at start_epoch, off at start_epoch + update_frequency, and so on.
     :param log_types: The loggers to allow the learning rate to be logged to,
         default is __ALL__
     :param mask_type: String to define type of sparsity (options: ['unstructured',
@@ -514,6 +517,7 @@ def __init__(
         params: Union[str, List[str]],
         leave_enabled: bool = True,
         inter_func: str = "cubic",
+        phased: bool = False,
         log_types: Union[str, List[str]] = ALL_TOKEN,
         mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
         global_sparsity: bool = False,
@@ -531,6 +535,7 @@
         self._final_sparsity = final_sparsity
         self._leave_enabled = convert_to_bool(leave_enabled)
         self._inter_func = inter_func
+        self._phased = phased
         self._mask_type = mask_type
         self._mask_creator = (
             mask_type
@@ -612,6 +617,24 @@ def inter_func(self, value: str):
         self._inter_func = value
         self.validate()
 
+    @ModifierProp()
+    def phased(self) -> bool:
+        """
+        :return: True to enable a phased approach where pruning will
+            turn on and off with the update_frequency. Starts with pruning on
+            at start_epoch, off at start_epoch + update_frequency, and so on.
+        """
+        return self._phased
+
+    @phased.setter
+    def phased(self, value: bool):
+        """
+        :param value: True to enable a phased approach where pruning will
+            turn on and off with the update_frequency
+        """
+        self._phased = value
+        self.validate()
+
     @ModifierProp()
     def mask_type(self) -> Union[str, List[int], PruningMaskCreator]:
         """
@@ -763,6 +786,16 @@ def _check_mask_update(self, module: Module, epoch: float, steps_per_epoch: int)
                 self._final_sparsity,
                 self._inter_func,
             )
+
+            # when phased, pruning toggles on and off every update_frequency;
+            # never toggle off once the end is pending so the final sparsity
+            if self.phased and not self.end_pending(epoch, steps_per_epoch):
+                # adjust for phased pruning: start=on, start+update=off
+                phase = math.floor((epoch - self.start_epoch) / self.update_frequency)
+                if phase % 2 != 0:
+                    # odd update phase, turn sparsity off
+                    self._applied_sparsity = 0.0
+
         self._module_masks.set_param_masks_from_sparsity(self._applied_sparsity)
 
         if self.end_pending(epoch, steps_per_epoch):
@@ -843,6 +876,9 @@ class MagnitudePruningModifier(GMPruningModifier):
         immediately after or doing some other prune
     :param inter_func: the type of interpolation function to use:
         [linear, cubic, inverse_cubic]
+    :param phased: True to enable a phased approach where pruning will
+        turn on and off with the update_frequency. Starts with pruning on
+        at start_epoch, off at start_epoch + update_frequency, and so on.
     :param log_types: The loggers to allow the learning rate to be logged to,
         default is __ALL__
     :param mask_type: String to define type of sparsity (options: ['unstructured',
@@ -860,6 +896,7 @@ def __init__(
         params: Union[str, List[str]],
         leave_enabled: bool = True,
         inter_func: str = "cubic",
+        phased: bool = False,
         log_types: Union[str, List[str]] = ALL_TOKEN,
         mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
     ):
@@ -872,6 +909,7 @@
             params=params,
             leave_enabled=leave_enabled,
             inter_func=inter_func,
+            phased=phased,
             log_types=log_types,
             mask_type=mask_type,
             global_sparsity=False,
@@ -933,6 +971,9 @@ class MovementPruningModifier(GMPruningModifier):
         immediately after or doing some other prune
     :param inter_func: the type of interpolation function to use:
         [linear, cubic, inverse_cubic]
+    :param phased: True to enable a phased approach where pruning will
+        turn on and off with the update_frequency. Starts with pruning on
+        at start_epoch, off at start_epoch + update_frequency, and so on.
     :param log_types: The loggers to allow the learning rate to be logged to,
         default is __ALL__
     :param mask_type: String to define type of sparsity (options: ['unstructured',
@@ -950,6 +991,7 @@ def __init__(
         params: Union[str, List[str]],
         leave_enabled: bool = True,
         inter_func: str = "cubic",
+        phased: bool = False,
         log_types: Union[str, List[str]] = ALL_TOKEN,
         mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
     ):
@@ -962,6 +1004,7 @@
             params=params,
             leave_enabled=leave_enabled,
             inter_func=inter_func,
+            phased=phased,
             log_types=log_types,
             mask_type=mask_type,
             global_sparsity=False,
@@ -1024,6 +1067,9 @@ class GlobalMagnitudePruningModifier(GMPruningModifier):
         immediately after or doing some other prune
     :param inter_func: the type of interpolation function to use:
         [linear, cubic, inverse_cubic]
+    :param phased: True to enable a phased approach where pruning will
+        turn on and off with the update_frequency. Starts with pruning on
+        at start_epoch, off at start_epoch + update_frequency, and so on.
     :param log_types: The loggers to allow the learning rate to be logged to,
         default is __ALL__
     :param mask_type: String to define type of sparsity (options: ['unstructured',
@@ -1043,6 +1089,7 @@ def __init__(
         params: Union[str, List[str]] = ALL_PRUNABLE_TOKEN,
         leave_enabled: bool = True,
         inter_func: str = "cubic",
+        phased: bool = False,
         log_types: Union[str, List[str]] = ALL_TOKEN,
         mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
         score_type: Union[str, MFACOptions] = "magnitude",
@@ -1056,6 +1103,7 @@
             params=params,
             leave_enabled=leave_enabled,
             inter_func=inter_func,
+            phased=phased,
             log_types=log_types,
             mask_type=mask_type,
             global_sparsity=True,
@@ -1115,6 +1163,9 @@ class MFACPruningModifier(GMPruningModifier):
         immediately after or doing some other prune
     :param inter_func: the type of interpolation function to use:
         [linear, cubic, inverse_cubic]
+    :param phased: True to enable a phased approach where pruning will
+        turn on and off with the update_frequency. Starts with pruning on
+        at start_epoch, off at start_epoch + update_frequency, and so on.
     :param log_types: The loggers to allow the learning rate to be logged to,
         default is __ALL__
     :param mask_type: String to define type of sparsity (options: ['unstructured',
@@ -1139,6 +1190,7 @@ def __init__(
         params: Union[str, List[str]],
         leave_enabled: bool = True,
         inter_func: str = "cubic",
+        phased: bool = False,
         log_types: Union[str, List[str]] = ALL_TOKEN,
         mask_type: Union[str, List[int], PruningMaskCreator] = "unstructured",
         mfac_options: Dict[str, Any] = None,
@@ -1152,6 +1204,7 @@
             params=params,
             leave_enabled=leave_enabled,
             inter_func=inter_func,
+            phased=phased,
             log_types=log_types,
             mask_type=mask_type,
             global_sparsity=True,
diff --git a/tests/sparseml/pytorch/optim/test_modifier_pruning.py b/tests/sparseml/pytorch/optim/test_modifier_pruning.py
index 9ef2c98614a..31b2546d757 100644
--- a/tests/sparseml/pytorch/optim/test_modifier_pruning.py
+++ b/tests/sparseml/pytorch/optim/test_modifier_pruning.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
import os +import math import pytest import torch @@ -246,6 +247,16 @@ def test_constant_pruning_yaml(): inter_func="cubic", mask_type=[1, 4], ), + lambda: GMPruningModifier( + params=["__ALL_PRUNABLE__"], + init_sparsity=0.9, + final_sparsity=0.9, + start_epoch=10.0, + end_epoch=25.0, + update_frequency=2.0, + inter_func="cubic", + phased=True, + ), ], scope="function", ) @@ -294,7 +305,16 @@ def test_lifecycle( epoch += modifier.update_frequency assert modifier.update_ready(epoch, test_steps_per_epoch) modifier.scheduled_update(model, optimizer, epoch, test_steps_per_epoch) - assert modifier.applied_sparsity > last_sparsity + + if not modifier.phased: + assert modifier.applied_sparsity > last_sparsity + else: + pruned_on = math.floor((epoch - modifier.start_epoch) / modifier.update_frequency) % 2 == 0 + if pruned_on: + assert modifier.applied_sparsity >= last_sparsity + else: + assert modifier.applied_sparsity == 0 + last_sparsity = modifier.applied_sparsity _ = model(test_batch) # check forward pass From 328a04f7265d71bcd6aecc78055834348f5597b2 Mon Sep 17 00:00:00 2001 From: Mark Kurtz Date: Sun, 27 Jun 2021 15:01:20 -0400 Subject: [PATCH 3/3] Update for quality --- tests/sparseml/pytorch/optim/test_modifier_pruning.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/sparseml/pytorch/optim/test_modifier_pruning.py b/tests/sparseml/pytorch/optim/test_modifier_pruning.py index 31b2546d757..9b02d1c651e 100644 --- a/tests/sparseml/pytorch/optim/test_modifier_pruning.py +++ b/tests/sparseml/pytorch/optim/test_modifier_pruning.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import math +import os import pytest import torch @@ -309,7 +309,13 @@ def test_lifecycle( if not modifier.phased: assert modifier.applied_sparsity > last_sparsity else: - pruned_on = math.floor((epoch - modifier.start_epoch) / modifier.update_frequency) % 2 == 0 + pruned_on = ( + math.floor( + (epoch - modifier.start_epoch) / modifier.update_frequency + ) + % 2 + == 0 + ) if pruned_on: assert modifier.applied_sparsity >= last_sparsity else:
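
The `phased` flag added across the pruning modifiers above alternates pruning on and off in windows of `update_frequency` epochs, starting "on" at `start_epoch`; once the end epoch is pending the toggle is skipped so the final sparsity is always applied. The snippet below is a minimal, self-contained sketch of that schedule using the same phase computation as the new block in `_check_mask_update`. It is not part of the sparseml API: the constant names and the `phased_pruning_on` helper are illustrative, and the example values simply mirror the new phased test case (start_epoch=10.0, end_epoch=25.0, update_frequency=2.0).

```python
import math

# Illustrative values mirroring the new phased test case; not sparseml API.
START_EPOCH = 10.0
END_EPOCH = 25.0
UPDATE_FREQUENCY = 2.0


def phased_pruning_on(epoch: float) -> bool:
    """Return True when the phased schedule has pruning turned on at `epoch`.

    Phase 0 begins at START_EPOCH with pruning on, phase 1 at
    START_EPOCH + UPDATE_FREQUENCY with pruning off, and so on. Once the end
    epoch is reached the toggle is skipped so the final sparsity is applied,
    matching the `not self.end_pending(...)` guard in `_check_mask_update`.
    """
    if epoch >= END_EPOCH:
        return True
    phase = math.floor((epoch - START_EPOCH) / UPDATE_FREQUENCY)
    return phase % 2 == 0


if __name__ == "__main__":
    # Print the on/off schedule over the pruning range.
    epoch = START_EPOCH
    while epoch <= END_EPOCH:
        state = "on" if phased_pruning_on(epoch) else "off (applied sparsity set to 0.0)"
        print(f"epoch {epoch:5.1f}: pruning {state}")
        epoch += UPDATE_FREQUENCY
    print(f"epoch {END_EPOCH:5.1f}: pruning on (final sparsity applied)")
```

With those values, pruning is active at epochs 10, 14, 18, and 22, masked off at 12, 16, 20, and 24, and the final sparsity is applied at the end epoch, which is the behavior the updated `test_lifecycle` asserts for phased modifiers.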