diff --git a/opacus/grad_sample/grad_sample_module.py b/opacus/grad_sample/grad_sample_module.py
index 19b5ffa6..745bb98b 100644
--- a/opacus/grad_sample/grad_sample_module.py
+++ b/opacus/grad_sample/grad_sample_module.py
@@ -207,7 +207,7 @@ def add_hooks(
             )
 
             self.autograd_grad_sample_hooks.append(
-                module.register_backward_hook(
+                module.register_full_backward_hook(
                     partial(
                         self.capture_backprops_hook,
                         loss_reduction=loss_reduction,
diff --git a/opacus/optimizers/__init__.py b/opacus/optimizers/__init__.py
index 88f79a8d..bac211d0 100644
--- a/opacus/optimizers/__init__.py
+++ b/opacus/optimizers/__init__.py
@@ -13,10 +13,7 @@
 # limitations under the License.
 
 from .adaclipoptimizer import AdaClipDPOptimizer
-from .ddp_perlayeroptimizer import (
-    DistributedPerLayerOptimizer,
-    SimpleDistributedPerLayerOptimizer,
-)
+from .ddp_perlayeroptimizer import SimpleDistributedPerLayerOptimizer
 from .ddpoptimizer import DistributedDPOptimizer
 from .ddpoptimizer_fast_gradient_clipping import (
     DistributedDPOptimizerFastGradientClipping,
@@ -28,7 +25,6 @@
 
 __all__ = [
     "AdaClipDPOptimizer",
-    "DistributedPerLayerOptimizer",
     "DistributedDPOptimizer",
     "DPOptimizer",
     "DPOptimizerFastGradientClipping",
@@ -55,9 +51,7 @@ def get_optimizer_class(clipping: str, distributed: bool, grad_sample_mode: str
     elif clipping == "per_layer" and distributed is False:
         return DPPerLayerOptimizer
     elif clipping == "per_layer" and distributed is True:
-        if grad_sample_mode == "hooks":
-            return DistributedPerLayerOptimizer
-        elif grad_sample_mode == "ew":
+        if grad_sample_mode == "hooks" or grad_sample_mode == "ew":
            return SimpleDistributedPerLayerOptimizer
         else:
             raise ValueError(f"Unexpected grad_sample_mode: {grad_sample_mode}")
diff --git a/opacus/tests/multigpu_gradcheck.py b/opacus/tests/multigpu_gradcheck.py
index 6242d8e1..1e8e8456 100644
--- a/opacus/tests/multigpu_gradcheck.py
+++ b/opacus/tests/multigpu_gradcheck.py
@@ -26,10 +26,7 @@
 from opacus import PrivacyEngine
 from opacus.distributed import DifferentiallyPrivateDistributedDataParallel as DPDDP
 from opacus.grad_sample import GradSampleModuleFastGradientClipping
-from opacus.optimizers.ddp_perlayeroptimizer import (
-    DistributedPerLayerOptimizer,
-    SimpleDistributedPerLayerOptimizer,
-)
+from opacus.optimizers.ddp_perlayeroptimizer import SimpleDistributedPerLayerOptimizer
 from opacus.optimizers.ddpoptimizer import DistributedDPOptimizer
 from opacus.optimizers.ddpoptimizer_fast_gradient_clipping import (
     DistributedDPOptimizerFastGradientClipping,
@@ -165,10 +162,7 @@ def demo_basic(rank, weight, world_size, dp, clipping, grad_sample_mode):
             grad_sample_mode=grad_sample_mode,
         )
         if clipping == "per_layer":
-            assert isinstance(
-                optimizer,
-                (DistributedPerLayerOptimizer, SimpleDistributedPerLayerOptimizer),
-            )
+            assert isinstance(optimizer, SimpleDistributedPerLayerOptimizer)
         else:
             assert isinstance(optimizer, DistributedDPOptimizer)
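Below is a minimal usage sketch, not part of the patch, showing the optimizer dispatch this change is expected to produce. It assumes an installation of opacus that includes the revised opacus/optimizers/__init__.py above and uses only names touched by the diff.

from opacus.optimizers import SimpleDistributedPerLayerOptimizer, get_optimizer_class

# With distributed per-layer clipping, both the "hooks" and "ew" grad-sample
# modes are expected to resolve to SimpleDistributedPerLayerOptimizer, since
# DistributedPerLayerOptimizer is no longer exported.
for mode in ("hooks", "ew"):
    cls = get_optimizer_class(
        clipping="per_layer", distributed=True, grad_sample_mode=mode
    )
    assert cls is SimpleDistributedPerLayerOptimizer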