diff --git a/opacus/grad_sample/embedding.py b/opacus/grad_sample/embedding.py
index 9a2c2637..8fe0621b 100644
--- a/opacus/grad_sample/embedding.py
+++ b/opacus/grad_sample/embedding.py
@@ -15,6 +15,7 @@
 from typing import Dict, List
 
+from opacus.grad_sample import embedding_norm_sample
 import torch
 import torch.nn as nn
 from opacus.grad_sample import embedding_norm_sample
 
diff --git a/opacus/grad_sample/embedding_norm_sample.py b/opacus/grad_sample/embedding_norm_sample.py
index 9e2ccf94..409af071 100644
--- a/opacus/grad_sample/embedding_norm_sample.py
+++ b/opacus/grad_sample/embedding_norm_sample.py
@@ -46,6 +46,12 @@ def compute_embedding_norm_sample(
       activations: [tensor([[1, 1],
           [2, 0],
           [2, 0]])]
+      backprops: tensor([[0.2000],
+          [0.2000],
+          [0.3000],
+          [0.1000],
+          [0.3000],
+          [0.1000]])
       backprops: tensor([[[0.2], [0.2]],
           [[0.3], [0.1]],
           [[0.3], [0.1]]])
diff --git a/opacus/privacy_engine.py b/opacus/privacy_engine.py
index cacacee0..fa123759 100644
--- a/opacus/privacy_engine.py
+++ b/opacus/privacy_engine.py
@@ -309,7 +309,7 @@ def make_private(
         noise_generator=None,
         grad_sample_mode: str = "hooks",
         **kwargs,
-    ) -> Tuple[GradSampleModule, DPOptimizer, DataLoader]:
+    ):
         """
         Add privacy-related responsibilities to the main PyTorch training objects:
         model, optimizer, and the data loader.
@@ -359,12 +359,15 @@ def make_private(
                 details
 
         Returns:
-            Tuple of (model, optimizer, data_loader).
+            Tuple of (model, optimizer, criterion (if grad_sample_mode="ghost"), data_loader).
 
             Model is a wrapper around the original model that also computes per sample
                 gradients
             Optimizer is a wrapper around the original optimizer that also does
                 gradient clipping and noise addition to the gradients
+            Criterion is a wrapper around the original criterion that does two
+                backward passes under the hood. Returned if grad_sample_mode is
+                "ghost".
             DataLoader is a brand new DataLoader object, constructed to behave as
                 equivalent to the original data loader, possibly with updated
                 sampling mechanism. Points to the same dataset object.
@@ -497,17 +500,23 @@ def make_private_with_epsilon(
                 details
 
         Returns:
-            Tuple of (model, optimizer, data_loader).
+            Tuple of (model, optimizer, criterion (if grad_sample_mode="ghost"), data_loader).
 
             Model is a wrapper around the original model that also computes per sample
                 gradients
             Optimizer is a wrapper around the original optimizer that also does
                 gradient clipping and noise addition to the gradients
+            Criterion is a wrapper around the original criterion that does two
+                backward passes under the hood. Returned if grad_sample_mode is
+                "ghost".
             DataLoader is a brand new DataLoader object, constructed to behave as
                 equivalent to the original data loader, possibly with updated
                 sampling mechanism. Points to the same dataset object.
         """
         sample_rate = 1 / len(data_loader)
+        epsilon_tolerance = kwargs.get(
+            "epsilon_tolerance", 0.01
+        )  # same default as in get_noise_multiplier
 
         if len(self.accountant) > 0:
             warnings.warn(
@@ -527,6 +536,7 @@ def make_private_with_epsilon(
                 sample_rate=sample_rate,
                 epochs=epochs,
                 accountant=self.accountant.mechanism(),
+                epsilon_tolerance=epsilon_tolerance,
                 **kwargs,
             ),
             max_grad_norm=max_grad_norm,
diff --git a/opacus/tests/grad_sample_module_fast_gradient_clipping_test.py b/opacus/tests/grad_sample_module_fast_gradient_clipping_test.py
index 5d1e2a01..ea767c3d 100644
--- a/opacus/tests/grad_sample_module_fast_gradient_clipping_test.py
+++ b/opacus/tests/grad_sample_module_fast_gradient_clipping_test.py
@@ -351,6 +351,7 @@ def test_norm_calculation(self):
         diff = flat_norms_normal - flat_norms_gc
 
         logging.info(f"Diff = {diff}")
+        msg = "Fail: Gradient norms from vanilla DP-SGD and from fast gradient clipping are different"
         assert torch.allclose(flat_norms_normal, flat_norms_gc, atol=1e-3), msg
 
diff --git a/opacus/tests/grad_samples/embedding_norm_sample_test.py b/opacus/tests/grad_samples/embedding_norm_sample_test.py
index 8053dc6f..22e47b97 100644
--- a/opacus/tests/grad_samples/embedding_norm_sample_test.py
+++ b/opacus/tests/grad_samples/embedding_norm_sample_test.py
@@ -15,6 +15,7 @@
 
 import unittest
+
 import torch
 import torch.nn as nn
 from opacus.grad_sample import embedding_norm_sample
@@ -36,11 +37,15 @@ def test_compute_embedding_norm_sample(self):
         # Example input ids (activations). Shape: [3, 2]
         input_ids = torch.tensor([[1, 1], [2, 0], [2, 0]], dtype=torch.long)
 
-        # Example backprops. Shape: [3, 2, 1]
-        backprops = torch.tensor(
-            [[[0.2], [0.2]], [[0.3], [0.1]], [[0.3], [0.1]]], dtype=torch.float32
+        # Example gradients with respect to the embedding output (backprops).
+        # Shape: [6, 1]
+        grad_values = torch.tensor(
+            [[0.2], [0.2], [0.3], [0.1], [0.3], [0.1]], dtype=torch.float32
         )
+        # Simulate backprop through embedding layer
+        backprops = grad_values
+
         # Wrap input_ids in a list as expected by the norm sample function
         activations = [input_ids]
@@ -66,7 +71,7 @@ def test_compute_embedding_norm_sample_with_non_one_embedding_dim(self):
         # Manually set weights for the embedding layer for testing
         embedding_layer.weight = nn.Parameter(
-            torch.tensor([[0.1], [0.2], [0.3]], dtype=torch.float32)
+            torch.tensor([[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]], dtype=torch.float32)
         )
 
         # Example input ids (activations). Shape: [6, 1, 1].
         input_ids = torch.tensor(
             [[[1]], [[1]], [[2]], [[0]], [[2]], [[0]]], dtype=torch.long
         )
@@ -74,9 +79,9 @@ def test_compute_embedding_norm_sample_with_non_one_embedding_dim(self):
 
-        # Example backprops per input id, with embedding_dim=2.
+        # Example gradients per input id, with embedding_dim=2.
         # Shape: [6, 1, 1, 2]
-        backprops = torch.tensor(
+        grad_values = torch.tensor(
             [
                 [[[0.2, 0.2]]],
                 [[[0.2, 0.2]]],
                 [[[0.3, 0.3]]],
                 [[[0.1, 0.1]]],
                 [[[0.3, 0.3]]],
                 [[[0.1, 0.1]]],
             ],
             dtype=torch.float32,
         )
@@ -88,6 +93,9 @@ def test_compute_embedding_norm_sample_with_non_one_embedding_dim(self):
 
+        # Simulate backprop through embedding layer
+        backprops = grad_values
+
         # Wrap input_ids in a list as expected by the grad norm function
         activations = [input_ids]
@@ -204,6 +212,7 @@ def test_compute_embedding_norm_sample_with_extra_activations_per_example(self):
         expected_norms = torch.tensor(
             [0.0150, 0.0071, 0.0005, 0.0081, 0.0039], dtype=torch.float32
         )
+        print("expected_norms: ", expected_norms)
         computed_norms = result[embedding_layer.weight]
 
         # Verify the computed norms match the expected norms